Statistics
| Branch: | Tag: | Revision:

root / lib / cmdlib.py @ e311ed53

History | View | Annotate | Download (340.7 kB)

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable-msg=W0201
25

    
26
# W0201 since most LU attributes are defined in CheckPrereq or similar
27
# functions
28

    
29
import os
30
import os.path
31
import time
32
import re
33
import platform
34
import logging
35
import copy
36
import OpenSSL
37

    
38
from ganeti import ssh
39
from ganeti import utils
40
from ganeti import errors
41
from ganeti import hypervisor
42
from ganeti import locking
43
from ganeti import constants
44
from ganeti import objects
45
from ganeti import serializer
46
from ganeti import ssconf
47
from ganeti import uidpool
48
from ganeti import compat
49

    
50

    
51
class LogicalUnit(object):
  """Base class for all logical units (LUs).

  Rules every subclass has to follow:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - redefine HPATH and HTYPE
    - optionally redefine the run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  _OP_REQP = []
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Initialise the LU state and validate the opcode.

    Every parameter listed in C{_OP_REQP} must be present on the opcode with
    a value other than None; afterwards L{CheckArguments} is invoked so that
    derived classes can run their own syntactic checks.

    @raise errors.OpPrereqError: if a required opcode parameter is missing

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.context = context
    self.rpc = rpc

    # Locking declarations, read by mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict((level, 0) for level in locking.LEVELS)
    self.add_locks = {}
    self.remove_locks = {}
    # Helper functions refuse to run unless the matching entry is set here
    self.recalculate_locks = {}

    # The SshRunner is created lazily, see the "ssh" property
    self.__ssh = None

    # Logging shortcuts borrowed from the processor
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103

    # Dry-run support
    self.dry_run_result = None

    # Generic debug attribute: anything but an integer is reset to zero
    if not isinstance(getattr(self.op, "debug_level", None), int):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    for attr_name in self._OP_REQP:
      if getattr(op, attr_name, None) is None:
        raise errors.OpPrereqError("Required parameter '%s' missing" %
                                   attr_name, errors.ECODE_INVAL)

    self.CheckArguments()

  def __GetSSH(self):
    """Return the SshRunner object, creating it on first use.

    """
    runner = self.__ssh
    if not runner:
      runner = ssh.SshRunner(self.cfg.GetClusterName())
      self.__ssh = runner
    return runner

  ssh = property(fget=__GetSSH)

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This hook is meant for simple syntactic checks that ensure the validity
    of the opcode parameters without any cluster-related checks. The same
    could be done in ExpandNames and/or CheckPrereq, but keeping it separate
    is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods no longer have to worry about missing parameters.

    The default implementation does nothing.

    """

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it
    should update all the parameters of the opcode to their canonical form
    (e.g. a short node name must be fully expanded after this method has
    successfully completed). This way locking, hooks, logging, etc. can work
    correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock
    names as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1)
    for that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.tld'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # Only concurrent LUs are forced to implement this method, so that old
    # LUs don't need to be changed all at the same time.
    if not self.REQ_BGL:
      raise NotImplementedError

    # Exclusive LUs don't need locks.
    self.needed_locks = {}

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level.

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at
    a particular level, but after acquiring the ones at lower levels, and
    permits such calculations. It can be used to modify self.needed_locks,
    and by default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    The default implementation runs CheckPrereq on every tasklet, in order,
    and raises NotImplementedError when no tasklets were defined.

    """
    if self.tasklets is None:
      raise NotImplementedError

    for (pos, tasklet) in enumerate(self.tasklets):
      logging.debug("Checking prerequisites for tasklet %s/%s",
                    pos + 1, len(self.tasklets))
      tasklet.CheckPrereq()

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    The default implementation runs Exec on every tasklet, in order, and
    raises NotImplementedError when no tasklets were defined.

    """
    if self.tasklets is None:
      raise NotImplementedError

    for (pos, tasklet) in enumerate(self.tasklets):
      logging.debug("Executing tasklet %s/%s", pos + 1, len(self.tasklets))
      tasklet.Exec(feedback_fn)

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    This method should return a three-element tuple consisting of: a dict
    containing the environment that will be used for running the
    specific hook for this LU, a list of node names on which the hook
    should run before the execution, and a list of node names on which
    the hook should run after the execution.

    The keys of the dict must not have 'GANETI_' prefixed as this will
    be handled in the hooks runner. Also note additional keys will be
    added by the hooks runner. If the LU doesn't define any
    environment, an empty dict (and not None) should be returned.

    No nodes should be returned as an empty list (and not None).

    Note that if the HPATH for a LU class is None, this function will
    not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged, but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # The API must be kept, hence the "unused argument" and "could be a
    # function" warnings are silenced
    # pylint: disable-msg=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking.
    This function does it, and then updates self.op.instance_name to the
    expanded name. It also initializes needed_locks as a dict, if this hasn't
    been done before.

    """
    if self.needed_locks is not None:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    else:
      self.needed_locks = {}

    full_name = _ExpandInstanceName(self.cfg, self.op.instance_name)
    self.op.instance_name = full_name
    self.needed_locks[locking.LEVEL_INSTANCE] = full_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to
    lock their nodes. Its effect is populating
    self.needed_locks[locking.LEVEL_NODE] with all primary or secondary nodes
    for instances already locked and present in
    self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes,
    or to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we're really been called with the instance locks held

    # Collect the nodes of every instance whose lock we currently hold
    node_names = []
    for inst_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      inst = self.context.cfg.GetInstanceInfo(inst_name)
      node_names.append(inst.primary_node)
      if primary_only:
        continue
      node_names.extend(inst.secondary_nodes)

    # The requested mode decides whether the computed list replaces or
    # extends the node locks declared so far; other values leave them alone
    mode = self.recalculate_locks[locking.LEVEL_NODE]
    if mode == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = node_names
    elif mode == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(node_names)

    del self.recalculate_locks[locking.LEVEL_NODE]
358

    
359

    
360
class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This must never be called, since HPATH is None for this class, so it
    unconditionally raises an error.

    @raise AssertionError: always

    """
    # Raised explicitly rather than via "assert False": assert statements are
    # removed when Python runs with -O, which would make this silently
    # return None.
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")
377

    
378

    
379
class Tasklet(object):
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    """Bind the tasklet to its owning logical unit.

    @param lu: the logical unit this tasklet belongs to; its C{cfg} and
        C{rpc} attributes are copied onto the tasklet as shortcuts

    """
    self.lu = lu

    # Shortcuts
    self.cfg = lu.cfg
    self.rpc = lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if
    it hasn't been done before.

    """
    raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    @param feedback_fn: function used to send feedback back to the caller

    """
    raise NotImplementedError
423

    
424

    
425
def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: non-empty list of node names to expand
  @rtype: list
  @return: the expanded node names, as ordered by L{utils.NiceSort}
  @raise errors.OpPrereqError: if the nodes parameter is not a list, or if
      one of the names cannot be expanded
  @raise errors.ProgrammerError: if the list of nodes is empty

  """
  if not isinstance(nodes, list):
    raise errors.OpPrereqError("Invalid argument type 'nodes'",
                               errors.ECODE_INVAL)

  if len(nodes) == 0:
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
      " non-empty list of nodes whose name is to be expanded.")

  expanded = []
  for short_name in nodes:
    expanded.append(_ExpandNodeName(lu.cfg, short_name))
  return utils.NiceSort(expanded)
447

    
448

    
449
def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names, or an empty list for all
      instances
  @rtype: list
  @return: the expanded names in the order they were given, or, for an
      empty request, all instance names ordered by L{utils.NiceSort}
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if not isinstance(instances, list):
    raise errors.OpPrereqError("Invalid argument type 'instances'",
                               errors.ECODE_INVAL)

  if not instances:
    # Nothing requested explicitly: fall back to every configured instance
    return utils.NiceSort(lu.cfg.GetInstanceList())

  return [_ExpandInstanceName(lu.cfg, short_name) for short_name in instances]
471

    
472

    
473
def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set
  @param selected: the fields to be matched against the union of both sets
  @raise errors.OpPrereqError: if any selected field matches neither set

  """
  known = utils.FieldSet()
  for field_set in (static, dynamic):
    known.Extend(field_set)

  unknown = known.NonMatching(selected)
  if not unknown:
    return

  raise errors.OpPrereqError("Unknown output fields selected: %s"
                             % ",".join(unknown), errors.ECODE_INVAL)
490

    
491

    
492
def _CheckBooleanOpField(op, name):
  """Validates boolean opcode parameters.

  This will ensure that an opcode parameter is either a boolean value,
  or None (but that it always exists): a missing attribute is created on
  the opcode with the value None.

  @param op: the opcode to check (and possibly update)
  @param name: name of the opcode attribute
  @raise errors.OpPrereqError: if the value is neither None nor a boolean

  """
  value = getattr(op, name, None)
  if value is not None and not isinstance(value, bool):
    raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
                               (name, str(value)), errors.ECODE_INVAL)
  # Written back so that the attribute exists even if it was missing
  setattr(op, name, value)
504

    
505

    
506
def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  @param params: the hypervisor parameters to check
  @raise errors.OpPrereqError: if any of the parameters is listed in
      L{constants.HVC_GLOBALS}

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if not used_globals:
    return

  raise errors.OpPrereqError(
    ("The following hypervisor parameters are global and cannot"
     " be customized at instance level, please modify them at"
     " cluster level: %s" % utils.CommaJoin(used_globals)),
    errors.ECODE_INVAL)
519

    
520

    
521
def _CheckNodeOnline(lu, node):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is offline

  """
  node_info = lu.cfg.GetNodeInfo(node)
  if not node_info.offline:
    return
  raise errors.OpPrereqError("Can't use offline node %s" % node,
                             errors.ECODE_INVAL)
532

    
533

    
534
def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  node_info = lu.cfg.GetNodeInfo(node)
  if not node_info.drained:
    return
  raise errors.OpPrereqError("Can't use drained node %s" % node,
                             errors.ECODE_INVAL)
545

    
546

    
547
def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  os_result = lu.rpc.call_os_get(node, os_name)
  failure_msg = ("OS '%s' not in supported OS list for node %s" %
                 (os_name, node))
  os_result.Raise(failure_msg, prereq=True, ecode=errors.ECODE_INVAL)

  if force_variant:
    return
  _CheckOSVariant(os_result.payload, os_name)
563

    
564

    
565
def _RequireFileStorage():
  """Checks that file storage is enabled.

  @raise errors.OpPrereqError: when file storage is disabled

  """
  if constants.ENABLE_FILE_STORAGE:
    return
  raise errors.OpPrereqError("File storage disabled at configure time",
                             errors.ECODE_INVAL)
574

    
575

    
576
def _CheckDiskTemplate(template):
  """Ensure a given disk template is valid.

  @param template: the disk template name to check
  @raise errors.OpPrereqError: if the name is not in
      L{constants.DISK_TEMPLATES}, or if it is the file template while file
      storage is disabled

  """
  valid_templates = constants.DISK_TEMPLATES
  if template not in valid_templates:
    raise errors.OpPrereqError(
      ("Invalid disk template name '%s', valid templates are: %s" %
       (template, utils.CommaJoin(valid_templates))),
      errors.ECODE_INVAL)

  # File-based disks additionally need file storage support
  if template == constants.DT_FILE:
    _RequireFileStorage()
586

    
587

    
588
def _CheckStorageType(storage_type):
  """Ensure a given storage type is valid.

  @param storage_type: the storage type to check
  @raise errors.OpPrereqError: if the type is not in
      L{constants.VALID_STORAGE_TYPES}, or if it is the file type while file
      storage is disabled

  """
  is_known = storage_type in constants.VALID_STORAGE_TYPES
  if not is_known:
    raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
                               errors.ECODE_INVAL)

  # File storage additionally needs to be enabled
  if storage_type == constants.ST_FILE:
    _RequireFileStorage()
597

    
598

    
599

    
600
def _CheckInstanceDown(lu, instance, reason):
  """Ensure that an instance is not running.

  The instance must not be marked as up in its configuration object, and it
  must not appear in the instance list reported by its primary node.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance object to check
  @param reason: text appended to the error message
  @raise errors.OpPrereqError: if the instance is marked up, is running, or
      its primary node cannot be queried

  """
  if instance.admin_up:
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
                               (instance.name, reason), errors.ECODE_STATE)

  pnode = instance.primary_node
  all_results = lu.rpc.call_instance_list([pnode], [instance.hypervisor])
  node_result = all_results[pnode]
  node_result.Raise("Can't contact node %s for instance information" % pnode,
                    prereq=True, ecode=errors.ECODE_ENVIRON)

  if instance.name not in node_result.payload:
    return
  raise errors.OpPrereqError("Instance %s is running, %s" %
                             (instance.name, reason), errors.ECODE_STATE)
614

    
615

    
616
def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found, i.e. the expansion
      function returned None

  """
  expanded = fn(name)
  if expanded is not None:
    return expanded
  raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                             errors.ECODE_NOENT)
631

    
632

    
633
def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes.

  @param cfg: configuration object providing C{ExpandNodeName}
  @param name: requested node name
  @return: the resolved (full) node name

  """
  expander = cfg.ExpandNodeName
  return _ExpandItemName(expander, name, "Node")
636

    
637

    
638
def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instances.

  @param cfg: configuration object providing C{ExpandInstanceName}
  @param name: requested instance name
  @return: the resolved (full) instance name

  """
  expander = cfg.ExpandInstanceName
  return _ExpandItemName(expander, name, "Instance")
641

    
642

    
643
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name):
  """Builds instance related env variables for hooks

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @rtype: dict
  @return: the hook environment for this instance

  """
  str_status = "down"
  if status:
    str_status = "up"

  env = dict(
    OP_TARGET=name,
    INSTANCE_NAME=name,
    INSTANCE_PRIMARY=primary_node,
    INSTANCE_SECONDARIES=" ".join(secondary_nodes),
    INSTANCE_OS_TYPE=os_type,
    INSTANCE_STATUS=str_status,
    INSTANCE_MEMORY=memory,
    INSTANCE_VCPUS=vcpus,
    INSTANCE_DISK_TEMPLATE=disk_template,
    INSTANCE_HYPERVISOR=hypervisor_name,
    )

  # One group of variables per NIC, followed by the NIC count
  nic_count = 0
  if nics:
    nic_count = len(nics)
    for idx, nic in enumerate(nics):
      (ip, mac, mode, link) = nic
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      # For bridged NICs the link is also exported as the bridge
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  env["INSTANCE_NIC_COUNT"] = nic_count

  # One group of variables per disk, followed by the disk count
  disk_count = 0
  if disks:
    disk_count = len(disks)
    for idx, disk in enumerate(disks):
      (size, mode) = disk
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  env["INSTANCE_DISK_COUNT"] = disk_count

  # Backend parameters first, then hypervisor parameters
  for kind, params in (("BE", bep), ("HV", hvp)):
    for key, value in params.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env
729

    
730

    
731
def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUQueryInstanceData.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples
  @rtype: list
  @return: one (ip, mac, mode, link) tuple per NIC; mode and link are taken
      from the NIC parameters filled with the cluster defaults

  """
  cluster_defaults = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]

  nic_tuples = []
  for nic in nics:
    (ip, mac) = (nic.ip, nic.mac)
    # Per-NIC parameters override the cluster-level defaults
    effective = objects.FillDict(cluster_defaults, nic.nicparams)
    nic_tuples.append((ip, mac,
                       effective[constants.NIC_MODE],
                       effective[constants.NIC_LINK]))
  return nic_tuples
753

    
754

    
755
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)

  # Keyword arguments for _BuildInstanceHookEnv, derived from the instance
  args = dict(
    name=instance.name,
    primary_node=instance.primary_node,
    secondary_nodes=instance.secondary_nodes,
    os_type=instance.os,
    status=instance.admin_up,
    memory=bep[constants.BE_MEMORY],
    vcpus=bep[constants.BE_VCPUS],
    nics=_NICListToTuple(lu, instance.nics),
    disk_template=instance.disk_template,
    disks=[(disk.size, disk.mode) for disk in instance.disks],
    bep=bep,
    hvp=hvp,
    hypervisor_name=instance.hypervisor,
    )

  # Caller-supplied values win over the ones computed above
  if override:
    args.update(override)

  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
791

    
792

    
793
def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  Nodes returned by C{MaintainCandidatePool} are reported via C{LogInfo} and
  re-added through the LU context. An informational message is also logged
  if there are more master candidates than the reported maximum.

  @param lu: the logical unit on whose behalf we execute
  @param exceptions: passed unchanged to C{MaintainCandidatePool} and
      C{GetMasterCandidateStats}

  """
  promoted = lu.cfg.MaintainCandidatePool(exceptions)
  if promoted:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in promoted))
    # Note: the items handed to ReaddNode are the node objects themselves
    for node in promoted:
      lu.context.ReaddNode(node)

  stats = lu.cfg.GetMasterCandidateStats(exceptions)
  (mc_now, mc_max, _) = stats
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))
807

    
808

    
809
def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  @param lu: the logical unit on whose behalf we execute
  @param exceptions: passed unchanged to C{GetMasterCandidateStats}
  @rtype: boolean
  @return: whether the current number of candidates is below the desired
      number once the new node is taken into account

  """
  pool_size = lu.cfg.GetClusterInfo().candidate_pool_size
  (mc_now, mc_should, _) = lu.cfg.GetMasterCandidateStats(exceptions)
  # The new node raises the desired count by one, but never above the
  # configured candidate pool size
  desired = min(mc_should + 1, pool_size)
  return mc_now < desired
818

    
819

    
820
def _CheckNicsBridgesExist(lu, target_nics, target_node,
                               profile=constants.PP_DEFAULT):
  """Check that the bridges needed by a list of nics exist.

  @param lu: the logical unit on whose behalf we execute
  @param target_nics: the NIC objects to check
  @param target_node: the node on which the bridges must exist
  @param profile: key of the cluster-level NIC parameters used as defaults

  """
  cluster_defaults = lu.cfg.GetClusterInfo().nicparams[profile]

  # Only bridged NICs contribute a bridge (their link) to the check
  bridges = []
  for nic in target_nics:
    effective = objects.FillDict(cluster_defaults, nic.nicparams)
    if effective[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
      bridges.append(effective[constants.NIC_LINK])

  if not bridges:
    return

  result = lu.rpc.call_bridges_exist(target_node, bridges)
  result.Raise("Error checking bridges on destination node '%s'" %
               target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
834

    
835

    
836
def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose NICs are checked
  @param node: the node to check on; defaults to the instance's primary node

  """
  target_node = node
  if target_node is None:
    target_node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, target_node)
843

    
844

    
845
def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the OS variants specification.

  Nothing is checked if the OS declares no supported variants.
  Otherwise the variant is whatever follows the first "+" in the name,
  and it must be one of the supported variants.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity
  @raise errors.OpPrereqError: if the name carries no variant or the
      variant is not supported

  """
  allowed = os_obj.supported_variants
  if not allowed:
    return

  pieces = name.split("+", 1)
  if len(pieces) < 2:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if pieces[1] not in allowed:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
864

    
865

    
866
def _GetNodeInstancesInner(cfg, fn):
  """Return the configured instances for which a predicate holds.

  @param cfg: configuration object providing C{GetAllInstancesInfo}
  @param fn: callable receiving an instance object; a true result
      selects the instance
  @rtype: list
  @return: the selected instance objects

  """
  selected = []
  for inst in cfg.GetAllInstancesInfo().values():
    if fn(inst):
      selected.append(inst)
  return selected
868

    
869

    
870
def _GetNodeInstances(cfg, node_name):
  """Return all instances having a node among their nodes.

  @param cfg: configuration object to read the instances from
  @param node_name: the node name looked up in each instance's
      C{all_nodes}
  @rtype: list
  @return: the matching instance objects

  """
  def _UsesNode(inst):
    return node_name in inst.all_nodes

  return _GetNodeInstancesInner(cfg, _UsesNode)
876

    
877

    
878
def _GetNodePrimaryInstances(cfg, node_name):
  """Return the instances whose primary node is the given node.

  @param cfg: configuration object to read the instances from
  @param node_name: the node name compared with each instance's
      C{primary_node}
  @rtype: list
  @return: the matching instance objects

  """
  def _IsPrimary(inst):
    return node_name == inst.primary_node

  return _GetNodeInstancesInner(cfg, _IsPrimary)
884

    
885

    
886
def _GetNodeSecondaryInstances(cfg, node_name):
  """Return the instances having the given node as a secondary node.

  @param cfg: configuration object to read the instances from
  @param node_name: the node name looked up in each instance's
      C{secondary_nodes}
  @rtype: list
  @return: the matching instance objects

  """
  def _IsSecondary(inst):
    return node_name in inst.secondary_nodes

  return _GetNodeInstancesInner(cfg, _IsSecondary)
892

    
893

    
894
def _GetStorageTypeArgs(cfg, storage_type):
  """Return the extra arguments needed for a storage type.

  @param cfg: configuration object, queried for the file storage
      directory
  @param storage_type: one of the storage type constants
  @rtype: list
  @return: an empty list for every type except file storage, for which
      a single argument (the list of storage directories) is returned

  """
  if storage_type != constants.ST_FILE:
    return []

  # Special case for file storage: storage.FileStorage wants a list of
  # storage directories as its only argument
  return [[cfg.GetFileStorageDir()]]
904

    
905

    
906
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  """Return the indices of an instance's disks that are faulty on a node.

  @param cfg: configuration object, used to set the disk IDs for the node
  @param rpc: RPC runner used to query the mirror status
  @param instance: the instance whose disks are examined
  @param node_name: the node to query
  @param prereq: passed on to the RPC result's C{Raise} method
  @rtype: list
  @return: indices (into C{instance.disks}) of the disks whose local
      disk status is L{constants.LDS_FAULTY}

  """
  disks = instance.disks
  for disk in disks:
    cfg.SetDiskID(disk, node_name)

  status = rpc.call_blockdev_getmirrorstatus(node_name, disks)
  failure_msg = "Failed to get disk status from node %s" % node_name
  status.Raise(failure_msg, prereq=prereq, ecode=errors.ECODE_ENVIRON)

  # entries without a status are skipped, not treated as faulty
  return [idx for (idx, bdev) in enumerate(status.payload)
          if bdev and bdev.ldisk_status == constants.LDS_FAULTY]
921

    
922

    
923
def _FormatTimestamp(secs):
  """Formats a Unix timestamp with the local timezone.

  The timestamp is converted with C{time.localtime} so that the date and
  time printed agree with the timezone name emitted by C{%Z}; converting
  with C{time.gmtime} would print the UTC wall-clock time, which does not
  match the documented local-time output.

  @type secs: number
  @param secs: seconds since the Unix epoch
  @rtype: string
  @return: the timestamp as "YYYY-MM-DD HH:MM:SS ZONE" in local time

  """
  return time.strftime("%F %T %Z", time.localtime(secs))
928

    
929

    
930
class LUPostInitCluster(LogicalUnit):
  """Logical unit whose only purpose is running the cluster-init hooks.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []

  def BuildHooksEnv(self):
    """Build the hooks environment.

    @rtype: tuple
    @return: the environment (only OP_TARGET, set to the cluster name),
        an empty node list and a list holding just the master node

    """
    cluster_name = self.cfg.GetClusterName()
    master_node = self.cfg.GetMasterNode()
    return ({"OP_TARGET": cluster_name}, [], [master_node])

  def CheckPrereq(self):
    """There are no prerequisites; always succeeds.

    """
    return True

  def Exec(self, feedback_fn):
    """There is nothing to execute; always succeeds.

    """
    return True
957

    
958

    
959
class LUDestroyCluster(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []

  def BuildHooksEnv(self):
    """Build the hooks environment.

    @rtype: tuple
    @return: the environment (only OP_TARGET, set to the cluster name)
        and two empty node lists

    """
    cluster_name = self.cfg.GetClusterName()
    return ({"OP_TARGET": cluster_name}, [], [])

  def CheckPrereq(self):
    """Check that the cluster is empty.

    The only node left must be the master node, and there must be no
    instances.

    @raise errors.OpPrereqError: if other nodes or any instances remain

    """
    master_node = self.cfg.GetMasterNode()

    nodes = self.cfg.GetNodeList()
    only_master_left = (len(nodes) == 1 and nodes[0] == master_node)
    if not only_master_left:
      raise errors.OpPrereqError("There are still %d node(s) in this cluster." %
                                 (len(nodes) - 1), errors.ECODE_INVAL)

    instances = self.cfg.GetInstanceList()
    if instances:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instances),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroy the cluster.

    Runs the post hooks on the master, disables the master role and, if
    the cluster manages the SSH setup, backs up the SSH key files.

    @return: the name of the master node

    """
    master_node = self.cfg.GetMasterNode()
    manage_ssh = self.cfg.GetClusterInfo().modify_ssh_setup

    # Run post hooks on master node before it's removed
    hooks_master = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hooks_master.RunPhase(constants.HOOKS_PHASE_POST, [master_node])
    except:
      # hook failures must not prevent the destruction, hence the
      # deliberate catch-all
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % master_node)

    stop_result = self.rpc.call_node_stop_master(master_node, False)
    stop_result.Raise("Could not disable the master role")

    if manage_ssh:
      (priv_key, pub_key, _) = ssh.GetUserFiles(constants.GANETI_RUNAS)
      for key_file in (priv_key, pub_key):
        utils.CreateBackup(key_file)

    return master_node
1019

    
1020

    
1021
def _VerifyCertificateInner(filename, expired, not_before, not_after, now,
                            warn_days=constants.SSL_CERT_EXPIRATION_WARN,
                            error_days=constants.SSL_CERT_EXPIRATION_ERROR):
  """Classify the validity of a certificate for LUVerifyCluster.

  @param filename: certificate file name, used in the messages only
  @param expired: whether the certificate has expired
  @param not_before: start of validity as Unix timestamp, or None
  @param not_after: end of validity as Unix timestamp, or None
  @param now: current time as Unix timestamp
  @param warn_days: remaining days at or below which to warn
  @param error_days: remaining days at or below which to report an error
  @rtype: tuple
  @return: (error type, message), using L{LUVerifyCluster.ETYPE_ERROR}
      or L{LUVerifyCluster.ETYPE_WARNING}; (None, None) if there is
      nothing to report

  """
  if expired:
    have_start = not_before is not None
    have_end = not_after is not None

    if have_start and have_end:
      validity = (" (valid from %s to %s)" %
                  (_FormatTimestamp(not_before),
                   _FormatTimestamp(not_after)))
    elif have_start:
      validity = " (valid from %s)" % _FormatTimestamp(not_before)
    elif have_end:
      validity = " (valid until %s)" % _FormatTimestamp(not_after)
    else:
      validity = ""

    return (LUVerifyCluster.ETYPE_ERROR,
            ("Certificate %s is expired" % filename) + validity)

  if not_before is not None and not_before > now:
    return (LUVerifyCluster.ETYPE_WARNING,
            "Certificate %s not yet valid (valid from %s)" %
            (filename, _FormatTimestamp(not_before)))

  if not_after is None:
    # no expiration date known, nothing more to check
    return (None, None)

  seconds_per_day = 24 * 3600
  remaining_days = int((not_after - now) / seconds_per_day)
  msg = "Certificate %s expires in %d days" % (filename, remaining_days)

  # the error threshold takes precedence over the warning threshold
  if remaining_days <= error_days:
    return (LUVerifyCluster.ETYPE_ERROR, msg)

  if remaining_days <= warn_days:
    return (LUVerifyCluster.ETYPE_WARNING, msg)

  return (None, None)
1058

    
1059

    
1060
def _VerifyCertificate(filename):
1061
  """Verifies a certificate for LUVerifyCluster.
1062

1063
  @type filename: string
1064
  @param filename: Path to PEM file
1065

1066
  """
1067
  try:
1068
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1069
                                           utils.ReadFile(filename))
1070
  except Exception, err: # pylint: disable-msg=W0703
1071
    return (LUVerifyCluster.ETYPE_ERROR,
1072
            "Failed to load X509 certificate %s: %s" % (filename, err))
1073

    
1074
  # Depending on the pyOpenSSL version, this can just return (None, None)
1075
  (not_before, not_after) = utils.GetX509CertValidity(cert)
1076

    
1077
  return _VerifyCertificateInner(filename, cert.has_expired(),
1078
                                 not_before, not_after, time.time())
1079

    
1080

    
1081
class LUVerifyCluster(LogicalUnit):
1082
  """Verifies the cluster status.
1083

1084
  """
1085
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["skip_checks", "verbose", "error_codes", "debug_simulate_errors"]
  REQ_BGL = False

  # kinds of items an error can be attached to
  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  # error codes, as (item kind, code name) pairs; _Error unpacks them
  # in this order
  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")

  # name of the keyword argument selecting the severity of a report,
  # and the severities themselves
  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"
1120

    
1121
  class NodeImage(object):
    """The configured (logical) and observed (physical) state of a node.

    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dictionary of {secondary-node: list of instances} of all
        peers of this node (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call failed (overall, not
        whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)

    """
    def __init__(self, offline=False):
      # state taken from the configuration
      self.offline = offline
      self.ghost = False
      self.pinst = []
      self.sinst = []
      self.sbp = {}

      # state reported at runtime
      self.volumes = {}
      self.instances = []
      self.mfree = 0
      self.dfree = 0

      # failure flags, all cleared initially
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
1158

    
1159
  def ExpandNames(self):
    """Declare the locks needed: all nodes and all instances.

    Every locking level is additionally flagged with 1 in
    C{share_locks}.

    """
    wanted = {}
    for level in (locking.LEVEL_NODE, locking.LEVEL_INSTANCE):
      wanted[level] = locking.ALL_SET
    self.needed_locks = wanted
    self.share_locks = dict((level, 1) for level in locking.LEVELS)
1165

    
1166
  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message and send it to the feedback function.

    Based on the opcode's error_codes parameter, either format a
    parseable, colon-separated error line, or a simpler error string.

    This must be called only from Exec and functions called from Exec,
    as it relies on the feedback function stored by Exec.

    @param ecode: an (item type, error code name) pair
    @param item: the name of the affected item, or a false value if
        there is none
    @param msg: the message, interpolated with C{args} if any are given
    @keyword code: the severity, L{ETYPE_ERROR} if not given

    """
    severity = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    (item_type, code_name) = ecode

    # first complete the message
    text = msg
    if args:
      text = msg % args

    # then format the whole line
    if self.op.error_codes:
      report = "%s:%s:%s:%s:%s" % (severity, code_name, item_type, item, text)
    else:
      item_suffix = ""
      if item:
        item_suffix = " " + item
      report = "%s: %s%s: %s" % (severity, item_type, item_suffix, text)

    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % report)
1191

    
1192
  def _ErrorIf(self, cond, *args, **kwargs):
    """Report an error if the passed condition is true.

    The condition is also treated as true whenever the opcode requests
    simulated errors. The remaining arguments are handed to L{_Error}.
    The operation is marked as failed only for reports whose severity
    is L{ETYPE_ERROR}.

    """
    triggered = bool(cond) or self.op.debug_simulate_errors
    if triggered:
      self._Error(*args, **kwargs)

    severity = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    if severity != self.ETYPE_ERROR:
      # do not mark the operation as failed for WARN cases only
      return
    self.bad = self.bad or triggered
1202

    
1203
  def _VerifyNode(self, ninfo, nresult):
    """Run the basic sanity tests against a node's verify results.

    Test list:

      - the node returned a non-empty dictionary
      - the version information is well-formed
      - the protocol version equals the master's (error otherwise)
      - the release version equals the master's (warning otherwise)
      - the per-hypervisor verification results
      - the node setup results

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @rtype: boolean
    @return: whether overall this call was successful (and we can expect
         reasonable values in the response)

    """
    node = ninfo.name
    report = self._ErrorIf

    # main result, nresult should be a non-empty dict
    no_data = not (nresult and isinstance(nresult, dict))
    report(no_data, self.ENODERPC, node,
           "unable to verify node: no data returned")
    if no_data:
      return False

    # the version must be a (protocol version, release version) pair
    master_protocol = constants.PROTOCOL_VERSION
    node_version = nresult.get("version", None)
    malformed = (not node_version or
                 not isinstance(node_version, (list, tuple)) or
                 len(node_version) != 2)
    report(malformed, self.ENODERPC, node,
           "connection to node returned invalid data")
    if malformed:
      return False

    incompatible = master_protocol != node_version[0]
    report(incompatible, self.ENODEVERSION, node,
           "incompatible protocol versions: master %s,"
           " node %s", master_protocol, node_version[0])
    if incompatible:
      return False

    # node seems compatible, we can actually try to look into its results

    # full package version; a mismatch is only a warning
    report(constants.RELEASE_VERSION != node_version[1],
           self.ENODEVERSION, node,
           "software version mismatch: master %s, node %s",
           constants.RELEASE_VERSION, node_version[1],
           code=self.ETYPE_WARNING)

    # any hypervisor result other than None denotes a failure
    hv_results = nresult.get(constants.NV_HYPERVISOR, None)
    if isinstance(hv_results, dict):
      for (hv_name, hv_outcome) in hv_results.items():
        report(hv_outcome is not None, self.ENODEHV, node,
               "hypervisor %s verify failure: '%s'", hv_name, hv_outcome)

    # a non-empty list of setup problems is an error
    setup_problems = nresult.get(constants.NV_NODESETUP,
                                 ["Missing NODESETUP results"])
    report(setup_problems, self.ENODESETUP, node, "node setup error: %s",
           "; ".join(setup_problems))

    return True
1272

    
1273
  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check that the node's clock agrees with the master's.

    The time reported by the node must lie within the interval spanned
    by the RPC call, widened on both sides by
    L{constants.NODE_MAX_CLOCK_SKEW}.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    node = ninfo.name
    report = self._ErrorIf

    reported = nresult.get(constants.NV_TIME, None)
    try:
      node_time = utils.MergeTime(reported)
    except (ValueError, TypeError):
      report(True, self.ENODETIME, node, "Node returned invalid time")
      return

    earliest = nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW
    latest = nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW

    drift = None
    if node_time < earliest:
      drift = abs(nvinfo_starttime - node_time)
    elif node_time > latest:
      drift = abs(node_time - nvinfo_endtime)

    if drift is None:
      ntime_diff = None
    else:
      ntime_diff = "%.01fs" % drift

    report(ntime_diff is not None, self.ENODETIME, node,
           "Node time diverges by at least %s from master node time",
           ntime_diff)
1304

    
1305
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the LVM data returned by a node.

    Nothing is checked if no volume group is configured.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    if vg_name is None:
      return

    node = ninfo.name
    report = self._ErrorIf

    # the volume group must exist and have at least the minimum size
    vglist = nresult.get(constants.NV_VGLIST, None)
    report(not vglist, self.ENODELVM, node, "unable to check volume groups")
    if vglist:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      report(vgstatus, self.ENODELVM, node, vgstatus)

    # check pv names
    pvlist = nresult.get(constants.NV_PVLIST, None)
    have_pvlist = pvlist is not None
    report(not have_pvlist, self.ENODELVM, node,
           "Can't get PV list from node")
    if have_pvlist:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for (_, pv_name, pv_vg) in pvlist:
        report(":" in pv_name, self.ENODELVM, node,
               "Invalid character ':' in PV"
               " '%s' of VG '%s'", pv_name, pv_vg)
1341

    
1342
  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the network connectivity data returned by a node.

    Both the ssh and the tcp connectivity results are examined; every
    entry present in either of them is reported as an error.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    report = self._ErrorIf

    have_ssh = constants.NV_NODELIST in nresult
    report(not have_ssh, self.ENODESSH, node,
           "node hasn't returned node ssh connectivity data")
    if have_ssh and nresult[constants.NV_NODELIST]:
      for (peer, peer_msg) in nresult[constants.NV_NODELIST].items():
        report(True, self.ENODESSH, node,
               "ssh communication with node '%s': %s", peer, peer_msg)

    have_tcp = constants.NV_NODENETTEST in nresult
    report(not have_tcp, self.ENODENET, node,
           "node hasn't returned node tcp connectivity data")
    if have_tcp and nresult[constants.NV_NODENETTEST]:
      failed_peers = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
      for peer in failed_peers:
        report(True, self.ENODENET, node,
               "tcp communication with node '%s': %s",
               peer, nresult[constants.NV_NODENETTEST][peer])
1372

    
1373
  def _VerifyInstance(self, instance, instanceconfig, node_image):
    """Verify an instance.

    This function checks that the logical volumes of the instance are
    present on its nodes, that an instance marked up is running on its
    primary node, and that it is not running on any other node.

    @param instance: the name of the instance
    @param instanceconfig: the configuration object of the instance
    @param node_image: dictionary of node name to L{NodeImage}

    """
    report = self._ErrorIf
    primary = instanceconfig.primary_node

    expected_lvs = {}
    instanceconfig.MapLVsByNode(expected_lvs)

    for (node, volumes) in expected_lvs.items():
      nimg = node_image[node]
      if nimg.offline or nimg.rpc_fail or nimg.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
      for volume in volumes:
        report(volume not in nimg.volumes, self.EINSTANCEMISSINGDISK,
               instance, "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      pimg = node_image[primary]
      # an offline primary node is not expected to report the instance
      not_running = not (instance in pimg.instances or pimg.offline)
      report(not_running, self.EINSTANCEDOWN, instance,
             "instance not running on its primary node %s",
             primary)

    for (node, nimg) in node_image.items():
      if node == primary:
        continue
      report(instance in nimg.instances, self.EINSTANCEWRONGNODE, instance,
             "instance should not run on node %s", node)
1408

    
1409
  def _VerifyOrphanVolumes(self, node_vol_should, node_image):
    """Verify if there are any unknown volumes in the cluster.

    Every volume reported by a healthy node which is not listed for
    that node in C{node_vol_should} is reported as unknown.

    @param node_vol_should: dictionary of node name to the volumes
        expected on that node
    @param node_image: dictionary of node name to L{NodeImage}

    """
    for (node, nimg) in node_image.items():
      healthy = not (nimg.offline or nimg.rpc_fail or nimg.lvm_fail)
      if not healthy:
        # skip non-healthy nodes
        continue
      for volume in nimg.volumes:
        expected = (node in node_vol_should and
                    volume in node_vol_should[node])
        self._ErrorIf(not expected, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)
1425

    
1426
  def _VerifyOrphanInstances(self, instancelist, node_image):
    """Verify the list of running instances.

    This reports the instances which are running on some node but are
    not part of the given instance list.

    @param instancelist: the names of the known instances
    @param node_image: dictionary of node name to L{NodeImage}

    """
    for (node, nimg) in node_image.items():
      for running in nimg.instances:
        known = running in instancelist
        self._ErrorIf(not known, self.ENODEORPHANINSTANCE, node,
                      "instance %s on node %s should not exist",
                      running, node)
1437

    
1438
  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    @param node_image: dictionary of node name to L{NodeImage}
    @param instance_cfg: dictionary of instance name to its
        configuration object

    """
    # the cluster object does not depend on the node or instance being
    # examined, so fetch it once instead of once per instance
    cluster = self.cfg.GetClusterInfo()

    for node, n_img in node_image.items():
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
      for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
          bep = cluster.FillBE(instance_cfg[instance])
          # only auto-balanced instances count towards the requirement
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate"
                      " failovers should peer node %s fail", prinode)
1464

    
1465
  def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
                       master_files):
    """Compare the checksums of a node's files with the local ones.

    Files listed in C{master_files} are required only on master
    candidates and must not exist on other nodes; all other files are
    required everywhere.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param file_list: required list of files
    @param local_cksum: dictionary of local files and their checksums
    @param master_files: list of files that only masters should have

    """
    node = ninfo.name
    report = self._ErrorIf

    remote_cksum = nresult.get(constants.NV_FILELIST, None)
    no_data = not isinstance(remote_cksum, dict)
    report(no_data, self.ENODEFILECHECK, node,
           "node hasn't returned file checksum data")
    if no_data:
      return

    for file_name in file_list:
      is_candidate = ninfo.master_candidate
      required = (file_name not in master_files) or is_candidate

      missing = file_name not in remote_cksum
      # present, but with a checksum differing from the local one
      outdated = (not missing and
                  remote_cksum[file_name] != local_cksum[file_name])
      # present and identical to the local file
      current = (not missing and
                 remote_cksum[file_name] == local_cksum[file_name])

      report(missing and required, self.ENODEFILECHECK, node,
             "file '%s' missing", file_name)
      report(outdated and required, self.ENODEFILECHECK, node,
             "file '%s' has wrong checksum", file_name)
      # not candidate and this is not a must-have file
      report(outdated and not required, self.ENODEFILECHECK, node,
             "file '%s' should not exist on non master"
             " candidates (and the file is outdated)", file_name)
      # all good, except non-master/non-must have combination
      report(current and not required, self.ENODEFILECHECK, node,
             "file '%s' should not exist"
             " on non master candidates", file_name)
1508

    
1509
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_map):
    """Verify the DRBD status of a node.

    The minors allocated to the node in the configuration are compared
    with the minors the node reports as being in use.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    node = ninfo.name
    report = self._ErrorIf

    # compute the DRBD minors: minor -> (instance name, must be active)
    expected = {}
    for (minor, iname) in drbd_map[node].items():
      is_ghost = iname not in instanceinfo
      report(is_ghost, self.ECLUSTERCFG, None,
             "ghost instance '%s' in temporary DRBD map", iname)
      if is_ghost:
        # ghost instance should not be running, but otherwise we
        # don't give double warnings (both ghost instance and
        # unallocated minor in use)
        expected[minor] = (iname, False)
      else:
        inst = instanceinfo[iname]
        expected[minor] = (inst.name, inst.admin_up)

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    unparseable = not isinstance(used_minors, (tuple, list))
    report(unparseable, self.ENODEDRBD, node,
           "cannot parse drbd status file: %s", str(used_minors))
    if unparseable:
      # we cannot check drbd status
      return

    for (minor, entry) in expected.items():
      (iname, must_exist) = entry
      inactive = minor not in used_minors and must_exist
      report(inactive, self.ENODEDRBD, node,
             "drbd minor %d of instance %s is not active", minor, iname)

    for minor in used_minors:
      report(minor not in expected, self.ENODEDRBD, node,
             "unallocated drbd minor %d is in use", minor)
1555

    
1556
  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
    """Verify and store the logical volume data of a node.

    This function will update a L{NodeImage}'s internal structures
    with data from the remote call. The image is flagged with
    C{lvm_fail} unless a volume group is configured and the node
    returned a dictionary of volumes.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    report = self._ErrorIf

    # assume failure until valid data has been seen
    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")

    if vg_name is None:
      return

    if isinstance(lvdata, basestring):
      # a string result is an error message
      report(True, self.ENODELVM, node, "LVM problem on node: %s",
             utils.SafeEncode(lvdata))
      return

    if not isinstance(lvdata, dict):
      report(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
      return

    nimg.volumes = lvdata
    nimg.lvm_fail = False
1584

    
1585
  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
    """Verify and store the list of instances running on a node.

    If the node returned a list, it becomes the instance list of the
    node image; otherwise the image is flagged with C{hyp_fail}.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    running = nresult.get(constants.NV_INSTANCELIST, None)
    valid = isinstance(running, list)
    self._ErrorIf(not valid, self.ENODEHV, ninfo.name,
                  "rpc call to node failed"
                  " (instancelist): %s", utils.SafeEncode(str(running)))
    if valid:
      nimg.instances = running
    else:
      nimg.hyp_fail = True
1606

    
1607
  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
    """Verify and store the free memory and free disk of a node.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    report = self._ErrorIf

    # try to read free memory (from the hypervisor)
    hv_info = nresult.get(constants.NV_HVINFO, None)
    have_memory = isinstance(hv_info, dict) and "memory_free" in hv_info
    report(not have_memory, self.ENODEHV, node,
           "rpc call to node failed (hvinfo)")
    if have_memory:
      try:
        nimg.mfree = int(hv_info["memory_free"])
      except (ValueError, TypeError):
        report(True, self.ENODERPC, node,
               "node returned invalid nodeinfo, check hypervisor")

    # FIXME: devise a free space model for file based instances as well
    if vg_name is None:
      return

    have_vg = (constants.NV_VGLIST in nresult and
               vg_name in nresult[constants.NV_VGLIST])
    report(not have_vg, self.ENODELVM, node,
           "node didn't return data for the volume group '%s'"
           " - it is either missing or broken", vg_name)
    if have_vg:
      try:
        nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
      except (ValueError, TypeError):
        report(True, self.ENODERPC, node,
               "node returned invalid LVM info, check LVM status")
1644

    
1645
  def CheckPrereq(self):
    """Check prerequisites.

    Store the checks to be skipped as a frozen set in C{self.skip_set}
    and make sure all of them are known optional checks.

    @raise errors.OpPrereqError: if an unknown check was requested to
        be skipped

    """
    requested = frozenset(self.op.skip_checks)
    self.skip_set = requested
    if constants.VERIFY_OPTIONAL_CHECKS.issuperset(requested):
      return
    raise errors.OpPrereqError("Invalid checks to be skipped specified",
                               errors.ECODE_INVAL)
1656

    
1657
  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks are only run in the post phase; their failure
    is logged in the verify output and makes the verification fail.

    @return: tuple of (environment dict, empty pre-phase node list,
        list of all node names for the post phase)

    """
    node_names = self.cfg.GetNodeList()
    cluster_tags = self.cfg.GetClusterInfo().GetTags()
    hooks_env = {"CLUSTER_TAGS": " ".join(cluster_tags)}
    # one NODE_TAGS_<name> variable per configured node
    hooks_env.update(("NODE_TAGS_%s" % ninfo.name, " ".join(ninfo.GetTags()))
                     for ninfo in self.cfg.GetAllNodesInfo().values())

    return hooks_env, [], node_names
1672

    
1673
  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    The verification runs in these steps: global configuration and
    certificate checks, building the expected per-node state from the
    configuration, gathering the runtime data from all nodes with a
    single node_verify RPC call, then the per-node and per-instance
    checks, and finally the orphan volume/instance and (unless skipped)
    the N+1 memory checks.

    @param feedback_fn: function used to send feedback back to the caller
    @rtype: boolean
    @return: the negation of C{self.bad} at the end of the run, i.e.
        True when C{self.bad} was left at its initial False value

    """
    self.bad = False
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn
    feedback_fn("* Verifying global settings")
    for msg in self.cfg.VerifyConfig():
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)

    # Check the cluster certificates
    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)

    vg_name = self.cfg.GetVGName()
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
                        for iname in instancelist)
    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = 0 # Count of offline nodes
    n_drained = 0 # Count of nodes being drained
    node_vol_should = {}

    # FIXME: verify OS list
    # do local checksums
    master_files = [constants.CLUSTER_CONF_FILE]

    file_names = ssconf.SimpleStore().GetFileList()
    file_names.extend(constants.ALL_CERT_FILES)
    file_names.extend(master_files)

    local_checksums = utils.FingerprintFiles(file_names)

    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
    node_verify_param = {
      constants.NV_FILELIST: file_names,
      constants.NV_NODELIST: [node.name for node in nodeinfo
                              if not node.offline],
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
                                  node.secondary_ip) for node in nodeinfo
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
      }

    # the LVM/DRBD checks are only requested when a VG is configured
    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]
      node_verify_param[constants.NV_DRBDLIST] = None

    # Build our expected cluster state
    node_image = dict((node.name, self.NodeImage(offline=node.offline))
                      for node in nodeinfo)

    for instance in instancelist:
      inst_config = instanceinfo[instance]

      for nname in inst_config.all_nodes:
        if nname not in node_image:
          # ghost node: referenced by an instance but not in the node list
          gnode = self.NodeImage()
          gnode.ghost = True
          node_image[nname] = gnode

      inst_config.MapLVsByNode(node_vol_should)

      pnode = inst_config.primary_node
      node_image[pnode].pinst.append(instance)

      for snode in inst_config.secondary_nodes:
        nimg = node_image[snode]
        nimg.sinst.append(instance)
        # sbp: instances this node is secondary for, grouped by primary
        if pnode not in nimg.sbp:
          nimg.sbp[pnode] = []
        nimg.sbp[pnode].append(instance)

    # At this point, we have the in-memory data structures complete,
    # except for the runtime information, which we'll gather next

    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()

    cluster = self.cfg.GetClusterInfo()
    master_node = self.cfg.GetMasterNode()
    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Verifying node status")
    for node_i in nodeinfo:
      node = node_i.name
      nimg = node_image[node]

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline += 1
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained += 1
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
      if msg:
        # no payload to look at, remember the failure for the instance checks
        nimg.rpc_fail = True
        continue

      nresult = all_nvinfo[node].payload

      nimg.call_ok = self._VerifyNode(node_i, nresult)
      self._VerifyNodeNetwork(node_i, nresult)
      self._VerifyNodeLVM(node_i, nresult, vg_name)
      self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
                            master_files)
      self._VerifyNodeDrbd(node_i, nresult, instanceinfo, all_drbd_map)
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)

      self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
      self._UpdateNodeInstances(node_i, nresult, nimg)
      self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)

    feedback_fn("* Verifying instance status")
    for instance in instancelist:
      if verbose:
        feedback_fn("* Verifying instance %s" % instance)
      inst_config = instanceinfo[instance]
      self._VerifyInstance(instance, inst_config, node_image)
      inst_nodes_offline = []

      pnode = inst_config.primary_node
      pnode_img = node_image[pnode]
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
               self.ENODERPC, pnode, "instance %s, connection to"
               " primary node failed", instance)

      if pnode_img.offline:
        inst_nodes_offline.append(pnode)

      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
      if not inst_config.secondary_nodes:
        i_non_redundant.append(instance)
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
               instance, "instance has multiple secondary nodes: %s",
               utils.CommaJoin(inst_config.secondary_nodes),
               code=self.ETYPE_WARNING)

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        s_img = node_image[snode]
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
                 "instance %s, connection to secondary node failed", instance)

        if s_img.offline:
          inst_nodes_offline.append(snode)

      # warn that the instance lives on offline nodes
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
               "instance lives on offline node(s) %s",
               utils.CommaJoin(inst_nodes_offline))
      # ... or ghost nodes
      for node in inst_config.all_nodes:
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
                 "instance lives on ghost node %s", node)

    feedback_fn("* Verifying orphan volumes")
    self._VerifyOrphanVolumes(node_vol_should, node_image)

    feedback_fn("* Verifying orphan instances")
    self._VerifyOrphanInstances(instancelist, node_image)

    if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
      feedback_fn("* Verifying N+1 Memory redundancy")
      self._VerifyNPlusOneMemory(node_image, instanceinfo)

    feedback_fn("* Other Notes")
    if i_non_redundant:
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))

    if i_non_a_balanced:
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
                  % len(i_non_a_balanced))

    if n_offline:
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)

    if n_drained:
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)

    return not self.bad
1895

    
1896
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    Offline nodes are skipped without influencing the result; a
    communication failure with an online node, or a failed hook script,
    turns the result into False.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results; for phases other than the post phase the
        previous result is returned unchanged

    """
    # We only really run POST phase hooks, and are only interested in
    # their results
    if phase != constants.HOOKS_PHASE_POST:
      return lu_result

    # Used to change hooks' output to proper indentation
    indent_re = re.compile('^', re.M)
    feedback_fn("* Hooks Results")
    assert hooks_results, "invalid result from hooks"

    for node_name in hooks_results:
      res = hooks_results[node_name]
      msg = res.fail_msg
      test = msg and not res.offline
      self._ErrorIf(test, self.ENODEHOOKS, node_name,
                    "Communication failure in hooks execution: %s", msg)
      if res.offline:
        # No need to investigate the payload of an offline node, and it
        # is not an error either
        continue
      if msg:
        # the node gave an error; override lu_result manually here, as
        # _ErrorIf only overrides self.bad
        lu_result = False
        continue
      for script, hkr, output in res.payload:
        test = hkr == constants.HKR_FAIL
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
                      "Script %s failed, output:", script)
        if test:
          output = indent_re.sub('      ', output)
          feedback_fn("%s" % output)
          lu_result = False

    return lu_result
1941

    
1942

    
1943
class LUVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    """Request all node and instance locks, in shared mode.

    """
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict((level, 1) for level in locking.LEVELS)

  def CheckPrereq(self):
    """Check prerequisites.

    This has no prerequisites.

    """

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    node_errors = {}
    need_activation = []
    missing_lvs = {}
    result = (node_errors, need_activation, missing_lvs)

    vg_name = self.cfg.GetVGName()
    node_names = utils.NiceSort(self.cfg.GetNodeList())
    all_instances = [self.cfg.GetInstanceInfo(iname)
                     for iname in self.cfg.GetInstanceList()]

    # map (node, volume) to the owning instance, only for instances
    # which are marked up and use a network-mirrored disk template
    expected = {}
    for instance in all_instances:
      if (instance.admin_up and
          instance.disk_template in constants.DTS_NET_MIRROR):
        lvs_by_node = {}
        instance.MapLVsByNode(lvs_by_node)
        for node_name, volumes in lvs_by_node.iteritems():
          for volume in volumes:
            expected[(node_name, volume)] = instance

    if not expected:
      return result

    lv_results = self.rpc.call_lv_list(node_names, vg_name)

    for node_name in node_names:
      node_res = lv_results[node_name]
      if node_res.offline:
        continue
      err = node_res.fail_msg
      if err:
        logging.warning("Error enumerating LVs on node %s: %s", node_name, err)
        node_errors[node_name] = err
        continue

      for lv_name, lv_data in node_res.payload.items():
        (_, _, lv_online) = lv_data
        # every LV found on the node is removed from the expected map
        owner = expected.pop((node_name, lv_name), None)
        if lv_online or owner is None:
          continue
        if owner.name not in need_activation:
          need_activation.append(owner.name)

    # any leftover items in the expected map are missing LVs, let's
    # arrange the data better
    for key, owner in expected.iteritems():
      missing_lvs.setdefault(owner.name, []).append(key)

    return result
2024

    
2025

    
2026
class LURepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  The sizes recorded in the configuration are compared with the sizes
  reported by the primary node of each instance, and the configuration
  is corrected where they differ.

  """
  _OP_REQP = ["instances"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the instance names and compute the needed locks.

    With an explicit instance list only these instances are locked and
    the node locks are computed later, in L{DeclareLocks}; otherwise all
    nodes and instances are locked. All locks are shared.

    @raise errors.OpPrereqError: if C{instances} is not a list

    """
    if not isinstance(self.op.instances, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'",
                                 errors.ECODE_INVAL)

    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = _ExpandInstanceName(self.cfg, name)
        self.wanted_names.append(full_name)
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))

  def DeclareLocks(self, level):
    """Lock the primary nodes of the explicitly requested instances.

    """
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      # no explicit list was given: work on every locked instance
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object
    @rtype: boolean
    @return: True if the size of any (recursive) child was changed

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    @param feedback_fn: function used to send feedback back to the caller
    @rtype: list
    @return: list of (instance name, disk index, new size) tuples, one
        for every correction written to the configuration

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))

    changed = []
    for node, dskl in per_node_disks.items():
      # the disk IDs are set on copies, not on the configuration objects
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsizes(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsizes call to node"
                        " %s, ignoring", node)
        continue
      # The sizes are read from the payload, like every other RPC
      # result consumer in this module does after checking fail_msg.
      # NOTE(review): RpcResult.data is presumably the raw
      # (status, payload) pair - confirm against ganeti.rpc.
      sizes = result.payload
      if len(sizes) != len(dskl):
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, sizes):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
        # shift by 20 bits (divide by 2**20) before comparing with the
        # recorded size
        size = size >> 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))
    return changed
2144

    
2145

    
2146
class LURenameCluster(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    @return: tuple of (environment dict, pre-phase node list holding
        only the master, post-phase node list holding all nodes)

    """
    hooks_env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }
    master_name = self.cfg.GetMasterNode()
    return hooks_env, [master_name], self.cfg.GetNodeList()

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    The name is resolved; the rename is refused if neither the name
    nor the IP address changes, or if the new IP address already
    answers on the node daemon port.

    @raise errors.OpPrereqError: in either of the above cases

    """
    resolved = utils.GetHostInfo(self.op.name)

    target_name = resolved.name
    target_ip = resolved.ip
    self.ip = target_ip
    current_name = self.cfg.GetClusterName()
    current_ip = self.cfg.GetMasterIP()
    ip_changed = target_ip != current_ip

    if not ip_changed and target_name == current_name:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)
    if ip_changed and utils.TcpPing(target_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                 " reachable on the network. Aborting." %
                                 target_ip, errors.ECODE_NOTUNIQUE)

    # from here on, work with the resolved name
    self.op.name = target_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    The master role is stopped while the configuration and the known
    hosts file are updated, and started again afterwards even if the
    update failed.

    """
    new_cluster_name = self.op.name
    new_master_ip = self.ip

    # shutdown the master IP
    master = self.cfg.GetMasterNode()
    stop_result = self.rpc.call_node_stop_master(master, False)
    stop_result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = new_cluster_name
      cluster.master_ip = new_master_ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file and copy it to all the other nodes
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      targets = self.cfg.GetNodeList()
      if master in targets:
        targets.remove(master)
      uploads = self.rpc.call_upload_file(targets,
                                          constants.SSH_KNOWN_HOSTS_FILE)
      for to_node, to_result in uploads.iteritems():
        err = to_result.fail_msg
        if not err:
          continue
        # a failed copy is only warned about, it does not abort the rename
        self.proc.LogWarning("Copy of file %s to node %s failed: %s" %
                             (constants.SSH_KNOWN_HOSTS_FILE, to_node, err))

    finally:
      start_result = self.rpc.call_node_start_master(master, False, False)
      err = start_result.fail_msg
      if err:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", err)
2228

    
2229

    
2230
def _RecursiveCheckIfLVMBased(disk):
  """Tell whether a disk, or anything below it, is an LVM device.

  The children are searched (depth-first) before the disk itself is
  looked at.

  @type disk: L{objects.Disk}
  @param disk: the disk to check
  @rtype: boolean
  @return: boolean indicating whether a LD_LV dev_type was found or not

  """
  for child in (disk.children or ()):
    if _RecursiveCheckIfLVMBased(child):
      return True
  return disk.dev_type == constants.LD_LV
2244

    
2245

    
2246
class LUSetClusterParams(LogicalUnit):
2247
  """Change the parameters of the cluster.
2248

2249
  """
2250
  HPATH = "cluster-modify"
2251
  HTYPE = constants.HTYPE_CLUSTER
2252
  _OP_REQP = []
2253
  REQ_BGL = False
2254

    
2255
  def CheckArguments(self):
2256
    """Check parameters
2257

2258
    """
2259
    for attr in ["candidate_pool_size",
2260
                 "uid_pool", "add_uids", "remove_uids"]:
2261
      if not hasattr(self.op, attr):
2262
        setattr(self.op, attr, None)
2263

    
2264
    if self.op.candidate_pool_size is not None:
2265
      try:
2266
        self.op.candidate_pool_size = int(self.op.candidate_pool_size)
2267
      except (ValueError, TypeError), err:
2268
        raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
2269
                                   str(err), errors.ECODE_INVAL)
2270
      if self.op.candidate_pool_size < 1:
2271
        raise errors.OpPrereqError("At least one master candidate needed",
2272
                                   errors.ECODE_INVAL)
2273

    
2274
    _CheckBooleanOpField(self.op, "maintain_node_health")
2275

    
2276
    if self.op.uid_pool:
2277
      uidpool.CheckUidPool(self.op.uid_pool)
2278

    
2279
    if self.op.add_uids:
2280
      uidpool.CheckUidPool(self.op.add_uids)
2281

    
2282
    if self.op.remove_uids:
2283
      uidpool.CheckUidPool(self.op.remove_uids)
2284

    
2285
  def ExpandNames(self):
2286
    # FIXME: in the future maybe other cluster params won't require checking on
2287
    # all nodes to be modified.
2288
    self.needed_locks = {
2289
      locking.LEVEL_NODE: locking.ALL_SET,
2290
    }
2291
    self.share_locks[locking.LEVEL_NODE] = 1
2292

    
2293
  def BuildHooksEnv(self):
2294
    """Build hooks env.
2295

2296
    """
2297
    env = {
2298
      "OP_TARGET": self.cfg.GetClusterName(),
2299
      "NEW_VG_NAME": self.op.vg_name,
2300
      }
2301
    mn = self.cfg.GetMasterNode()
2302
    return env, [mn], [mn]
2303

    
2304
  def CheckPrereq(self):
2305
    """Check prerequisites.
2306

2307
    This checks whether the given params don't conflict and
2308
    if the given volume group is valid.
2309

2310
    """
2311
    if self.op.vg_name is not None and not self.op.vg_name:
2312
      instances = self.cfg.GetAllInstancesInfo().values()
2313
      for inst in instances:
2314
        for disk in inst.disks:
2315
          if _RecursiveCheckIfLVMBased(disk):
2316
            raise errors.OpPrereqError("Cannot disable lvm storage while"
2317
                                       " lvm-based instances exist",
2318
                                       errors.ECODE_INVAL)
2319

    
2320
    node_list = self.acquired_locks[locking.LEVEL_NODE]
2321

    
2322
    # if vg_name not None, checks given volume group on all nodes
2323
    if self.op.vg_name:
2324
      vglist = self.rpc.call_vg_list(node_list)
2325
      for node in node_list:
2326
        msg = vglist[node].fail_msg
2327
        if msg:
2328
          # ignoring down node
2329
          self.LogWarning("Error while gathering data on node %s"
2330
                          " (ignoring node): %s", node, msg)
2331
          continue
2332
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2333
                                              self.op.vg_name,
2334
                                              constants.MIN_VG_SIZE)
2335
        if vgstatus:
2336
          raise errors.OpPrereqError("Error on node '%s': %s" %
2337
                                     (node, vgstatus), errors.ECODE_ENVIRON)
2338

    
2339
    self.cluster = cluster = self.cfg.GetClusterInfo()
2340
    # validate params changes
2341
    if self.op.beparams:
2342
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2343
      self.new_beparams = objects.FillDict(
2344
        cluster.beparams[constants.PP_DEFAULT], self.op.beparams)
2345

    
2346
    if self.op.nicparams:
2347
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2348
      self.new_nicparams = objects.FillDict(
2349
        cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
2350
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
2351
      nic_errors = []
2352

    
2353
      # check all instances for consistency
2354
      for instance in self.cfg.GetAllInstancesInfo().values():
2355
        for nic_idx, nic in enumerate(instance.nics):
2356
          params_copy = copy.deepcopy(nic.nicparams)
2357
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
2358

    
2359
          # check parameter syntax
2360
          try:
2361
            objects.NIC.CheckParameterSyntax(params_filled)
2362
          except errors.ConfigurationError, err:
2363
            nic_errors.append("Instance %s, nic/%d: %s" %
2364
                              (instance.name, nic_idx, err))
2365

    
2366
          # if we're moving instances to routed, check that they have an ip
2367
          target_mode = params_filled[constants.NIC_MODE]
2368
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2369
            nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2370
                              (instance.name, nic_idx))
2371
      if nic_errors:
2372
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2373
                                   "\n".join(nic_errors))
2374

    
2375
    # hypervisor list/parameters
2376
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2377
    if self.op.hvparams:
2378
      if not isinstance(self.op.hvparams, dict):
2379
        raise errors.OpPrereqError("Invalid 'hvparams' parameter on input",
2380
                                   errors.ECODE_INVAL)
2381
      for hv_name, hv_dict in self.op.hvparams.items():
2382
        if hv_name not in self.new_hvparams:
2383
          self.new_hvparams[hv_name] = hv_dict
2384
        else:
2385
          self.new_hvparams[hv_name].update(hv_dict)
2386

    
2387
    # os hypervisor parameters
2388
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2389
    if self.op.os_hvp:
2390
      if not isinstance(self.op.os_hvp, dict):
2391
        raise errors.OpPrereqError("Invalid 'os_hvp' parameter on input",
2392
                                   errors.ECODE_INVAL)
2393
      for os_name, hvs in self.op.os_hvp.items():
2394
        if not isinstance(hvs, dict):
2395
          raise errors.OpPrereqError(("Invalid 'os_hvp' parameter on"
2396
                                      " input"), errors.ECODE_INVAL)
2397
        if os_name not in self.new_os_hvp:
2398
          self.new_os_hvp[os_name] = hvs
2399
        else:
2400
          for hv_name, hv_dict in hvs.items():
2401
            if hv_name not in self.new_os_hvp[os_name]:
2402
              self.new_os_hvp[os_name][hv_name] = hv_dict
2403
            else:
2404
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
2405

    
2406
    # changes to the hypervisor list
2407
    if self.op.enabled_hypervisors is not None:
2408
      self.hv_list = self.op.enabled_hypervisors
2409
      if not self.hv_list:
2410
        raise errors.OpPrereqError("Enabled hypervisors list must contain at"
2411
                                   " least one member",
2412
                                   errors.ECODE_INVAL)
2413
      invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
2414
      if invalid_hvs:
2415
        raise errors.OpPrereqError("Enabled hypervisors contains invalid"
2416
                                   " entries: %s" %
2417
                                   utils.CommaJoin(invalid_hvs),
2418
                                   errors.ECODE_INVAL)
2419
      for hv in self.hv_list:
2420
        # if the hypervisor doesn't already exist in the cluster
2421
        # hvparams, we initialize it to empty, and then (in both
2422
        # cases) we make sure to fill the defaults, as we might not
2423
        # have a complete defaults list if the hypervisor wasn't
2424
        # enabled before
2425
        if hv not in new_hvp:
2426
          new_hvp[hv] = {}
2427
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2428
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2429
    else:
2430
      self.hv_list = cluster.enabled_hypervisors
2431

    
2432
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
2433
      # either the enabled list has changed, or the parameters have, validate
2434
      for hv_name, hv_params in self.new_hvparams.items():
2435
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
2436
            (self.op.enabled_hypervisors and
2437
             hv_name in self.op.enabled_hypervisors)):
2438
          # either this is a new hypervisor, or its parameters have changed
2439
          hv_class = hypervisor.GetHypervisor(hv_name)
2440
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2441
          hv_class.CheckParameterSyntax(hv_params)
2442
          _CheckHVParams(self, node_list, hv_name, hv_params)
2443

    
2444
    if self.op.os_hvp:
2445
      # no need to check any newly-enabled hypervisors, since the
2446
      # defaults have already been checked in the above code-block
2447
      for os_name, os_hvp in self.new_os_hvp.items():
2448
        for hv_name, hv_params in os_hvp.items():
2449
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2450
          # we need to fill in the new os_hvp on top of the actual hv_p
2451
          cluster_defaults = self.new_hvparams.get(hv_name, {})
2452
          new_osp = objects.FillDict(cluster_defaults, hv_params)
2453
          hv_class = hypervisor.GetHypervisor(hv_name)
2454
          hv_class.CheckParameterSyntax(new_osp)
2455
          _CheckHVParams(self, node_list, hv_name, new_osp)
2456

    
2457

    
2458
  def Exec(self, feedback_fn):
2459
    """Change the parameters of the cluster.
2460

2461
    """
2462
    if self.op.vg_name is not None:
2463
      new_volume = self.op.vg_name
2464
      if not new_volume:
2465
        new_volume = None
2466
      if new_volume != self.cfg.GetVGName():
2467
        self.cfg.SetVGName(new_volume)
2468
      else:
2469
        feedback_fn("Cluster LVM configuration already in desired"
2470
                    " state, not changing")
2471
    if self.op.hvparams:
2472
      self.cluster.hvparams = self.new_hvparams
2473
    if self.op.os_hvp:
2474
      self.cluster.os_hvp = self.new_os_hvp
2475
    if self.op.enabled_hypervisors is not None:
2476
      self.cluster.hvparams = self.new_hvparams
2477
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2478
    if self.op.beparams:
2479
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2480
    if self.op.nicparams:
2481
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2482

    
2483
    if self.op.candidate_pool_size is not None:
2484
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
2485
      # we need to update the pool size here, otherwise the save will fail
2486
      _AdjustCandidatePool(self, [])
2487

    
2488
    if self.op.maintain_node_health is not None:
2489
      self.cluster.maintain_node_health = self.op.maintain_node_health
2490

    
2491
    if self.op.add_uids is not None:
2492
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2493

    
2494
    if self.op.remove_uids is not None:
2495
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2496

    
2497
    if self.op.uid_pool is not None:
2498
      self.cluster.uid_pool = self.op.uid_pool
2499

    
2500
    self.cfg.Update(self.cluster, feedback_fn)
2501

    
2502

    
2503
def _RedistributeAncillaryFiles(lu, additional_nodes=None):
  """Copy the non-config cluster files to all the other nodes.

  The configuration and ssconf files are handled by ConfigWriter; this
  function uploads the remaining files which must be identical on all
  nodes (hosts file, known hosts, RAPI certificate and users file, confd
  HMAC key and any hypervisor-specific ancillary files).

  Upload failures are only logged as warnings.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to

  """
  # 1. Gather target nodes: all online nodes plus the extra ones, without
  # the master itself (which already has the files)
  master = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
  targets = lu.cfg.GetOnlineNodeList()
  if additional_nodes is not None:
    targets.extend(additional_nodes)
  try:
    targets.remove(master.name)
  except ValueError:
    # master not in the target list, nothing to drop
    pass

  # 2. Gather files to distribute
  files = set([constants.ETC_HOSTS,
               constants.SSH_KNOWN_HOSTS_FILE,
               constants.RAPI_CERT_FILE,
               constants.RAPI_USERS_FILE,
               constants.CONFD_HMAC_KEY,
              ])

  for hv_name in lu.cfg.GetClusterInfo().enabled_hypervisors:
    files.update(hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())

  # 3. Perform the files upload, skipping files missing on this node
  for path in files:
    if not os.path.exists(path):
      continue
    upload = lu.rpc.call_upload_file(targets, path)
    for target, node_result in upload.items():
      err = node_result.fail_msg
      if err:
        lu.proc.LogWarning("Copy of file %s to node %s failed: %s" %
                           (path, target, err))
2545

    
2546

    
2547
class LURedistributeConfig(NoHooksLU):
  """Force the redistribution of cluster configuration.

  The LU takes no parameters: it re-saves the cluster object (which
  goes through the configuration writer) and then re-uploads the
  ancillary files.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    """Request all node locks, in shared mode.

    """
    self.needed_locks = dict([(locking.LEVEL_NODE, locking.ALL_SET)])
    self.share_locks[locking.LEVEL_NODE] = 1

  def CheckPrereq(self):
    """Check prerequisites.

    This LU has no prerequisites.

    """

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    @param feedback_fn: function used to send feedback to the caller

    """
    cluster = self.cfg.GetClusterInfo()
    self.cfg.Update(cluster, feedback_fn)
    _RedistributeAncillaryFiles(self)
2573

    
2574

    
2575
def _WaitForSync(lu, instance, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  The mirror status of all the instance's disks is queried on the
  primary node, in a loop, until no disk reports a sync in progress.

  @param lu: the calling logical unit; its C{cfg}, C{rpc}, C{proc} and
      C{LogWarning} members are used
  @param instance: the instance whose disks should be checked
  @param oneshot: if True, do not wait for the sync to finish; the status
      is only re-polled (up to ten times, one second apart) while disks
      look degraded without a resync running
  @rtype: boolean
  @return: True if, at the last poll, no disk was degraded without a
      sync in progress (or if the instance has no disks)
  @raise errors.RemoteError: if the node could not be queried for ten
      consecutive polls

  """
  if not instance.disks:
    return True

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in instance.disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    # a successful answer resets the count of consecutive RPC failures
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                           node, instance.disks[i].iv_name)
        continue

      # a degraded disk which is resyncing is expected; only degraded
      # disks without a sync in progress count as a problem
      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = "%d estimated seconds remaining" % mstat.estimated_time
          # keep the longest estimate over all disks, so that the sleep
          # below doesn't depend on the order of the disks
          max_time = max(max_time, mstat.estimated_time)
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (instance.disks[i].iv_name, mstat.sync_percent,
                         rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    # wait for the slowest disk, but poll at least once per minute
    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded
2646

    
2647

    
2648
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The device is looked up on the node (only if we're on the primary, or
  if the device is assembled on secondaries too) and then all its
  children are checked recursively; the check stops at the first
  failure.

  Note that the C{ldisk} flag applies only to the top-level device,
  the children being always checked via their is_degraded attribute.

  @param lu: the calling logical unit
  @param dev: the disk object to check
  @param node: the node on which to check the disk
  @param on_primary: whether the node is the primary node of the disk
  @param ldisk: if True, test the local storage status (ldisk_status)
      instead of the overall is_degraded status
  @rtype: boolean
  @return: True if the device and all its children passed the check

  """
  lu.cfg.SetDiskID(dev, node)

  consistent = True

  if on_primary or dev.AssembleOnSecondary():
    found = lu.rpc.call_blockdev_find(node, dev)
    err = found.fail_msg
    if err:
      lu.LogWarning("Can't find disk on node %s: %s", node, err)
      consistent = False
    elif not found.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      consistent = False
    elif ldisk:
      consistent = found.payload.ldisk_status == constants.LDS_OKAY
    else:
      consistent = not found.payload.is_degraded

  for child in (dev.children or ()):
    if not consistent:
      # no need to look further once a failure was seen
      break
    consistent = _CheckDiskConsistency(lu, child, node, on_primary)

  return consistent
2680

    
2681

    
2682
class LUDiagnoseOS(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  _OP_REQP = ["output_fields", "names"]
  REQ_BGL = False
  _FIELDS_STATIC = utils.FieldSet()
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants")
  # Fields that need calculation of global os validity
  _FIELDS_NEEDVALID = frozenset(["valid", "variants"])

  def ExpandNames(self):
    """Validate the query arguments; no locks are requested.

    @raise errors.OpPrereqError: if a list of OS names was passed, as
        selective queries are not supported

    """
    if self.op.names:
      raise errors.OpPrereqError("Selective OS query not supported",
                                 errors.ECODE_INVAL)

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    # Lock all nodes, in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    self.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.

    This LU has no prerequisites.

    """

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param rlist: a map with node names as keys and the per-node RPC
        results as values; each result payload is a list of
        (name, path, status, diagnose, variants) entries

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another map, with
        nodes as keys and lists of (path, status, diagnose, variants)
        tuples as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [...]),
                                     (/srv/..., False, "invalid api", [])],
                           "node2": [(/srv/..., True, "", [...])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for name, path, status, diagnose, variants in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        all_os[name][node_name].append((path, status, diagnose, variants))
    return all_os

  def Exec(self, feedback_fn):
    """Compute the list of OSes.

    An OS is considered valid only if, on every node which answered the
    RPC, it was found and its first entry has a true status; the
    reported variants are those common to all the nodes.

    @param feedback_fn: function used to send feedback to the caller
    @rtype: list
    @return: one row per OS, each row holding the values of the
        requested output fields, in the requested order

    """
    valid_nodes = list(self.cfg.GetOnlineNodeList())
    node_data = self.rpc.call_os_diagnose(valid_nodes)
    pol = self._DiagnoseByOS(node_data)
    output = []
    calc_valid = self._FIELDS_NEEDVALID.intersection(self.op.output_fields)
    calc_variants = "variants" in self.op.output_fields

    for os_name, os_data in pol.items():
      row = []
      if calc_valid:
        valid = True
        variants = None
        for osl in os_data.values():
          # coerce to a boolean, as otherwise an empty per-node list (OS
          # missing on that node) would be returned as the "valid" value
          valid = bool(valid and osl and osl[0][1])
          if not valid:
            variants = None
            break
          if calc_variants:
            node_variants = osl[0][3]
            if variants is None:
              variants = node_variants
            else:
              variants = [v for v in variants if v in node_variants]

      for field in self.op.output_fields:
        if field == "name":
          val = os_name
        elif field == "valid":
          val = valid
        elif field == "node_status":
          # this is just a copy of the dict
          val = dict(os_data)
        elif field == "variants":
          val = variants
        else:
          raise errors.ParameterError(field)
        row.append(val)
      output.append(row)

    return output
2795

    
2796

    
2797
class LURemoveNode(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    The node being removed is taken out of the node lists returned
    here, as a failed node would otherwise make the removal impossible;
    the post hooks for it are run explicitly in L{Exec}.

    @return: tuple of (environment, pre-hook nodes, post-hook nodes)

    """
    target = self.op.node_name
    env = {
      "OP_TARGET": target,
      "NODE_NAME": target,
      }
    other_nodes = self.cfg.GetNodeList()
    if target in other_nodes:
      other_nodes.remove(target)
    else:
      logging.warning("Node %s which is about to be removed not found"
                      " in the all nodes list", target)
    return env, other_nodes, other_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    expanded_name = _ExpandNodeName(self.cfg, self.op.node_name)
    self.op.node_name = expanded_name
    node_info = self.cfg.GetNodeInfo(expanded_name)
    assert node_info is not None

    all_instances = self.cfg.GetInstanceList()

    if node_info.name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("Node is the master node,"
                                 " you need to failover first.",
                                 errors.ECODE_INVAL)

    for iname in all_instances:
      if node_info.name in self.cfg.GetInstanceInfo(iname).all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first." % iname,
                                   errors.ECODE_INVAL)

    self.op.node_name = node_info.name
    self.node = node_info

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    Failures in the post hooks or in the remote cleanup are only
    logged as warnings.

    @param feedback_fn: function used to send feedback to the caller

    """
    node_name = self.node.name
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node_name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node_name])
    self.context.RemoveNode(node_name)

    # Run post hooks on the node before it's removed
    hooks_master = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hooks_master.RunPhase(constants.HOOKS_PHASE_POST, [node_name])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % node_name)

    leave_result = self.rpc.call_node_leave_cluster(node_name,
                                                    modify_ssh_setup)
    err = leave_result.fail_msg
    if err:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", err)
2883

    
2884

    
2885
class LUQueryNodes(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable-msg=W0142
  _OP_REQP = ["output_fields", "names", "use_locking"]
  REQ_BGL = False

  # fields read directly as attributes of the node object
  _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
                    "master_candidate", "offline", "drained"]

  # fields which need live data from the nodes
  _FIELDS_DYNAMIC = utils.FieldSet(
    "dtotal", "dfree",
    "mtotal", "mnode", "mfree",
    "bootid",
    "ctotal", "cnodes", "csockets",
    )

  # fields computed from the configuration only
  _FIELDS_STATIC = utils.FieldSet(*[
    "pinst_cnt", "sinst_cnt",
    "pinst_list", "sinst_list",
    "pip", "sip", "tags",
    "master",
    "role"] + _SIMPLE_FIELDS
    )

  def ExpandNames(self):
    """Validate the output fields and compute the needed locks.

    Node locks (shared) are requested only if non-static fields were
    selected and the opcode asks for locking.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if not self.op.names:
      self.wanted = locking.ALL_SET
    else:
      self.wanted = _GetWantedNodes(self, self.op.names)

    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      # if we don't request only static fields, we need to lock the nodes
      self.needed_locks[locking.LEVEL_NODE] = self.wanted

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # The validation of the node list is done in the _GetWantedNodes,
    # if non empty, and if empty, there's no validation to do
    pass

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    @param feedback_fn: function used to send feedback to the caller
    @rtype: list
    @return: one row per node (in nice-sorted name order), each row
        holding the values of the requested output fields

    """
    all_info = self.cfg.GetAllNodesInfo()
    if self.do_locking:
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
    elif self.wanted != locking.ALL_SET:
      nodenames = self.wanted
      missing = set(nodenames).difference(all_info.keys())
      if missing:
        raise errors.OpExecError(
          "Some nodes were removed before retrieving their data: %s" % missing)
    else:
      nodenames = all_info.keys()

    nodenames = utils.NiceSort(nodenames)
    nodelist = [all_info[name] for name in nodenames]

    # begin data gathering

    if self.do_node_query:
      # output field name -> key in the node info payload, for the
      # values which are converted to integers
      int_fields = [
        ("mtotal", 'memory_total'),
        ("mnode", 'memory_dom0'),
        ("mfree", 'memory_free'),
        ("dtotal", 'vg_size'),
        ("dfree", 'vg_free'),
        ("ctotal", 'cpu_total'),
        ("cnodes", 'cpu_nodes'),
        ("csockets", 'cpu_sockets'),
        ]
      live_data = {}
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                          self.cfg.GetHypervisorType())
      for name in nodenames:
        reply = node_data[name]
        if reply.fail_msg or not reply.payload:
          # no (usable) answer from this node
          live_data[name] = {}
          continue
        info = reply.payload
        node_live = dict([(fname, utils.TryConvert(int, info.get(key, None)))
                          for (fname, key) in int_fields])
        node_live["bootid"] = info.get('bootid', None)
        live_data[name] = node_live
    else:
      live_data = dict.fromkeys(nodenames, {})

    node_to_primary = dict([(name, set()) for name in nodenames])
    node_to_secondary = dict([(name, set()) for name in nodenames])

    inst_fields = frozenset(("pinst_cnt", "pinst_list",
                             "sinst_cnt", "sinst_list"))
    if inst_fields & frozenset(self.op.output_fields):
      for inst in self.cfg.GetAllInstancesInfo().values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)

    master_node = self.cfg.GetMasterNode()

    # end data gathering

    # config-derived fields, computed per node object
    config_getters = {
      "pinst_list": lambda node: list(node_to_primary[node.name]),
      "sinst_list": lambda node: list(node_to_secondary[node.name]),
      "pinst_cnt": lambda node: len(node_to_primary[node.name]),
      "sinst_cnt": lambda node: len(node_to_secondary[node.name]),
      "pip": lambda node: node.primary_ip,
      "sip": lambda node: node.secondary_ip,
      "tags": lambda node: list(node.GetTags()),
      "master": lambda node: node.name == master_node,
      }

    output = []
    for node in nodelist:
      row = []
      for field in self.op.output_fields:
        if field in self._SIMPLE_FIELDS:
          val = getattr(node, field)
        elif field in config_getters:
          val = config_getters[field](node)
        elif self._FIELDS_DYNAMIC.Matches(field):
          val = live_data[node.name].get(field, None)
        elif field == "role":
          # first matching role wins: master, candidate, drained,
          # offline, regular
          if node.name == master_node:
            val = "M"
          elif node.master_candidate:
            val = "C"
          elif node.drained:
            val = "D"
          elif node.offline:
            val = "O"
          else:
            val = "R"
        else:
          raise errors.ParameterError(field)
        row.append(val)
      output.append(row)

    return output
3044

    
3045

    
3046
class LUQueryNodeVolumes(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  _OP_REQP = ["nodes", "output_fields"]
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def ExpandNames(self):
    """Validate the output fields and request shared node locks.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if self.op.nodes:
      wanted = _GetWantedNodes(self, self.op.nodes)
    else:
      wanted = locking.ALL_SET
    self.needed_locks[locking.LEVEL_NODE] = wanted

  def CheckPrereq(self):
    """Check prerequisites.

    This only records the list of nodes for which locks were acquired.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Computes the list of volumes and their attributes.

    Offline nodes are skipped silently, while nodes failing the RPC are
    skipped with a warning.

    @param feedback_fn: function used to send feedback to the caller
    @rtype: list
    @return: one row per volume, each row holding the values of the
        requested output fields, converted to strings

    """
    nodenames = self.nodes
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = [self.cfg.GetInstanceInfo(iname) for iname
             in self.cfg.GetInstanceList()]

    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])

    # output fields which are plain copies of a volume attribute
    direct_keys = {
      "phys": 'dev',
      "vg": 'vg',
      "name": 'name',
      }

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      err = nresult.fail_msg
      if err:
        self.LogWarning("Can't compute volume data on node %s: %s", node, err)
        continue

      for vol in sorted(nresult.payload, key=lambda vol: vol['dev']):
        row = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field in direct_keys:
            val = vol[direct_keys[field]]
          elif field == "size":
            val = int(float(vol['size']))
          elif field == "instance":
            # the first instance having this LV on this node owns it
            for inst in ilist:
              inst_lvs = lv_by_node[inst]
              if node in inst_lvs and vol['name'] in inst_lvs[node]:
                val = inst.name
                break
            else:
              val = '-'
          else:
            raise errors.ParameterError(field)
          row.append(str(val))

        output.append(row)

    return output
3130

    
3131

    
3132
class LUQueryNodeStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _OP_REQP = ["nodes", "storage_type", "output_fields"]
  REQ_BGL = False
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)

  def CheckArguments(self):
    """Validate the storage type and the requested output fields.

    """
    _CheckStorageType(self.op.storage_type)

    valid_dynamic = utils.FieldSet(*constants.VALID_STORAGE_FIELDS)
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=valid_dynamic,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    """Request shared locks on the given nodes (or on all of them).

    """
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This defaults the optional "name" opcode parameter to None and
    records the list of nodes for which locks were acquired.

    """
    self.op.name = getattr(self.op, "name", None)

    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Computes the list of storage units and their attributes.

    Offline nodes are skipped silently, while nodes failing the RPC are
    skipped with a warning.

    @param feedback_fn: function used to send feedback to the caller
    @rtype: list
    @return: one row per storage unit (ordered by node, then by unit
        name), each row holding the values of the requested output fields

    """
    requested = self.op.output_fields

    # Always get name to sort by
    if constants.SF_NAME in requested:
      fields = requested[:]
    else:
      fields = [constants.SF_NAME] + requested

    # Never ask for node or type as it's only known to the LU
    lu_only = (constants.SF_NODE, constants.SF_TYPE)
    fields = [fname for fname in fields if fname not in lu_only]

    field_idx = dict([(fname, idx) for (idx, fname) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      err = nresult.fail_msg
      if err:
        self.LogWarning("Can't get storage data from node %s: %s", node, err)
        continue

      # index the returned rows by storage unit name
      by_name = dict([(entry[name_idx], entry) for entry in nresult.payload])

      for unit_name in utils.NiceSort(by_name.keys()):
        entry = by_name[unit_name]

        out = []

        for field in requested:
          if field == constants.SF_NODE:
            out.append(node)
          elif field == constants.SF_TYPE:
            out.append(self.op.storage_type)
          elif field in field_idx:
            out.append(entry[field_idx[field]])
          else:
            raise errors.ParameterError(field)

        result.append(out)

    return result
3224

    
3225

    
3226
class LUModifyNodeStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  _OP_REQP = ["node_name", "storage_type", "name", "changes"]
  REQ_BGL = False

  def CheckArguments(self):
    """Expand the node name and validate the storage type.

    """
    # the expanded name must be stored back in the opcode, as both
    # ExpandNames and Exec read it from self.op.node_name
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    _CheckStorageType(self.op.storage_type)

  def ExpandNames(self):
    """Request the lock of the target node.

    """
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the storage type supports modifications and that
    only modifiable fields are being changed.

    @raise errors.OpPrereqError: if the storage type can't be modified
        or if any of the requested changes touches a non-modifiable field

    """
    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Modifies the storage unit on the node.

    The RPC result's Raise method is used for reporting failures.

    @param feedback_fn: function used to send feedback to the caller

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))
3273

    
3274

    
3275
class LUAddNode(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]

  def CheckArguments(self):
    """Normalize the name of the node to be added.

    """
    self.op.node_name = utils.HostInfo.NormalizeName(self.op.node_name)

  def BuildHooksEnv(self):
    """Build hooks env.

    The first node list returned is the current node list; the second
    one is the same list plus the node being added.

    """
    target = self.op.node_name
    env = {
      "OP_TARGET": target,
      "NODE_NAME": target,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      }
    pre_nodes = self.cfg.GetNodeList()
    post_nodes = pre_nodes + [target]
    return env, pre_nodes, post_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node name resolves and the secondary IP is valid
     - the node is absent from the configuration (or present, for a
       re-add)
     - its IP addresses do not conflict with those of other nodes
     - it is single/dual homed exactly like the master
     - it answers a TCP ping on the node daemon port

    It also decides whether the node becomes a master candidate and
    prepares the node object used by Exec.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg

    hostinfo = utils.GetHostInfo(self.op.node_name)
    node = hostinfo.name

    primary_ip = hostinfo.ip
    self.op.primary_ip = primary_ip

    # without an explicit secondary IP the primary one is used for both
    secondary_ip = getattr(self.op, "secondary_ip", None)
    if secondary_ip is None:
      secondary_ip = primary_ip
    if not utils.IsValidIP(secondary_ip):
      raise errors.OpPrereqError("Invalid secondary IP given",
                                 errors.ECODE_INVAL)
    self.op.secondary_ip = secondary_ip

    node_list = cfg.GetNodeList()
    already_known = node in node_list
    if self.op.readd:
      if not already_known:
        raise errors.OpPrereqError("Node %s is not in the configuration" %
                                   node, errors.ECODE_NOENT)
    elif already_known:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)

    self.changed_primary_ip = False
    new_ips = (primary_ip, secondary_ip)

    for existing_name in node_list:
      existing = cfg.GetNodeInfo(existing_name)

      if self.op.readd and node == existing_name:
        # the node's own entry: the secondary IP must be unchanged, a
        # changed primary IP is only remembered for Exec
        if existing.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      if existing.primary_ip in new_ips or existing.secondary_ip in new_ips:
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing.name,
                                   errors.ECODE_NOTUNIQUE)

    # the new node must be single/dual homed exactly like the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed and not newbie_singlehomed:
      raise errors.OpPrereqError("The master has no private ip but the"
                                 " new node has one",
                                 errors.ECODE_INVAL)
    if newbie_singlehomed and not master_singlehomed:
      raise errors.OpPrereqError("The master has a private ip but the"
                                 " new node doesn't have one",
                                 errors.ECODE_INVAL)

    # reachability of the primary address
    if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    # reachability of the secondary address, from our own secondary IP
    if not newbie_singlehomed:
      if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                           source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to noded port",
                                   errors.ECODE_ENVIRON)

    # a re-added node is passed as an exception to the promotion decision
    exceptions = []
    if self.op.readd:
      exceptions.append(node)

    self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)

    if self.op.readd:
      self.new_node = self.cfg.GetNodeInfo(node)
      assert self.new_node is not None, "Can't retrieve locked node %s" % node
    else:
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    The steps are: reset the node flags on a re-add, check the protocol
    version of the node, optionally copy the ssh keys and update
    /etc/hosts, check the secondary IP on the node, verify ssh/hostname
    connectivity from the master and finally register the node in the
    configuration/context.

    """
    new_node = self.new_node
    node = new_node.name

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = False # pylint: disable-msg=W0201
      new_node.offline = False # pylint: disable-msg=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION != result.payload:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))
    logging.info("Communication to node %s fine, sw version %s match",
                 node, result.payload)

    # setup ssh on node
    if self.cfg.GetClusterInfo().modify_ssh_setup:
      logging.info("Copy ssh key to node %s", node)
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
      keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
                  constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
                  priv_key, pub_key]

      # the six file contents are passed positionally, in keyfiles order
      keyarray = [utils.ReadFile(path) for path in keyfiles]

      result = self.rpc.call_node_add(node, *keyarray)
      result.Raise("Cannot transfer ssh keys to the new node")

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      utils.AddHostToEtcHosts(new_node.name)

    if new_node.secondary_ip != new_node.primary_ip:
      result = self.rpc.call_node_has_ip_address(new_node.name,
                                                 new_node.secondary_ip)
      result.Raise("Failure checking secondary ip on node %s" % new_node.name,
                   prereq=True, ecode=errors.ECODE_ENVIRON)
      if not result.payload:
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
                                 " you gave (%s). Please fix and re-run this"
                                 " command." % new_node.secondary_ip)

    verifiers = [self.cfg.GetMasterNode()]
    verify_params = {
      constants.NV_NODELIST: [node],
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(verifiers, verify_params,
                                       self.cfg.GetClusterName())
    for verifier in verifiers:
      verify_result = result[verifier]
      verify_result.Raise("Cannot communicate with node %s" % verifier)
      nl_payload = verify_result.payload[constants.NV_NODELIST]
      if not nl_payload:
        continue
      for failed in nl_payload:
        feedback_fn("ssh/hostname verification failed"
                    " (checking from %s): %s" %
                    (verifier, nl_payload[failed]))
      raise errors.OpExecError("ssh/hostname verification failed.")

    if not self.op.readd:
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
      self.context.AddNode(new_node, self.proc.GetECId())
      return

    _RedistributeAncillaryFiles(self)
    self.context.ReaddNode(new_node)
    # make sure we redistribute the config
    self.cfg.Update(new_node, feedback_fn)
    # and make sure the new node will not have old files around
    if not new_node.master_candidate:
      result = self.rpc.call_node_demote_from_mc(new_node.name)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Node failed to demote itself from master"
                        " candidate status: %s" % msg)
3503

    
3504

    
3505
class LUSetNodeParams(LogicalUnit):
  """Modifies the parameters of a node.

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    """Validate the requested flag changes and derive the helper state.

    Besides checking the opcode fields this computes whether the node
    is being offlined/drained, de-offlined/de-drained, whether it might
    lose its master candidate status and whether all nodes need to be
    locked.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    for flag in ("master_candidate", "offline", "drained", "auto_promote"):
      _CheckBooleanOpField(self.op, flag)

    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
    if all_mods.count(None) == len(all_mods):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # the flags are tri-state (None means "not requested"), hence the
    # explicit comparisons against True/False below
    offlining = self.op.offline == True
    draining = self.op.drained == True
    self.offline_or_drain = offlining or draining

    deofflining = self.op.offline == False
    dedraining = self.op.drained == False
    self.deoffline_or_drain = deofflining or dedraining

    self.might_demote = (self.op.master_candidate == False or
                         self.offline_or_drain)

    self.lock_all = self.op.auto_promote and self.might_demote

  def ExpandNames(self):
    """Lock either all nodes or only the node being modified.

    """
    if self.lock_all:
      node_locks = locking.ALL_SET
    else:
      node_locks = self.op.node_name
    self.needed_locks = {locking.LEVEL_NODE: node_locks}

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list (used for both hook phases) is the master node plus
    the node being modified.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      }
    nodes = [self.cfg.GetMasterNode(), self.op.node_name]
    return env, nodes, nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the flags of the master node are not touched, that
    enough master candidates remain when the node might be demoted
    without auto-promotion, and that an offline or drained node is not
    made a master candidate. A node being de-offlined/de-drained may be
    promoted to master candidate here.

    """
    node = self.cfg.GetNodeInfo(self.op.node_name)
    self.node = node

    flags_requested = (self.op.master_candidate is not None or
                       self.op.drained is not None or
                       self.op.offline is not None)
    # we can't change the master's node flags
    if flags_requested and self.op.node_name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("The master role can be changed"
                                 " only via masterfailover",
                                 errors.ECODE_INVAL)

    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto-promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto_promote to allow promotion",
                                   errors.ECODE_INVAL)

    # the node stays offline/drained unless this very request clears the flag
    stays_offline = node.offline and self.op.offline != False
    stays_drained = node.drained and self.op.drained != False
    if self.op.master_candidate == True and (stays_offline or stays_drained):
      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
                                 " to master_candidate" % node.name,
                                 errors.ECODE_INVAL)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.deoffline_or_drain and not self.offline_or_drain and
        not self.op.master_candidate == True and not node.master_candidate):
      self.op.master_candidate = _DecideSelfPromotion(self)
      if self.op.master_candidate:
        self.LogInfo("Autopromoting node to master candidate")

  def _RequestDemotion(self, node):
    """Ask the node to demote itself; a failure is only logged.

    """
    rrc = self.rpc.call_node_demote_from_mc(node.name)
    msg = rrc.fail_msg
    if msg:
      self.LogWarning("Node failed to demote itself: %s" % msg)

  def Exec(self, feedback_fn):
    """Modifies a node.

    @return: a list of (parameter, description) pairs for the changes
        that were made

    """
    node = self.node

    result = []
    changed_mc = False

    offline = self.op.offline
    if offline is not None:
      node.offline = offline
      result.append(("offline", str(offline)))
      if offline == True:
        # offlining clears the master candidate and drained flags; no
        # demotion RPC is sent in this branch
        if node.master_candidate:
          node.master_candidate = False
          changed_mc = True
          result.append(("master_candidate", "auto-demotion due to offline"))
        if node.drained:
          node.drained = False
          result.append(("drained", "clear drained status due to offline"))

    master_candidate = self.op.master_candidate
    if master_candidate is not None:
      node.master_candidate = master_candidate
      changed_mc = True
      result.append(("master_candidate", str(master_candidate)))
      if master_candidate == False:
        self._RequestDemotion(node)

    drained = self.op.drained
    if drained is not None:
      node.drained = drained
      result.append(("drained", str(drained)))
      if drained == True:
        if node.master_candidate:
          node.master_candidate = False
          changed_mc = True
          result.append(("master_candidate", "auto-demotion due to drain"))
          self._RequestDemotion(node)
        if node.offline:
          node.offline = False
          result.append(("offline", "clear offline status due to drain"))

    # we locked all nodes, we adjust the CP before updating this node
    if self.lock_all:
      _AdjustCandidatePool(self, [node.name])

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup
    if changed_mc:
      self.context.ReaddNode(node)

    return result
3666

    
3667

    
3668
class LUPowercycleNode(NoHooksLU):
  """Powercycles a node.

  """
  _OP_REQP = ["node_name", "force"]
  REQ_BGL = False

  def CheckArguments(self):
    """Expand the node name and refuse the master node unless forced.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    targets_master = self.op.node_name == self.cfg.GetMasterNode()
    if targets_master and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This LU has no prereqs.

    """

  def Exec(self, feedback_fn):
    """Reboots a node.

    @return: the payload of the node_powercycle RPC call

    """
    hv_type = self.cfg.GetHypervisorType()
    result = self.rpc.call_node_powercycle(self.op.node_name, hv_type)
    result.Raise("Failed to schedule the reboot")
    return result.payload
3707

    
3708

    
3709
class LUQueryClusterInfo(NoHooksLU):
  """Query cluster configuration.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    """No locks are needed for this LU.

    """
    self.needed_locks = {}

  def CheckPrereq(self):
    """No prerequisites needed for this LU.

    """

  def Exec(self, feedback_fn):
    """Return cluster config.

    @return: a dictionary with version constants, the architecture of
        the machine running this code and values read from the cluster
        configuration object

    """
    cluster = self.cfg.GetClusterInfo()
    enabled = cluster.enabled_hypervisors

    # Filter just for enabled hypervisors
    os_hvp = {}
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = dict([(hv_name, hv_params)
                              for (hv_name, hv_params) in hv_dict.items()
                              if hv_name in enabled])

    hvparams = dict([(hv_name, cluster.hvparams[hv_name])
                     for hv_name in enabled])

    return {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      # the first enabled hypervisor is reported as the default one
      "default_hypervisor": enabled[0],
      "enabled_hypervisors": enabled,
      "hvparams": hvparams,
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "nicparams": cluster.nicparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "file_storage_dir": cluster.file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      }
3768

    
3769

    
3770
class LUQueryConfigValues(NoHooksLU):
  """Return configuration values.

  """
  _OP_REQP = []
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause")

  def ExpandNames(self):
    """Declare that no locks are needed and check the output fields.

    """
    self.needed_locks = {}

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def CheckPrereq(self):
    """No prerequisites.

    """

  def Exec(self, feedback_fn):
    """Compute the values of the requested fields.

    @return: a list with one value per entry of the output_fields
        opcode parameter, in the same order
    @raise errors.ParameterError: for a field name that is not handled

    """
    # one zero-argument callable per supported field; a value is only
    # computed when its field is actually requested
    getters = {
      "cluster_name": self.cfg.GetClusterName,
      "master_node": self.cfg.GetMasterNode,
      "drain_flag":
        lambda: os.path.exists(constants.JOB_QUEUE_DRAIN_FILE),
      "watcher_pause":
        lambda: utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE),
      }

    values = []
    for field in self.op.output_fields:
      getter = getters.get(field)
      if getter is None:
        raise errors.ParameterError(field)
      values.append(getter())
    return values
3811

    
3812

    
3813
class LUActivateInstanceDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance; its node locks are computed in DeclareLocks.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Lock the nodes of the instance at the node level.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and that its
    primary node is online; a missing ignore_size opcode parameter is
    defaulted to False.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)
    if not hasattr(self.op, "ignore_size"):
      self.op.ignore_size = False

  def Exec(self, feedback_fn):
    """Activate the disks.

    @return: the device information computed by _AssembleInstanceDisks
    @raise errors.OpExecError: if the disks could not be assembled

    """
    (assembled, dev_info) = \
      _AssembleInstanceDisks(self, self.instance,
                             ignore_size=self.op.ignore_size)
    if assembled:
      return dev_info

    raise errors.OpExecError("Cannot activate block devices")
3853

    
3854

    
3855
def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors in the first pass (where
      all nodes are assembled in secondary mode) won't mark the
      operation as failed
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @rtype: tuple
  @return: a tuple of (disks_ok, device_info); disks_ok is False if
      any assemble call that is not ignored has failed, device_info is
      a list with one (primary_node, instance_visible_name, device_path)
      entry per instance disk, where device_path is the payload of the
      assemble call on the primary node, or None if that call failed

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for inst_disk in instance.disks:
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        # work on a copy, so that the disk we were given keeps its size
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for inst_disk in instance.disks:
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        # failures on the primary node are never ignored
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in instance.disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info
3937

    
3938

    
3939
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  The disks are assembled with force passed as ignore_secondaries; if
  that fails they are shut down again and an error is raised.

  @raise errors.OpExecError: if the disks could not be assembled

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                       ignore_secondaries=force)
  if disks_ok:
    return

  _ShutdownInstanceDisks(lu, instance)
  # the hint is only shown when force was given explicitly as a false value
  if not (force is None or force):
    lu.proc.LogWarning("", hint="If the message above refers to a"
                       " secondary node,"
                       " you can retry the operation using '--force'.")
  raise errors.OpExecError("Disk consistency error")
3952

    
3953

    
3954
class LUDeactivateInstanceDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance; its node locks are computed in DeclareLocks.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Lock the nodes of the instance at the node level.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    """
    _SafeShutdownInstanceDisks(self, self.instance)
3986

    
3987

    
3988
def _SafeShutdownInstanceDisks(lu, instance):
  """Shutdown block devices of an instance, after a safety check.

  _CheckInstanceDown is called first (with "cannot shutdown disks" as
  the reason text); only afterwards is _ShutdownInstanceDisks invoked.
  The result of the shutdown is not returned.

  """
  # the check has to come first, the order of the two calls matters
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")

  _ShutdownInstanceDisks(lu, instance)
3997

    
3998

    
3999
def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If ignore_primary is true, errors on the primary node are ignored
  for the purpose of the return value (they are still logged as
  warnings); errors on any other node always count as failures.

  @type ignore_primary: boolean
  @param ignore_primary: whether shutdown errors on the primary node
      should leave the result unaffected
  @rtype: boolean
  @return: False if any shutdown error that is not ignored has
      occurred, True otherwise

  """
  all_result = True
  for disk in instance.disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        # only a primary-node failure with ignore_primary set is forgiven
        if not ignore_primary or node != instance.primary_node:
          all_result = False
  return all_result
4020

    
4021

    
4022
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  The node is queried via the node_info RPC call and the 'memory_free'
  entry of the answer is compared with the requested amount. A failed
  query, a non-integer answer or too little free memory all result in
  an OpPrereqError exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  vg_name = lu.cfg.GetVGName()
  node_result = lu.rpc.call_node_info([node], vg_name, hypervisor_name)[node]
  node_result.Raise("Can't get data from node %s" % node,
                    prereq=True, ecode=errors.ECODE_ENVIRON)

  free_mem = node_result.payload.get("memory_free")
  # a missing or non-integer value means the node could not tell us
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)

  if free_mem < requested:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)
4057

    
4058

    
4059
def _CheckNodesFreeDisk(lu, nodenames, requested):
  """Verify that all nodes have enough free space in the default VG.

  All given nodes are queried with a single RPC call and their
  C{vg_free} value is inspected in the order of C{nodenames}. The first
  node whose RPC failed, whose value is not an integer, or whose free
  space is below the requested amount causes an OpPrereqError.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk, or
      we cannot check the node

  """
  vg_name = lu.cfg.GetVGName()
  hv_name = lu.cfg.GetHypervisorType()
  results = lu.rpc.call_node_info(nodenames, vg_name, hv_name)

  for name in nodenames:
    node_result = results[name]
    node_result.Raise("Cannot get current information from node %s" % name,
                      prereq=True, ecode=errors.ECODE_ENVIRON)

    free_space = node_result.payload.get("vg_free")
    if not isinstance(free_space, int):
      # the node answered, but without a usable disk figure
      text = ("Can't compute free disk space on node %s,"
              " result was '%s'" % (name, free_space))
      raise errors.OpPrereqError(text, errors.ECODE_ENVIRON)

    if requested > free_space:
      text = ("Not enough disk space on target node %s:"
              " required %d MiB, available %d MiB" %
              (name, requested, free_space))
      raise errors.OpPrereqError(text, errors.ECODE_NORES)
4093

    
4094

    
4095
class LUStartupInstance(LogicalUnit):
  """Starts an instance.

  The opcode may carry optional C{beparams} and C{hvparams} attributes;
  these are validated in L{CheckPrereq} and handed to the start RPC in
  L{Exec} as one-off overrides.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "force"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the instance name and declare the instance lock.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, validates the
    optional backend/hypervisor parameter overrides, verifies that the
    primary node is online and that the instance bridges exist and, if
    the instance is not already running, that the primary node has
    enough free memory for it.

    @raise errors.OpPrereqError: if the overrides are not dictionaries
        or any of the checks above fails

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra beparams
    self.beparams = getattr(self.op, "beparams", {})
    if self.beparams:
      if not isinstance(self.beparams, dict):
        raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
                                   " dict" % (type(self.beparams), ),
                                   errors.ECODE_INVAL)
      # fill the beparams dict
      utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
      self.op.beparams = self.beparams

    # extra hvparams
    self.hvparams = getattr(self.op, "hvparams", {})
    if self.hvparams:
      if not isinstance(self.hvparams, dict):
        raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
                                   " dict" % (type(self.hvparams), ),
                                   errors.ECODE_INVAL)

      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = objects.FillDict(cluster.hvparams[instance.hypervisor],
                                    instance.hvparams)
      filled_hvp.update(self.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
      self.op.hvparams = self.hvparams

    _CheckNodeOnline(self, instance.primary_node)

    # Work on a private copy so that merging the overrides below can never
    # touch data owned by the configuration.
    bep = dict(self.cfg.GetClusterInfo().FillBE(instance))
    if self.beparams:
      # The instance will be started with these one-off overrides (see
      # Exec), so the memory check must use them as well; otherwise a
      # temporarily raised memory size would go unchecked.
      bep.update(self.beparams)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node,
                      prereq=True, ecode=errors.ECODE_ENVIRON)
    if not remote_info.payload: # not running already
      _CheckNodeFreeMemory(self, instance.primary_node,
                           "starting instance %s" % instance.name,
                           bep[constants.BE_MEMORY], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    The instance is marked as up in the configuration first; then its
    disks are started and the start RPC is sent to the primary node.
    If the start fails the disks are shut down again.

    @raise errors.OpExecError: if the instance could not be started

    """
    instance = self.instance
    force = self.op.force

    self.cfg.MarkInstanceUp(instance.name)

    node_current = instance.primary_node

    _StartInstanceDisks(self, instance, force)

    result = self.rpc.call_instance_start(node_current, instance,
                                          self.hvparams, self.beparams)
    msg = result.fail_msg
    if msg:
      # do not leave the disks active for an instance that is not running
      _ShutdownInstanceDisks(self, instance)
      raise errors.OpExecError("Could not start instance: %s" % msg)
4195

    
4196

    
4197
class LURebootInstance(LogicalUnit):
  """Reboot an instance.

  Soft and hard reboots are delegated to the node in a single RPC; any
  other reboot type is carried out as shutdown, disk restart and start.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    Picks up the optional shutdown timeout from the opcode, falling
    back to the default one.

    """
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    """Validate the reboot type and declare the instance lock.

    @raise errors.ParameterError: if the reboot type is not a known one

    """
    known_types = [
      constants.INSTANCE_REBOOT_SOFT,
      constants.INSTANCE_REBOOT_HARD,
      constants.INSTANCE_REBOOT_FULL,
      ]
    if self.op.reboot_type not in known_types:
      raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
                                  tuple(known_types))
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    hook_env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      }
    hook_env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    node_list = [self.cfg.GetMasterNode()]
    node_list.extend(self.instance.all_nodes)
    return hook_env, node_list, node_list

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, that its primary
    node is online and that its bridges exist.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = inst
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, inst.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, inst)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    On success the instance is marked as up in the configuration.

    @raise errors.OpExecError: if the instance cannot be started again
        during a full reboot

    """
    inst = self.instance
    skip_secondaries = self.op.ignore_secondaries
    kind = self.op.reboot_type

    pnode = inst.primary_node

    node_side_reboot = kind in (constants.INSTANCE_REBOOT_SOFT,
                                constants.INSTANCE_REBOOT_HARD)
    if not node_side_reboot:
      # full reboot: stop the instance, cycle its disks, start it again
      stop_result = self.rpc.call_instance_shutdown(pnode, inst,
                                                    self.shutdown_timeout)
      stop_result.Raise("Could not shutdown instance for full reboot")
      _ShutdownInstanceDisks(self, inst)
      _StartInstanceDisks(self, inst, skip_secondaries)
      start_result = self.rpc.call_instance_start(pnode, inst, None, None)
      failure = start_result.fail_msg
      if failure:
        _ShutdownInstanceDisks(self, inst)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % failure)
    else:
      for dev in inst.disks:
        self.cfg.SetDiskID(dev, pnode)
      reboot_result = self.rpc.call_instance_reboot(pnode, inst, kind,
                                                    self.shutdown_timeout)
      reboot_result.Raise("Could not reboot instance")

    self.cfg.MarkInstanceUp(inst.name)
4285

    
4286

    
4287
class LUShutdownInstance(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    Picks up the optional timeout from the opcode, falling back to the
    default shutdown timeout.

    """
    default_timeout = constants.DEFAULT_SHUTDOWN_TIMEOUT
    self.timeout = getattr(self.op, "timeout", default_timeout)

  def ExpandNames(self):
    """Expand the instance name and declare the instance lock.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hook_env["TIMEOUT"] = self.timeout
    node_list = [self.cfg.GetMasterNode()]
    node_list.extend(self.instance.all_nodes)
    return hook_env, node_list, node_list

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and that its
    primary node is online.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    The instance is marked as down in the configuration before the
    shutdown RPC is sent; a failed RPC is only logged as a warning and
    the instance disks are shut down regardless.

    """
    inst = self.instance
    pnode = inst.primary_node
    wait_time = self.timeout
    self.cfg.MarkInstanceDown(inst.name)
    shutdown_result = self.rpc.call_instance_shutdown(pnode, inst, wait_time)
    failure = shutdown_result.fail_msg
    if failure:
      self.proc.LogWarning("Could not shutdown instance: %s" % failure)

    _ShutdownInstanceDisks(self, inst)
4342

    
4343

    
4344
class LUReinstallInstance(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the instance name and declare the instance lock.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)
    node_list = [self.cfg.GetMasterNode()]
    node_list.extend(self.instance.all_nodes)
    return hook_env, node_list, node_list

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.
    It also rejects diskless instances and, when a new OS type is
    requested, verifies it against the primary node.

    @raise errors.OpPrereqError: if the instance has no disks

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, inst.primary_node)

    has_no_disks = inst.disk_template == constants.DT_DISKLESS
    if has_no_disks:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceDown(self, inst, "cannot reinstall")

    # normalise the optional opcode attributes so Exec can rely on them
    self.op.os_type = getattr(self.op, "os_type", None)
    self.op.force_variant = getattr(self.op, "force_variant", False)
    if self.op.os_type is not None:
      # OS verification
      primary = _ExpandNodeName(self.cfg, inst.primary_node)
      _CheckNodeHasOS(self, primary, self.op.os_type, self.op.force_variant)

    self.instance = inst

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    If a new OS type was requested it is stored in the configuration
    first. The OS create scripts are then run with the instance disks
    activated; the disks are shut down again whether or not the scripts
    succeeded.

    """
    instance = self.instance
    new_os = self.op.os_type

    if new_os is not None:
      feedback_fn("Changing OS to '%s'..." % new_os)
      instance.os = new_os
      self.cfg.Update(instance, feedback_fn)

    _StartInstanceDisks(self, instance, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # the debug level from the opcode is forwarded to the backend
      os_result = self.rpc.call_instance_os_add(instance.primary_node,
                                                instance, True,
                                                self.op.debug_level)
      os_result.Raise("Could not install OS for instance %s on node %s" %
                      (instance.name, instance.primary_node))
    finally:
      _ShutdownInstanceDisks(self, instance)
4413

    
4414

    
4415
class LURecreateInstanceDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disks"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    The C{disks} opcode attribute must be a list of non-negative
    integers (disk indices).

    @raise errors.OpPrereqError: if the list or one of its items is
        invalid

    """
    disk_spec = self.op.disks
    if not isinstance(disk_spec, list):
      raise errors.OpPrereqError("Invalid disks parameter", errors.ECODE_INVAL)
    for entry in disk_spec:
      is_valid = isinstance(entry, int) and not entry < 0
      if not is_valid:
        raise errors.OpPrereqError("Invalid disk specification '%s'" %
                                   str(entry), errors.ECODE_INVAL)

  def ExpandNames(self):
    """Expand the instance name and declare the instance lock.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)
    node_list = [self.cfg.GetMasterNode()]
    node_list.extend(self.instance.all_nodes)
    return hook_env, node_list, node_list

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.
    Diskless instances are rejected. An empty disk list selects all
    disks, otherwise every index must refer to an existing disk.

    @raise errors.OpPrereqError: if the instance has no disks or an
        index is out of range

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, inst.primary_node)

    if inst.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)
    _CheckInstanceDown(self, inst, "cannot recreate disks")

    if self.op.disks:
      for disk_index in self.op.disks:
        if disk_index >= len(inst.disks):
          raise errors.OpPrereqError("Invalid disk index passed '%s'" %
                                     disk_index, errors.ECODE_INVAL)
    else:
      # nothing selected explicitly: recreate every disk
      self.op.disks = range(len(inst.disks))

    self.instance = inst

  def Exec(self, feedback_fn):
    """Recreate the disks.

    Every disk whose index was not selected is passed to the disk
    creation helper as one to skip.

    """
    selected = self.op.disks
    to_skip = [pos for pos, _ in enumerate(self.instance.disks)
               if pos not in selected]

    _CreateDisks(self, self.instance, to_skip=to_skip)
4486

    
4487

    
4488
class LURenameInstance(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "new_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hook_env["INSTANCE_NEW_NAME"] = self.op.new_name
    node_list = [self.cfg.GetMasterNode()]
    node_list.extend(self.instance.all_nodes)
    return hook_env, node_list, node_list

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.
    The new name is resolved and must not belong to an existing
    instance; unless the opcode sets C{ignore_ip}, its IP address must
    not answer on the node daemon port either.

    @raise errors.OpPrereqError: if the new name or its IP is already
        in use

    """
    expanded = _ExpandInstanceName(self.cfg, self.op.instance_name)
    self.op.instance_name = expanded
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert inst is not None
    _CheckNodeOnline(self, inst.primary_node)
    _CheckInstanceDown(self, inst, "cannot rename")
    self.instance = inst

    # new name verification
    host_info = utils.GetHostInfo(self.op.new_name)

    target_name = host_info.name
    self.op.new_name = target_name
    if target_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 target_name, errors.ECODE_EXISTS)

    check_ip = not getattr(self.op, "ignore_ip", False)
    if check_ip and utils.TcpPing(host_info.ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                 (host_info.ip, target_name),
                                 errors.ECODE_NOTUNIQUE)

  def Exec(self, feedback_fn):
    """Rename the instance.

    The instance is renamed in the configuration and its lock is
    replaced. For file-based instances the storage directory is renamed
    on the primary node. Finally the OS rename script is run with the
    disks activated; a failure of the script is only logged as a
    warning.

    """
    instance = self.instance
    previous_name = instance.name
    file_based = instance.disk_template == constants.DT_FILE

    if file_based:
      old_file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])

    self.cfg.RenameInstance(instance.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL
    lock_manager = self.context.glm
    lock_manager.remove(locking.LEVEL_INSTANCE, previous_name)
    lock_manager.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    instance = self.cfg.GetInstanceInfo(self.op.new_name)

    if instance.disk_template == constants.DT_FILE:
      new_file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
      dir_result = self.rpc.call_file_storage_dir_rename(instance.primary_node,
                                                         old_file_storage_dir,
                                                         new_file_storage_dir)
      dir_result.Raise("Could not rename on node %s directory '%s' to '%s'"
                       " (but the instance has been renamed in Ganeti)" %
                       (instance.primary_node, old_file_storage_dir,
                        new_file_storage_dir))

    _StartInstanceDisks(self, instance, None)
    try:
      script_result = self.rpc.call_instance_run_rename(instance.primary_node,
                                                        instance,
                                                        previous_name,
                                                        self.op.debug_level)
      failure = script_result.fail_msg
      if failure:
        warning = ("Could not run OS rename script for instance %s on node %s"
                   " (but the instance has been renamed in Ganeti): %s" %
                   (instance.name, instance.primary_node, failure))
        self.proc.LogWarning(warning)
    finally:
      _ShutdownInstanceDisks(self, instance)
4577

    
4578

    
4579
class LURemoveInstance(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_failures"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    Picks up the optional shutdown timeout from the opcode, falling
    back to the default one.

    """
    default_timeout = constants.DEFAULT_SHUTDOWN_TIMEOUT
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    default_timeout)

  def ExpandNames(self):
    """Declare the instance lock and prepare the node-level locks.

    The node lock list starts empty and is recomputed in
    L{DeclareLocks}.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Compute the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The first node list contains only the master; the second one
    contains the instance's nodes followed by the master.

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hook_env["SHUTDOWN_TIMEOUT"] = self.shutdown_timeout
    master_only = [self.cfg.GetMasterNode()]
    post_nodes = list(self.instance.all_nodes) + master_only
    return hook_env, master_only, post_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    The instance is shut down, its disks are removed and it is dropped
    from the cluster configuration. With C{ignore_failures} set,
    shutdown and disk removal problems are only reported as warnings.

    @raise errors.OpExecError: if shutdown or disk removal fails and
        failures are not ignored

    """
    inst = self.instance
    logging.info("Shutting down instance %s on node %s",
                 inst.name, inst.primary_node)

    shutdown_result = self.rpc.call_instance_shutdown(inst.primary_node, inst,
                                                      self.shutdown_timeout)
    failure = shutdown_result.fail_msg
    if failure:
      if not self.op.ignore_failures:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (inst.name, inst.primary_node, failure))
      feedback_fn("Warning: can't shutdown instance: %s" % failure)

    logging.info("Removing block devices for instance %s", inst.name)

    disks_removed = _RemoveDisks(self, inst)
    if not disks_removed:
      if not self.op.ignore_failures:
        raise errors.OpExecError("Can't remove instance's disks")
      feedback_fn("Warning: can't remove instance's disks")

    logging.info("Removing instance %s out of cluster config", inst.name)

    self.cfg.RemoveInstance(inst.name)
    self.remove_locks[locking.LEVEL_INSTANCE] = inst.name
4657

    
4658

    
4659
class LUQueryInstances(NoHooksLU):
4660
  """Logical unit for querying instances.
4661

4662
  """
4663
  # pylint: disable-msg=W0142
4664
  _OP_REQP = ["output_fields", "names", "use_locking"]
4665
  REQ_BGL = False
4666
  _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
4667
                    "serial_no", "ctime", "mtime", "uuid"]
4668
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
4669
                                    "admin_state",
4670
                                    "disk_template", "ip", "mac", "bridge",
4671
                                    "nic_mode", "nic_link",
4672
                                    "sda_size", "sdb_size", "vcpus", "tags",
4673
                                    "network_port", "beparams",
4674
                                    r"(disk)\.(size)/([0-9]+)",
4675
                                    r"(disk)\.(sizes)", "disk_usage",
4676
                                    r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
4677
                                    r"(nic)\.(bridge)/([0-9]+)",
4678
                                    r"(nic)\.(macs|ips|modes|links|bridges)",
4679
                                    r"(disk|nic)\.(count)",
4680
                                    "hvparams",
4681
                                    ] + _SIMPLE_FIELDS +
4682
                                  ["hv/%s" % name
4683
                                   for name in constants.HVS_PARAMETERS
4684
                                   if name not in constants.HVC_GLOBALS] +
4685
                                  ["be/%s" % name
4686
                                   for name in constants.BES_PARAMETERS])
4687
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
4688

    
4689

    
4690
  def ExpandNames(self):
4691
    _CheckOutputFields(static=self._FIELDS_STATIC,
4692
                       dynamic=self._FIELDS_DYNAMIC,
4693
                       selected=self.op.output_fields)
4694

    
4695
    self.needed_locks = {}
4696
    self.share_locks[locking.LEVEL_INSTANCE] = 1
4697
    self.share_locks[locking.LEVEL_NODE] = 1
4698

    
4699
    if self.op.names:
4700
      self.wanted = _GetWantedInstances(self, self.op.names)
4701
    else:
4702
      self.wanted = locking.ALL_SET
4703

    
4704
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
4705
    self.do_locking = self.do_node_query and self.op.use_locking
4706
    if self.do_locking:
4707
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4708
      self.needed_locks[locking.LEVEL_NODE] = []
4709
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4710

    
4711
  def DeclareLocks(self, level):
4712
    if level == locking.LEVEL_NODE and self.do_locking:
4713
      self._LockInstancesNodes()
4714

    
4715
  def CheckPrereq(self):
4716
    """Check prerequisites.
4717

4718
    """
4719
    pass
4720

    
4721
  def Exec(self, feedback_fn):
4722
    """Computes the list of nodes and their attributes.
4723

4724
    """
4725
    # pylint: disable-msg=R0912
4726
    # way too many branches here
4727
    all_info = self.cfg.GetAllInstancesInfo()
4728
    if self.wanted == locking.ALL_SET:
4729
      # caller didn't specify instance names, so ordering is not important
4730
      if self.do_locking:
4731
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
4732
      else:
4733
        instance_names = all_info.keys()
4734
      instance_names = utils.NiceSort(instance_names)
4735
    else:
4736
      # caller did specify names, so we must keep the ordering
4737
      if self.do_locking:
4738
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
4739
      else:
4740
        tgt_set = all_info.keys()
4741
      missing = set(self.wanted).difference(tgt_set)
4742
      if missing:
4743
        raise errors.OpExecError("Some instances were removed before"
4744
                                 " retrieving their data: %s" % missing)
4745
      instance_names = self.wanted
4746

    
4747
    instance_list = [all_info[iname] for iname in instance_names]
4748

    
4749
    # begin data gathering
4750

    
4751
    nodes = frozenset([inst.primary_node for inst in instance_list])
4752
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
4753

    
4754
    bad_nodes = []
4755
    off_nodes = []
4756
    if self.do_node_query:
4757
      live_data = {}
4758
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
4759
      for name in nodes:
4760
        result = node_data[name]
4761
        if result.offline:
4762
          # offline nodes will be in both lists
4763
          off_nodes.append(name)
4764
        if result.fail_msg:
4765
          bad_nodes.append(name)
4766
        else:
4767
          if result.payload:
4768
            live_data.update(result.payload)
4769
          # else no instance is alive
4770
    else:
4771
      live_data = dict([(name, {}) for name in instance_names])
4772

    
4773
    # end data gathering
4774

    
4775
    HVPREFIX = "hv/"
4776
    BEPREFIX = "be/"
4777
    output = []
4778
    cluster = self.cfg.GetClusterInfo()
4779
    for instance in instance_list:
4780
      iout = []
4781
      i_hv = cluster.FillHV(instance, skip_globals=True)
4782
      i_be = cluster.FillBE(instance)
4783
      i_nicp = [objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
4784
                                 nic.nicparams) for nic in instance.nics]
4785
      for field in self.op.output_fields:
4786
        st_match = self._FIELDS_STATIC.Matches(field)
4787
        if field in self._SIMPLE_FIELDS:
4788
          val = getattr(instance, field)
4789
        elif field == "pnode":
4790
          val = instance.primary_node
4791
        elif field == "snodes":
4792
          val = list(instance.secondary_nodes)
4793
        elif field == "admin_state":
4794
          val = instance.admin_up
4795
        elif field == "oper_state":
4796
          if instance.primary_node in bad_nodes:
4797
            val = None
4798
          else:
4799
            val = bool(live_data.get(instance.name))
4800
        elif field == "status":
4801
          if instance.primary_node in off_nodes:
4802
            val = "ERROR_nodeoffline"
4803
          elif instance.primary_node in bad_nodes:
4804
            val = "ERROR_nodedown"
4805
          else:
4806
            running = bool(live_data.get(instance.name))
4807
            if running:
4808
              if instance.admin_up:
4809
                val = "running"
4810
              else:
4811
                val = "ERROR_up"
4812
            else:
4813
              if instance.admin_up:
4814
                val = "ERROR_down"
4815
              else:
4816
                val = "ADMIN_down"
4817
        elif field == "oper_ram":
4818
          if instance.primary_node in bad_nodes:
4819
            val = None
4820
          elif instance.name in live_data:
4821
            val = live_data[instance.name].get("memory", "?")
4822
          else:
4823
            val = "-"
4824
        elif field == "vcpus":
4825
          val = i_be[constants.BE_VCPUS]
4826
        elif field == "disk_template":
4827
          val = instance.disk_template
4828
        elif field == "ip":
4829
          if instance.nics:
4830
            val = instance.nics[0].ip
4831
          else:
4832
            val = None
4833
        elif field == "nic_mode":
4834
          if instance.nics:
4835
            val = i_nicp[0][constants.NIC_MODE]
4836
          else:
4837
            val = None
4838
        elif field == "nic_link":
4839
          if instance.nics:
4840
            val = i_nicp[0][constants.NIC_LINK]
4841
          else:
4842
            val = None
4843
        elif field == "bridge":
4844
          if (instance.nics and
4845
              i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
4846
            val = i_nicp[0][constants.NIC_LINK]
4847
          else:
4848
            val = None
4849
        elif field == "mac":
4850
          if instance.nics:
4851
            val = instance.nics[0].mac
4852
          else:
4853
            val = None
4854
        elif field == "sda_size" or field == "sdb_size":
4855
          idx = ord(field[2]) - ord('a')
4856
          try:
4857
            val = instance.FindDisk(idx).size
4858
          except errors.OpPrereqError:
4859
            val = None
4860
        elif field == "disk_usage": # total disk usage per node
4861
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
4862
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
4863
        elif field == "tags":
4864
          val = list(instance.GetTags())
4865
        elif field == "hvparams":
4866
          val = i_hv
4867
        elif (field.startswith(HVPREFIX) and
4868
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
4869
              field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
4870
          val = i_hv.get(field[len(HVPREFIX):], None)
4871
        elif field == "beparams":
4872
          val = i_be
4873
        elif (field.startswith(BEPREFIX) and
4874
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
4875
          val = i_be.get(field[len(BEPREFIX):], None)
4876
        elif st_match and st_match.groups():
4877
          # matches a variable list
4878
          st_groups = st_match.groups()
4879
          if st_groups and st_groups[0] == "disk":
4880
            if st_groups[1] == "count":
4881
              val = len(instance.disks)
4882
            elif st_groups[1] == "sizes":
4883
              val = [disk.size for disk in instance.disks]
4884
            elif st_groups[1] == "size":
4885
              try:
4886
                val = instance.FindDisk(st_groups[2]).size
4887
              except errors.OpPrereqError:
4888
                val = None
4889
            else:
4890
              assert False, "Unhandled disk parameter"
4891
          elif st_groups[0] == "nic":
4892
            if st_groups[1] == "count":
4893
              val = len(instance.nics)
4894
            elif st_groups[1] == "macs":
4895
              val = [nic.mac for nic in instance.nics]
4896
            elif st_groups[1] == "ips":
4897
              val = [nic.ip for nic in instance.nics]
4898
            elif st_groups[1] == "modes":
4899
              val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
4900
            elif st_groups[1] == "links":
4901
              val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
4902
            elif st_groups[1] == "bridges":
4903
              val = []
4904
              for nicp in i_nicp:
4905
                if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
4906
                  val.append(nicp[constants.NIC_LINK])
4907
                else:
4908
                  val.append(None)
4909
            else:
4910
              # index-based item
4911
              nic_idx = int(st_groups[2])
4912
              if nic_idx >= len(instance.nics):
4913
                val = None
4914
              else:
4915
                if st_groups[1] == "mac":
4916
                  val = instance.nics[nic_idx].mac
4917
                elif st_groups[1] == "ip":
4918
                  val = instance.nics[nic_idx].ip
4919
                elif st_groups[1] == "mode":
4920
                  val = i_nicp[nic_idx][constants.NIC_MODE]
4921
                elif st_groups[1] == "link":
4922
                  val = i_nicp[nic_idx][constants.NIC_LINK]
4923
                elif st_groups[1] == "bridge":
4924
                  nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
4925
                  if nic_mode == constants.NIC_MODE_BRIDGED:
4926
                    val = i_nicp[nic_idx][constants.NIC_LINK]
4927
                  else:
4928
                    val = None
4929
                else:
4930
                  assert False, "Unhandled NIC parameter"
4931
          else:
4932
            assert False, ("Declared but unhandled variable parameter '%s'" %
4933
                           field)
4934
        else:
4935
          assert False, "Declared but unhandled parameter '%s'" % field
4936
        iout.append(val)
4937
      output.append(iout)
4938

    
4939
    return output
4940

    
4941

    
4942
class LUFailoverInstance(LogicalUnit):
  """Logical unit failing an instance over to its first secondary node.

  The instance is shut down on its current primary node, the
  configuration is updated to make the first secondary node the new
  primary and, if the instance is marked as up, it is started there.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_consistency"]
  REQ_BGL = False

  def CheckArguments(self):
    """Compute the shutdown timeout.

    The opcode's C{shutdown_timeout} attribute is used if present,
    otherwise L{constants.DEFAULT_SHUTDOWN_TIMEOUT}.

    """
    fallback = constants.DEFAULT_SHUTDOWN_TIMEOUT
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout", fallback)

  def ExpandNames(self):
    """Lock the instance and prepare the node-level lock declaration.

    The node lock list starts empty and is marked for replacement; it is
    filled in by L{DeclareLocks}.

    """
    self._ExpandAndLockInstance()
    node_level = locking.LEVEL_NODE
    self.needed_locks[node_level] = []
    self.recalculate_locks[node_level] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Declare the instance's node locks when the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    @return: a tuple of the hook environment, the list made of the master
        node plus the instance's secondary nodes, and a copy of that list
        extended with the instance's current primary node

    """
    instance = self.instance
    old_primary = instance.primary_node
    new_primary = instance.secondary_nodes[0]

    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      "OLD_PRIMARY": old_primary,
      "OLD_SECONDARY": new_primary,
      "NEW_PRIMARY": new_primary,
      "NEW_SECONDARY": old_primary,
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))

    nodes = [self.cfg.GetMasterNode()]
    nodes.extend(instance.secondary_nodes)
    nodes_with_primary = nodes + [old_primary]
    return env, nodes, nodes_with_primary

  def CheckPrereq(self):
    """Check prerequisites.

    This verifies that the instance uses a network mirrored disk
    template and has a secondary node, that this node is online and not
    drained, that it has enough free memory (only checked if the
    instance is marked as up) and that the instance's bridges exist
    there.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    filled_be = self.cfg.GetClusterInfo().FillBE(instance)
    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Instance's disk layout is not network"
                                 " mirrored, cannot failover.",
                                 errors.ECODE_STATE)

    secondaries = instance.secondary_nodes
    if not secondaries:
      raise errors.ProgrammerError("no secondary node but using a mirrored"
                                   " disk template")

    target_node = secondaries[0]
    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)

    if not instance.admin_up:
      self.LogInfo("Not checking memory on the secondary node as instance"
                   " will not be started")
    else:
      # the instance will be started on the secondary node, so the
      # memory has to be available there
      reason = "failing over instance %s" % instance.name
      _CheckNodeFreeMemory(self, target_node, reason,
                           filled_be[constants.BE_MEMORY],
                           instance.hypervisor)

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.

    @param feedback_fn: function called with progress messages
    @raise errors.OpExecError: if a disk is degraded on the target node
        or the instance cannot be shut down (both only when
        C{ignore_consistency} is not set), or if the disks cannot be
        deactivated/activated or the instance cannot be started

    """
    instance = self.instance
    old_primary = instance.primary_node
    new_primary = instance.secondary_nodes[0]

    if not instance.admin_up:
      feedback_fn("* not checking disk consistency as instance is not running")
    else:
      feedback_fn("* checking disk consistency between source and target")
      for disk in instance.disks:
        # for drbd, these are drbd over lvm
        consistent = _CheckDiskConsistency(self, disk, new_primary, False)
        if consistent or self.op.ignore_consistency:
          continue
        raise errors.OpExecError("Disk %s is degraded on target node,"
                                 " aborting failover." % disk.iv_name)

    feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 instance.name, old_primary)

    shutdown_res = self.rpc.call_instance_shutdown(old_primary, instance,
                                                   self.shutdown_timeout)
    shutdown_err = shutdown_res.fail_msg
    if shutdown_err:
      if not self.op.ignore_consistency:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, old_primary, shutdown_err))
      self.proc.LogWarning("Could not shutdown instance %s on node %s."
                           " Proceeding anyway. Please make sure node"
                           " %s is down. Error details: %s",
                           instance.name, old_primary, old_primary,
                           shutdown_err)

    feedback_fn("* deactivating the instance's disks on source node")
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
      raise errors.OpExecError("Can't shut down the instance's disks.")

    instance.primary_node = new_primary
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, feedback_fn)

    # Only start the instance if it's marked as up
    if not instance.admin_up:
      return

    feedback_fn("* activating the instance's disks on target node")
    logging.info("Starting instance %s on node %s",
                 instance.name, new_primary)

    disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                         ignore_secondaries=True)
    if not disks_ok:
      _ShutdownInstanceDisks(self, instance)
      raise errors.OpExecError("Can't activate the instance's disks")

    feedback_fn("* starting the instance on the target node")
    start_res = self.rpc.call_instance_start(new_primary, instance,
                                             None, None)
    start_err = start_res.fail_msg
    if start_err:
      # do not leave the disks active if the instance did not start
      _ShutdownInstanceDisks(self, instance)
      raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                               (instance.name, new_primary, start_err))
5094

    
5095

    
5096
class LUMigrateInstance(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown. The actual work is delegated to a
  L{TLMigrateInstance} tasklet.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "live", "cleanup"]

  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance and create the migration tasklet.

    The node lock list starts empty and is marked for replacement; it is
    filled in by L{DeclareLocks}.

    """
    self._ExpandAndLockInstance()

    node_level = locking.LEVEL_NODE
    self.needed_locks[node_level] = []
    self.recalculate_locks[node_level] = constants.LOCKS_REPLACE

    migrater = TLMigrateInstance(self, self.op.instance_name,
                                 self.op.live, self.op.cleanup)
    self._migrater = migrater
    self.tasklets = [migrater]

  def DeclareLocks(self, level):
    """Declare the instance's node locks when the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    @return: a tuple of the hook environment, the list made of the master
        node plus the instance's secondary nodes, and a copy of that list
        extended with the instance's current primary node

    """
    # the instance object is the one looked up by the tasklet
    instance = self._migrater.instance
    old_primary = instance.primary_node
    new_primary = instance.secondary_nodes[0]

    env = _BuildInstanceHookEnvByObject(self, instance)
    env.update({
      "MIGRATE_LIVE": self.op.live,
      "MIGRATE_CLEANUP": self.op.cleanup,
      "OLD_PRIMARY": old_primary,
      "OLD_SECONDARY": new_primary,
      "NEW_PRIMARY": new_primary,
      "NEW_SECONDARY": old_primary,
      })

    nodes = [self.cfg.GetMasterNode()]
    nodes.extend(instance.secondary_nodes)
    nodes_with_primary = nodes + [old_primary]
    return env, nodes, nodes_with_primary
5145

    
5146

    
5147
class LUMoveInstance(LogicalUnit):
  """Move an instance by data-copying.

  The instance is shut down on its current node, new disks are created
  on the target node, the data is copied over and the instance is
  (if marked as up) started on the target node.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "target_node"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    The opcode's C{shutdown_timeout} attribute is used if present,
    otherwise L{constants.DEFAULT_SHUTDOWN_TIMEOUT}.

    """
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    """Lock the instance and the target node.

    The expanded target node name is stored back into the opcode; the
    node lock list is marked as append-only so that L{DeclareLocks} can
    add to it.

    """
    self._ExpandAndLockInstance()
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    """Declare the lock on the instance's primary node.

    """
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)

  def BuildHooksEnv(self):
    """Build hooks env.

    @return: a tuple of the hook environment and, twice, the list made
        of the master node, the instance's primary node and the target
        node

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
                                       self.op.target_node]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the target node is not the instance's current
    primary node, that all disks are plain LVs or files, that the target
    node is online and not drained, that it has enough free memory (only
    checked if the instance is marked as up) and that the instance's
    bridges exist there.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)

    if instance.admin_up:
      # check memory requirements on the target node
      _CheckNodeFreeMemory(self, target_node, "moving instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the target node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    @param feedback_fn: function passed on to the configuration update
    @raise errors.OpExecError: if the instance cannot be shut down (and
        C{ignore_consistency} is not set), if the disks cannot be
        created or copied, or if the instance cannot be started on the
        target node

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    # "ignore_consistency" is not listed in _OP_REQP, so the opcode is
    # not guaranteed to carry it; a missing attribute means "do not
    # ignore" instead of failing with an AttributeError
    ignore_consistency = getattr(self.op, "ignore_consistency", False)

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        # the first failure aborts the whole copy
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        # do not leave the disks active if the instance did not start
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))
5327

    
5328

    
5329
class LUMigrateNode(LogicalUnit):
  """Migrate all instances from a node.

  One L{TLMigrateInstance} tasklet is created for every instance
  returned by L{_GetNodePrimaryInstances} for the node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name", "live"]
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the node and its primary instances, and build the tasklets.

    """
    node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    self.op.node_name = node_name

    self.needed_locks = {locking.LEVEL_NODE: [node_name]}
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    # Create tasklets for migrating instances for all instances on this node
    instance_names = []
    migraters = []
    for inst in _GetNodePrimaryInstances(self.cfg, node_name):
      logging.debug("Migrating instance %s", inst.name)
      instance_names.append(inst.name)
      # the last argument is the tasklet's "cleanup" flag
      migraters.append(TLMigrateInstance(self, inst.name, self.op.live,
                                         False))

    self.tasklets = migraters

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = instance_names

  def DeclareLocks(self, level):
    """Declare the instances' node locks when the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    @return: a tuple of the hook environment and, twice, a list holding
        only the master node

    """
    env = {"NODE_NAME": self.op.node_name}
    master_only = [self.cfg.GetMasterNode()]
    return (env, master_only, master_only)
5379

    
5380

    
5381
class TLMigrateInstance(Tasklet):
5382
  def __init__(self, lu, instance_name, live, cleanup):
5383
    """Initializes this class.
5384

5385
    """
5386
    Tasklet.__init__(self, lu)
5387

    
5388
    # Parameters
5389
    self.instance_name = instance_name
5390
    self.live = live
5391
    self.cleanup = cleanup
5392

    
5393
  def CheckPrereq(self):
5394
    """Check prerequisites.
5395

5396
    This checks that the instance is in the cluster.
5397

5398
    """
5399
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5400
    instance = self.cfg.GetInstanceInfo(instance_name)
5401
    assert instance is not None
5402

    
5403
    if instance.disk_template != constants.DT_DRBD8:
5404
      raise errors.OpPrereqError("Instance's disk layout is not"
5405
                                 " drbd8, cannot migrate.", errors.ECODE_STATE)
5406

    
5407
    secondary_nodes = instance.secondary_nodes
5408
    if not secondary_nodes:
5409
      raise errors.ConfigurationError("No secondary node but using"
5410
                                      " drbd8 disk template")
5411

    
5412
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
5413

    
5414
    target_node = secondary_nodes[0]
5415
    # check memory requirements on the secondary node
5416
    _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
5417
                         instance.name, i_be[constants.BE_MEMORY],
5418
                         instance.hypervisor)
5419

    
5420
    # check bridge existance
5421
    _CheckInstanceBridgesExist(self, instance, node=target_node)
5422

    
5423
    if not self.cleanup:
5424
      _CheckNodeNotDrained(self, target_node)
5425
      result = self.rpc.call_instance_migratable(instance.primary_node,
5426
                                                 instance)
5427
      result.Raise("Can't migrate, please use failover",
5428
                   prereq=True, ecode=errors.ECODE_STATE)
5429

    
5430
    self.instance = instance
5431

    
5432
  def _WaitUntilSync(self):
5433
    """Poll with custom rpc for disk sync.
5434

5435
    This uses our own step-based rpc call.
5436

5437
    """
5438
    self.feedback_fn("* wait until resync is done")
5439
    all_done = False
5440
    while not all_done:
5441
      all_done = True
5442
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5443
                                            self.nodes_ip,
5444
                                            self.instance.disks)
5445
      min_percent = 100
5446
      for node, nres in result.items():
5447
        nres.Raise("Cannot resync disks on node %s" % node)
5448
        node_done, node_percent = nres.payload
5449
        all_done = all_done and node_done
5450
        if node_percent is not None:
5451
          min_percent = min(min_percent, node_percent)
5452
      if not all_done:
5453
        if min_percent < 100:
5454
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
5455
        time.sleep(2)
5456

    
5457
  def _EnsureSecondary(self, node):
5458
    """Demote a node to secondary.
5459

5460
    """
5461
    self.feedback_fn("* switching node %s to secondary mode" % node)
5462

    
5463
    for dev in self.instance.disks:
5464
      self.cfg.SetDiskID(dev, node)
5465

    
5466
    result = self.rpc.call_blockdev_close(node, self.instance.name,
5467
                                          self.instance.disks)
5468
    result.Raise("Cannot change disk to secondary on node %s" % node)
5469

    
5470
  def _GoStandalone(self):
5471
    """Disconnect from the network.
5472

5473
    """
5474
    self.feedback_fn("* changing into standalone mode")
5475
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5476
                                               self.instance.disks)
5477
    for node, nres in result.items():
5478
      nres.Raise("Cannot disconnect disks node %s" % node)
5479

    
5480
  def _GoReconnect(self, multimaster):
5481
    """Reconnect to the network.
5482

5483
    """
5484
    if multimaster:
5485
      msg = "dual-master"
5486
    else:
5487
      msg = "single-master"
5488
    self.feedback_fn("* changing disks into %s mode" % msg)
5489
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5490
                                           self.instance.disks,
5491
                                           self.instance.name, multimaster)
5492
    for node, nres in result.items():
5493
      nres.Raise("Cannot change disks config on node %s" % node)
5494

    
5495
  def _ExecCleanup(self):
5496
    """Try to cleanup after a failed migration.
5497

5498
    The cleanup is done by:
5499
      - check that the instance is running only on one node
5500
        (and update the config if needed)
5501
      - change disks on its secondary node to secondary
5502
      - wait until disks are fully synchronized
5503
      - disconnect from the network
5504
      - change disks into single-master mode
5505
      - wait again until disks are fully synchronized
5506

5507
    """
5508
    instance = self.instance
5509
    target_node = self.target_node
5510
    source_node = self.source_node
5511

    
5512
    # check running on only one node
5513
    self.feedback_fn("* checking where the instance actually runs"
5514
                     " (if this hangs, the hypervisor might be in"
5515
                     " a bad state)")
5516
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
5517
    for node, result in ins_l.items():
5518
      result.Raise("Can't contact node %s" % node)
5519

    
5520
    runningon_source = instance.name in ins_l[source_node].payload
5521
    runningon_target = instance.name in ins_l[target_node].payload
5522

    
5523
    if runningon_source and runningon_target:
5524
      raise errors.OpExecError("Instance seems to be running on two nodes,"
5525
                               " or the hypervisor is confused. You will have"
5526
                               " to ensure manually that it runs only on one"
5527
                               " and restart this operation.")
5528

    
5529
    if not (runningon_source or runningon_target):
5530
      raise errors.OpExecError("Instance does not seem to be running at all."
5531
                               " In this case, it's safer to repair by"
5532
                               " running 'gnt-instance stop' to ensure disk"
5533
                               " shutdown, and then restarting it.")
5534

    
5535
    if runningon_target:
5536
      # the migration has actually succeeded, we need to update the config
5537
      self.feedback_fn("* instance running on secondary node (%s),"
5538
                       " updating config" % target_node)
5539
      instance.primary_node = target_node
5540
      self.cfg.Update(instance, self.feedback_fn)
5541
      demoted_node = source_node
5542
    else:
5543
      self.feedback_fn("* instance confirmed to be running on its"
5544
                       " primary node (%s)" % source_node)
5545
      demoted_node = target_node
5546

    
5547
    self._EnsureSecondary(demoted_node)
5548
    try:
5549
      self._WaitUntilSync()
5550
    except errors.OpExecError:
5551
      # we ignore here errors, since if the device is standalone, it
5552
      # won't be able to sync
5553
      pass
5554
    self._GoStandalone()
5555
    self._GoReconnect(False)
5556
    self._WaitUntilSync()
5557

    
5558
    self.feedback_fn("* done")
5559

    
5560
  def _RevertDiskStatus(self):
5561
    """Try to revert the disk status after a failed migration.
5562

5563
    """
5564
    target_node = self.target_node
5565
    try:
5566
      self._EnsureSecondary(target_node)
5567
      self._GoStandalone()
5568
      self._GoReconnect(False)
5569
      self._WaitUntilSync()
5570
    except errors.OpExecError, err:
5571
      self.lu.LogWarning("Migration failed and I can't reconnect the"
5572
                         " drives: error '%s'\n"
5573
                         "Please look and recover the instance status" %
5574
                         str(err))
5575

    
5576
  def _AbortMigration(self):
5577
    """Call the hypervisor code to abort a started migration.
5578

5579
    """
5580
    instance = self.instance
5581
    target_node = self.target_node
5582
    migration_info = self.migration_info
5583

    
5584
    abort_result = self.rpc.call_finalize_migration(target_node,
5585
                                                    instance,
5586
                                                    migration_info,
5587
                                                    False)
5588
    abort_msg = abort_result.fail_msg
5589
    if abort_msg:
5590
      logging.error("Aborting migration failed on target node %s: %s",
5591
                    target_node, abort_msg)
5592
      # Don't raise an exception here, as we stil have to try to revert the
5593
      # disk status, even if this step failed.
5594

    
5595
  def _ExecMigration(self):
5596
    """Migrate an instance.
5597

5598
    The migrate is done by:
5599
      - change the disks into dual-master mode
5600
      - wait until disks are fully synchronized again
5601
      - migrate the instance
5602
      - change disks on the new secondary node (the old primary) to secondary
5603
      - wait until disks are fully synchronized
5604
      - change disks into single-master mode
5605

5606
    """
5607
    instance = self.instance
5608
    target_node = self.target_node
5609
    source_node = self.source_node
5610

    
5611
    self.feedback_fn("* checking disk consistency between source and target")
5612
    for dev in instance.disks:
5613
      if not _CheckDiskConsistency(self, dev, target_node, False):
5614
        raise errors.OpExecError("Disk %s is degraded or not fully"
5615
                                 " synchronized on target node,"
5616
                                 " aborting migrate." % dev.iv_name)
5617

    
5618
    # First get the migration information from the remote node
5619
    result = self.rpc.call_migration_info(source_node, instance)
5620
    msg = result.fail_msg
5621
    if msg:
5622
      log_err = ("Failed fetching source migration information from %s: %s" %
5623
                 (source_node, msg))
5624
      logging.error(log_err)
5625
      raise errors.OpExecError(log_err)
5626

    
5627
    self.migration_info = migration_info = result.payload
5628

    
5629
    # Then switch the disks to master/master mode
5630
    self._EnsureSecondary(target_node)
5631
    self._GoStandalone()
5632
    self._GoReconnect(True)
5633
    self._WaitUntilSync()
5634

    
5635
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
5636
    result = self.rpc.call_accept_instance(target_node,
5637
                                           instance,
5638
                                           migration_info,
5639
                                           self.nodes_ip[target_node])
5640

    
5641
    msg = result.fail_msg
5642
    if msg:
5643
      logging.error("Instance pre-migration failed, trying to revert"
5644
                    " disk status: %s", msg)
5645
      self.feedback_fn("Pre-migration failed, aborting")
5646
      self._AbortMigration()
5647
      self._RevertDiskStatus()
5648
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
5649
                               (instance.name, msg))
5650

    
5651
    self.feedback_fn("* migrating instance to %s" % target_node)
5652
    time.sleep(10)
5653
    result = self.rpc.call_instance_migrate(source_node, instance,
5654
                                            self.nodes_ip[target_node],
5655
                                            self.live)
5656
    msg = result.fail_msg
5657
    if msg:
5658
      logging.error("Instance migration failed, trying to revert"
5659
                    " disk status: %s", msg)
5660
      self.feedback_fn("Migration failed, aborting")
5661
      self._AbortMigration()
5662
      self._RevertDiskStatus()
5663
      raise errors.OpExecError("Could not migrate instance %s: %s" %
5664
                               (instance.name, msg))
5665
    time.sleep(10)
5666

    
5667
    instance.primary_node = target_node
5668
    # distribute new instance config to the other nodes
5669
    self.cfg.Update(instance, self.feedback_fn)
5670

    
5671
    result = self.rpc.call_finalize_migration(target_node,
5672
                                              instance,
5673
                                              migration_info,
5674
                                              True)
5675
    msg = result.fail_msg
5676
    if msg:
5677
      logging.error("Instance migration succeeded, but finalization failed:"
5678
                    " %s", msg)
5679
      raise errors.OpExecError("Could not finalize instance migration: %s" %
5680
                               msg)
5681

    
5682
    self._EnsureSecondary(source_node)
5683
    self._WaitUntilSync()
5684
    self._GoStandalone()
5685
    self._GoReconnect(False)
5686
    self._WaitUntilSync()
5687

    
5688
    self.feedback_fn("* done")
5689

    
5690
  def Exec(self, feedback_fn):
5691
    """Perform the migration.
5692

5693
    """
5694
    feedback_fn("Migrating instance %s" % self.instance.name)
5695

    
5696
    self.feedback_fn = feedback_fn
5697

    
5698
    self.source_node = self.instance.primary_node
5699
    self.target_node = self.instance.secondary_nodes[0]
5700
    self.all_nodes = [self.source_node, self.target_node]
5701
    self.nodes_ip = {
5702
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
5703
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
5704
      }
5705

    
5706
    if self.cleanup:
5707
      return self._ExecCleanup()
5708
    else:
5709
      return self._ExecMigration()
5710

    
5711

    
5712
def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Recursively create a block device tree on one node.

  The children of the device are always visited first. The device
  itself is only created when creation is forced, either by the
  caller or because the device (or one of its ancestors) reports
  that it must exist on secondary nodes.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      is switched to True as soon as a device whose
      CreateOnSecondary() returns a true value is found, and the new
      value is handed down to its children
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device own Open() execution

  """
  if device.CreateOnSecondary():
    # from here downwards everything has to exist on this node
    force_create = True

  # children first, so that the parent can be built on top of them
  for child in device.children or []:
    _CreateBlockDev(lu, node, instance, child, force_create,
                    info, force_open)

  if force_create:
    _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
5752

    
5753

    
5754
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create exactly one block device on a node.

  No recursion into the children of the device is done here, which
  means they have to exist already when this function is called.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device own Open() execution

  """
  lu.cfg.SetDiskID(device, node)
  creation = lu.rpc.call_blockdev_create(node, device, device.size,
                                         instance.name, force_open, info)
  failure_text = ("Can't create block device %s on"
                  " node %s for instance %s" %
                  (device, node, instance.name))
  creation.Raise(failure_text)

  # remember the ID reported by the node, unless one is already set
  if device.physical_id is None:
    device.physical_id = creation.payload
5782

    
5783

    
5784
def _GenerateUniqueNames(lu, exts):
5785
  """Generate a suitable LV name.
5786

5787
  This will generate a logical volume name for the given instance.
5788

5789
  """
5790
  results = []
5791
  for val in exts:
5792
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
5793
    results.append("%s%s" % (new_id, val))
5794
  return results
5795

    
5796

    
5797
def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
                         p_minor, s_minor):
  """Build a DRBD8 disk object together with its two LV children.

  A port and a shared secret are requested from the configuration.
  The data volume gets the requested size, the metadata volume a
  fixed size of 128.

  @param lu: the lu on whose behalf we execute
  @param primary: the primary node, stored in the DRBD logical ID
  @param secondary: the secondary node, stored in the DRBD logical ID
  @param size: size of the DRBD device and of its data volume
  @param names: names of the data (index 0) and metadata (index 1)
      logical volumes
  @param iv_name: the iv_name to set on the DRBD device
  @param p_minor: minor stored in the logical ID after the port
  @param s_minor: minor stored in the logical ID after C{p_minor}
  @return: the new DRBD8 L{objects.Disk}

  """
  port = lu.cfg.AllocatePort()
  vgname = lu.cfg.GetVGName()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())

  backing_lvs = [
    objects.Disk(dev_type=constants.LD_LV, size=size,
                 logical_id=(vgname, names[0])),
    objects.Disk(dev_type=constants.LD_LV, size=128,
                 logical_id=(vgname, names[1])),
    ]
  drbd_id = (primary, secondary, port, p_minor, s_minor, shared_secret)

  return objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                      logical_id=drbd_id,
                      children=backing_lvs,
                      iv_name=iv_name)
5816

    
5817

    
5818
def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index):
  """Build the list of disk objects for a given disk template.

  @param lu: the lu on whose behalf we execute
  @param template_name: one of the C{constants.DT_*} disk templates
  @param instance_name: passed to the DRBD minor allocation
  @param primary_node: the primary node (used for DRBD8 only)
  @param secondary_nodes: list of secondary nodes; must have exactly
      one entry for DRBD8 and none for the plain and file templates
  @param disk_info: list of dicts with "size" and "mode" keys, one
      per disk
  @param file_storage_dir: directory of the disk files (file
      template only)
  @param file_driver: driver stored in the logical ID of file disks
  @param base_index: index of the first generated disk; used in the
      names and in the "disk/N" iv_name values
  @return: list of L{objects.Disk}, empty for the diskless template
  @raise errors.ProgrammerError: for an unknown template or a wrong
      number of secondary nodes

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)

  if template_name == constants.DT_DISKLESS:
    return []

  if template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    suffixes = [".disk%d" % (base_index + i) for i in range(disk_count)]
    lv_names = _GenerateUniqueNames(lu, suffixes)
    return [objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
                         logical_id=(vgname, lv_names[idx]),
                         iv_name="disk/%d" % (idx + base_index),
                         mode=disk["mode"])
            for idx, disk in enumerate(disk_info)]

  if template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    # two minors per disk: one on the primary, one on the secondary
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    suffixes = [".disk%d" % (base_index + i) for i in range(disk_count)]
    names = []
    for lv_prefix in _GenerateUniqueNames(lu, suffixes):
      names.extend([lv_prefix + "_data", lv_prefix + "_meta"])

    drbd_disks = []
    for idx, disk in enumerate(disk_info):
      # names and minors hold two consecutive entries for each disk
      first = idx * 2
      drbd_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk["size"], names[first:first + 2],
                                      "disk/%d" % (idx + base_index),
                                      minors[first], minors[first + 1])
      drbd_dev.mode = disk["mode"]
      drbd_disks.append(drbd_dev)
    return drbd_disks

  if template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    _RequireFileStorage()

    file_disks = []
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_path = "%s/disk%d" % (file_storage_dir, disk_index)
      file_disks.append(objects.Disk(dev_type=constants.LD_FILE,
                                     size=disk["size"],
                                     iv_name="disk/%d" % disk_index,
                                     logical_id=(file_driver, disk_path),
                                     mode=disk["mode"]))
    return file_disks

  raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
5884

    
5885

    
5886
def _GetInstanceInfoText(instance):
5887
  """Compute that text that should be added to the disk's metadata.
5888

5889
  """
5890
  return "originstname+%s" % instance.name
5891

    
5892

    
5893
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all the disks of an instance.

  This abstracts away some work from AddInstance. For file-based
  instances the storage directory is created first, on the primary
  (or overriding) node only. Each disk is then created on all
  relevant nodes, with creation and opening forced only on the
  primary (or overriding) node.

  Nothing is returned; a failure of the directory creation is
  reported by raising from the RPC result.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation

  """
  info = _GetInstanceInfoText(instance)
  if target_node is not None:
    pnode = target_node
    all_nodes = [pnode]
  else:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes

  if instance.disk_template == constants.DT_FILE:
    # all disk files live in the same directory, take it from the first
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    dir_result = lu.rpc.call_file_storage_dir_create(pnode,
                                                     file_storage_dir)
    dir_result.Raise("Failed to create directory '%s' on"
                     " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUSetInstanceParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
      continue
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    #HARDCODE
    for node in all_nodes:
      on_primary = node == pnode
      _CreateBlockDev(lu, node, instance, device, on_primary, info,
                      on_primary)
5936

    
5937

    
5938
def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  For file-based instances the directory holding the disk files is
  removed as well, on the same node the disks were removed from.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal; False if any block device or
      the file storage directory could not be removed

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      # only the top-level device, and only on the overriding node
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        # best effort: warn and go on with the remaining devices
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    tgt = target_node or instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      # report the node the removal was actually attempted on, which
      # is not the primary node when target_node overrides it
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, tgt, result.fail_msg)
      all_result = False

  return all_result
5985

    
5986

    
5987
def _ComputeDiskSize(disk_template, disks):
  """Compute the volume group space needed by a set of disks.

  @param disk_template: one of the C{constants.DT_*} disk templates
  @param disks: list of dicts, each with a "size" key
  @return: the required size, or None for the diskless and file
      templates
  @raise errors.ProgrammerError: if the disk template is not known

  """
  plain_size = sum([d["size"] for d in disks])
  # 128 MB are added for drbd metadata for each disk
  drbd_size = sum([d["size"] + 128 for d in disks])

  # Required free disk space as a function of disk and swap space
  required = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: plain_size,
    constants.DT_DRBD8: drbd_size,
    constants.DT_FILE: None,
  }

  if disk_template in required:
    return required[disk_template]

  raise errors.ProgrammerError("Disk template '%s' size requirement"
                               " is unknown" %  disk_template)
6005

    
6006

    
6007
def _CheckHVParams(lu, nodenames, hvname, hvparams):
6008
  """Hypervisor parameter validation.
6009

6010
  This function abstract the hypervisor parameter validation to be
6011
  used in both instance create and instance modify.
6012

6013
  @type lu: L{LogicalUnit}
6014
  @param lu: the logical unit for which we check
6015
  @type nodenames: list
6016
  @param nodenames: the list of nodes on which we should check
6017
  @type hvname: string
6018
  @param hvname: the name of the hypervisor we should use
6019
  @type hvparams: dict
6020
  @param hvparams: the parameters which we need to check
6021
  @raise errors.OpPrereqError: if the parameters are not valid
6022

6023
  """
6024
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6025
                                                  hvname,
6026
                                                  hvparams)
6027
  for node in nodenames:
6028
    info = hvinfo[node]
6029
    if info.offline:
6030
      continue
6031
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
6032

    
6033

    
6034
class LUCreateInstance(LogicalUnit):
6035
  """Create an instance.
6036

6037
  """
6038
  HPATH = "instance-add"
6039
  HTYPE = constants.HTYPE_INSTANCE
6040
  _OP_REQP = ["instance_name", "disks",
6041
              "mode", "start",
6042
              "wait_for_sync", "ip_check", "nics",
6043
              "hvparams", "beparams"]
6044
  REQ_BGL = False
6045

    
6046
  def CheckArguments(self):
    """Validate and normalize the opcode arguments.

    This fills in missing optional opcode attributes, normalizes and
    (unless name checks are disabled) resolves the instance name, and
    verifies the disk adoption settings, the creation mode, the file
    storage options and the node/iallocator selection. For creation
    (as opposed to import) it also requires an OS type and a disk
    template.

    It sets C{self.adopt_disks} and C{self.check_ip}, and also
    C{self.hostname1} when the name check is enabled.

    @raise errors.OpPrereqError: if any of the arguments is invalid

    """
    # set optional parameters to none if they don't exist
    for attr in ["pnode", "snode", "iallocator", "hypervisor",
                 "disk_template", "identify_defaults"]:
      if not hasattr(self.op, attr):
        setattr(self.op, attr, None)

    # do not require name_check to ease forward/backward compatibility
    # for tools
    if not hasattr(self.op, "name_check"):
      self.op.name_check = True
    if not hasattr(self.op, "no_install"):
      self.op.no_install = False
    if self.op.no_install and self.op.start:
      self.LogInfo("No-installation mode selected, disabling startup")
      self.op.start = False
    # validate/normalize the instance name
    self.op.instance_name = utils.HostInfo.NormalizeName(self.op.instance_name)
    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do ip checks without a name check",
                                 errors.ECODE_INVAL)
    # check disk information: either all adopt, or no adopt
    has_adopt = has_no_adopt = False
    for disk in self.op.disks:
      if "adopt" in disk:
        has_adopt = True
      else:
        has_no_adopt = True
    if has_adopt and has_no_adopt:
      raise errors.OpPrereqError("Either all disks are adopted or none is",
                                 errors.ECODE_INVAL)
    if has_adopt:
      if self.op.disk_template != constants.DT_PLAIN:
        raise errors.OpPrereqError("Disk adoption is only supported for the"
                                   " 'plain' disk template",
                                   errors.ECODE_INVAL)
      if self.op.iallocator is not None:
        raise errors.OpPrereqError("Disk adoption not allowed with an"
                                   " iallocator script", errors.ECODE_INVAL)
      if self.op.mode == constants.INSTANCE_IMPORT:
        raise errors.OpPrereqError("Disk adoption not allowed for"
                                   " instance import", errors.ECODE_INVAL)

    self.adopt_disks = has_adopt

    # verify creation mode
    if self.op.mode not in (constants.INSTANCE_CREATE,
                            constants.INSTANCE_IMPORT):
      raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    # instance name verification
    if self.op.name_check:
      self.hostname1 = utils.GetHostInfo(self.op.instance_name)
      self.op.instance_name = self.hostname1.name
      # used in CheckPrereq for ip ping check
      self.check_ip = self.hostname1.ip
    else:
      self.check_ip = None

    # file storage checks
    if (self.op.file_driver and
        self.op.file_driver not in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver, errors.ECODE_INVAL)

    # Only relative directories are accepted here: the check rejects
    # absolute paths, so the error message has to say exactly that.
    # NOTE(review): presumably the directory is later joined below the
    # cluster-wide file storage directory -- confirm in CheckPrereq
    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
      raise errors.OpPrereqError("File storage directory path must be"
                                 " relative, not absolute",
                                 errors.ECODE_INVAL)

    ### Node/iallocator related checks
    if [self.op.iallocator, self.op.pnode].count(None) != 1:
      raise errors.OpPrereqError("One and only one of iallocator and primary"
                                 " node must be given",
                                 errors.ECODE_INVAL)

    if self.op.mode == constants.INSTANCE_IMPORT:
      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      # works again!
      self.op.force_variant = True

      if self.op.no_install:
        self.LogInfo("No-installation mode has no effect during import")

    else: # INSTANCE_CREATE
      if getattr(self.op, "os_type", None) is None:
        raise errors.OpPrereqError("No guest OS specified",
                                   errors.ECODE_INVAL)
      self.op.force_variant = getattr(self.op, "force_variant", False)
      if self.op.disk_template is None:
        raise errors.OpPrereqError("No disk template specified",
                                   errors.ECODE_INVAL)
6143

    
6144
  def ExpandNames(self):
    """Compute the locks needed for creating the instance.

    The new instance name is registered as a lock to be added. On the
    node level either all nodes are locked (when an iallocator is
    used, or when importing without a source node) or only the
    primary, secondary and import source nodes.

    For imports, the source path defaults to the instance name and a
    relative source path is made absolute under the export directory
    when a source node was given.

    @raise errors.OpPrereqError: if the instance already exists, or
        if an absolute import path was given without a source node

    """
    self.needed_locks = {}

    new_name = self.op.instance_name
    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if new_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name, errors.ECODE_EXISTS)

    self.add_locks[locking.LEVEL_INSTANCE] = new_name

    if self.op.iallocator:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
      wanted_nodes = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
        wanted_nodes.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes

    if self.op.mode != constants.INSTANCE_IMPORT:
      return

    # in case of import lock the source node too
    src_node = getattr(self.op, "src_node", None)
    src_path = getattr(self.op, "src_path", None)

    if src_path is None:
      src_path = self.op.instance_name
      self.op.src_path = src_path

    if src_node is None:
      # the export has to be searched for, so all nodes are needed
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
      self.op.src_node = None
      if os.path.isabs(src_path):
        raise errors.OpPrereqError("Importing an instance from an absolute"
                                   " path requires a source node option.",
                                   errors.ECODE_INVAL)
      return

    src_node = _ExpandNodeName(self.cfg, src_node)
    self.op.src_node = src_node
    node_locks = self.needed_locks[locking.LEVEL_NODE]
    if node_locks is not locking.ALL_SET:
      node_locks.append(src_node)
    if not os.path.isabs(src_path):
      self.op.src_path = utils.PathJoin(constants.EXPORT_DIR, src_path)
6193

    
6194
  def _RunAllocator(self):
    """Select the instance's nodes by running the iallocator.

    The allocator named in the opcode is run in allocation mode; on
    success the first returned node becomes the primary node and,
    when two nodes are required, the second one the secondary node.

    @raise errors.OpPrereqError: if the allocator fails or returns a
        wrong number of nodes

    """
    allocator_name = self.op.iallocator
    nic_dicts = [nic.ToDict() for nic in self.nics]
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     tags=[],
                     os=self.op.os_type,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     mem_size=self.be_full[constants.BE_MEMORY],
                     disks=self.disks,
                     nics=nic_dicts,
                     hypervisor=self.op.hypervisor,
                     )

    ial.Run(allocator_name)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)

    returned_count = len(ial.result)
    if returned_count != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, returned_count,
                                  ial.required_nodes), errors.ECODE_FAULT)

    self.op.pnode = ial.result[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.result))
    if ial.required_nodes == 2:
      self.op.snode = ial.result[1]
6230

    
6231
  def BuildHooksEnv(self):
    """Build the hooks environment for instance creation.

    The hooks run on the master node and on the primary and
    secondary nodes of the new instance.

    @return: tuple of (environment dict, node list, node list); the
        same node list object is used for both positions

    """
    env = {"ADD_MODE": self.op.mode}
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    disk_tuples = [(d["size"], d["mode"]) for d in self.disks]
    instance_env = _BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      memory=self.be_full[constants.BE_MEMORY],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=disk_tuples,
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor_name=self.op.hypervisor,
    )
    env.update(instance_env)

    hook_nodes = [self.cfg.GetMasterNode(), self.op.pnode]
    hook_nodes = hook_nodes + self.secondaries
    return env, hook_nodes, hook_nodes
6264

    
6265
  def _ReadExportInfo(self):
    """Load and validate the export information of the import source.

    If no source node was given, the export lists of all locked
    nodes are searched for the (relative) source path. The first
    node having it becomes the source node, and the source path is
    made absolute under the export directory. Both values are written
    back to the opcode.

    @return: the parsed export information
    @raise errors.OpPrereqError: if no export is found for a relative
        path, or the export has the wrong version
    @raise errors.ProgrammerError: if the export config lacks the
        export section

    """
    assert self.op.mode == constants.INSTANCE_IMPORT

    src_node = self.op.src_node
    src_path = self.op.src_path

    if src_node is None:
      locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
      exp_list = self.rpc.call_export_list(locked_nodes)
      for node in exp_list:
        node_exports = exp_list[node]
        if node_exports.fail_msg:
          continue
        if src_path in node_exports.payload:
          self.op.src_node = src_node = node
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
                                                       src_path)
          break
      else:
        # the loop ended without finding the export on any node
        raise errors.OpPrereqError("No export found for relative path %s" %
                                    src_path, errors.ECODE_INVAL)

    _CheckNodeOnline(self, src_node)
    info_result = self.rpc.call_export_info(src_node, src_path)
    info_result.Raise("No export or invalid export found in dir %s" %
                      src_path)

    export_info = objects.SerializableConfigParser.Loads(
      str(info_result.payload))
    if not export_info.has_section(constants.INISECT_EXP):
      raise errors.ProgrammerError("Corrupted export config",
                                   errors.ECODE_ENVIRON)

    ei_version = export_info.get(constants.INISECT_EXP, "version")
    if int(ei_version) != constants.EXPORT_VERSION:
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                 (ei_version, constants.EXPORT_VERSION),
                                 errors.ECODE_ENVIRON)
    return export_info
6311

    
6312
  def _ReadExportParams(self, einfo):
    """Fill unspecified opcode parameters from the export data.

    The OS type is always taken from the export. The disk template,
    disks, NICs, hypervisor, hypervisor parameters and backend
    parameters are only taken from the export when the opcode does
    not specify them; values given in the opcode are never
    overridden.

    @param einfo: the export information, as returned by
        L{_ReadExportInfo}
    @raise errors.OpPrereqError: if the disk template or the disks
        are specified neither in the opcode nor in the export

    """
    ins_section = constants.INISECT_INS

    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")

    if self.op.disk_template is None:
      if not einfo.has_option(ins_section, "disk_template"):
        raise errors.OpPrereqError("No disk template specified and the export"
                                   " is missing the disk_template information",
                                   errors.ECODE_INVAL)
      self.op.disk_template = einfo.get(ins_section, "disk_template")

    if not self.op.disks:
      if not einfo.has_option(ins_section, "disk_count"):
        raise errors.OpPrereqError("No disk info specified and the export"
                                   " is missing the disk information",
                                   errors.ECODE_INVAL)
      # TODO: import the disk iv_name too
      disk_count = einfo.getint(ins_section, "disk_count")
      self.op.disks = [{"size": einfo.getint(ins_section,
                                             "disk%d_size" % idx)}
                       for idx in range(disk_count)]

    if not self.op.nics and einfo.has_option(ins_section, "nic_count"):
      nic_keys = list(constants.NICS_PARAMETERS) + ["ip", "mac"]
      imported_nics = []
      for idx in range(einfo.getint(ins_section, "nic_count")):
        imported_nics.append(
          dict((key, einfo.get(ins_section, "nic%d_%s" % (idx, key)))
               for key in nic_keys))
      self.op.nics = imported_nics

    if (self.op.hypervisor is None and
        einfo.has_option(ins_section, "hypervisor")):
      self.op.hypervisor = einfo.get(ins_section, "hypervisor")

    if einfo.has_section(constants.INISECT_HYP):
      # use the export parameters but do not override the ones
      # specified by the user
      for name, value in einfo.items(constants.INISECT_HYP):
        self.op.hvparams.setdefault(name, value)

    if einfo.has_section(constants.INISECT_BEP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_BEP):
        self.op.beparams.setdefault(name, value)
    else:
      # try to read the parameters old style, from the main section
      for name in constants.BES_PARAMETERS:
        if name in self.op.beparams:
          continue
        if einfo.has_option(ins_section, name):
          self.op.beparams[name] = einfo.get(ins_section, name)
6376

    
6377
  def _RevertToDefaults(self, cluster):
    """Drop the parameters whose values equal the cluster defaults.

    Hypervisor, backend and NIC parameters given in the opcode are
    removed (in place) whenever their value is the same as the
    corresponding cluster-level default.

    @param cluster: the cluster object providing the default values

    """
    # hvparams
    hv_defs = cluster.GetHVDefaults(self.op.hypervisor, self.op.os_type)
    hvparams = self.op.hvparams
    same_as_default = [name for name in hvparams
                       if name in hv_defs and hv_defs[name] == hvparams[name]]
    for name in same_as_default:
      del hvparams[name]

    # beparams
    be_defs = cluster.beparams.get(constants.PP_DEFAULT, {})
    beparams = self.op.beparams
    same_as_default = [name for name in beparams
                       if name in be_defs and be_defs[name] == beparams[name]]
    for name in same_as_default:
      del beparams[name]

    # nic params
    nic_defs = cluster.nicparams.get(constants.PP_DEFAULT, {})
    for nic in self.op.nics:
      for name in constants.NICS_PARAMETERS:
        if name not in nic or name not in nic_defs:
          continue
        if nic[name] == nic_defs[name]:
          del nic[name]
6397

    
6398
  def CheckPrereq(self):
6399
    """Check prerequisites.
6400

6401
    """
6402
    if self.op.mode == constants.INSTANCE_IMPORT:
6403
      export_info = self._ReadExportInfo()
6404
      self._ReadExportParams(export_info)
6405

    
6406
    _CheckDiskTemplate(self.op.disk_template)
6407

    
6408
    if (not self.cfg.GetVGName() and
6409
        self.op.disk_template not in constants.DTS_NOT_LVM):
6410
      raise errors.OpPrereqError("Cluster does not support lvm-based"
6411
                                 " instances", errors.ECODE_STATE)
6412

    
6413
    if self.op.hypervisor is None:
6414
      self.op.hypervisor = self.cfg.GetHypervisorType()
6415

    
6416
    cluster = self.cfg.GetClusterInfo()
6417
    enabled_hvs = cluster.enabled_hypervisors
6418
    if self.op.hypervisor not in enabled_hvs:
6419
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
6420
                                 " cluster (%s)" % (self.op.hypervisor,
6421
                                  ",".join(enabled_hvs)),
6422
                                 errors.ECODE_STATE)
6423

    
6424
    # check hypervisor parameter syntax (locally)
6425
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6426
    filled_hvp = objects.FillDict(cluster.GetHVDefaults(self.op.hypervisor,
6427
                                                        self.op.os_type),
6428
                                  self.op.hvparams)
6429
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
6430
    hv_type.CheckParameterSyntax(filled_hvp)
6431
    self.hv_full = filled_hvp
6432
    # check that we don't specify global parameters on an instance
6433
    _CheckGlobalHvParams(self.op.hvparams)
6434

    
6435
    # fill and remember the beparams dict
6436
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6437
    self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
6438
                                    self.op.beparams)
6439

    
6440
    # now that hvp/bep are in final format, let's reset to defaults,
6441
    # if told to do so
6442
    if self.op.identify_defaults:
6443
      self._RevertToDefaults(cluster)
6444

    
6445
    # NIC buildup
6446
    self.nics = []
6447
    for idx, nic in enumerate(self.op.nics):
6448
      nic_mode_req = nic.get("mode", None)
6449
      nic_mode = nic_mode_req
6450
      if nic_mode is None:
6451
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
6452

    
6453
      # in routed mode, for the first nic, the default ip is 'auto'
6454
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
6455
        default_ip_mode = constants.VALUE_AUTO
6456
      else:
6457
        default_ip_mode = constants.VALUE_NONE
6458

    
6459
      # ip validity checks
6460
      ip = nic.get("ip", default_ip_mode)
6461
      if ip is None or ip.lower() == constants.VALUE_NONE:
6462
        nic_ip = None
6463
      elif ip.lower() == constants.VALUE_AUTO:
6464
        if not self.op.name_check:
6465
          raise errors.OpPrereqError("IP address set to auto but name checks"
6466
                                     " have been skipped. Aborting.",
6467
                                     errors.ECODE_INVAL)
6468
        nic_ip = self.hostname1.ip
6469
      else:
6470
        if not utils.IsValidIP(ip):
6471
          raise errors.OpPrereqError("Given IP address '%s' doesn't look"
6472
                                     " like a valid IP" % ip,
6473
                                     errors.ECODE_INVAL)
6474
        nic_ip = ip
6475

    
6476
      # TODO: check the ip address for uniqueness
6477
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
6478
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
6479
                                   errors.ECODE_INVAL)
6480

    
6481
      # MAC address verification
6482
      mac = nic.get("mac", constants.VALUE_AUTO)
6483
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6484
        mac = utils.NormalizeAndValidateMac(mac)
6485

    
6486
        try:
6487
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
6488
        except errors.ReservationError:
6489
          raise errors.OpPrereqError("MAC address %s already in use"
6490
                                     " in cluster" % mac,
6491
                                     errors.ECODE_NOTUNIQUE)
6492

    
6493
      # bridge verification
6494
      bridge = nic.get("bridge", None)
6495
      link = nic.get("link", None)
6496
      if bridge and link:
6497
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
6498
                                   " at the same time", errors.ECODE_INVAL)
6499
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
6500
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
6501
                                   errors.ECODE_INVAL)
6502
      elif bridge:
6503
        link = bridge
6504

    
6505
      nicparams = {}
6506
      if nic_mode_req:
6507
        nicparams[constants.NIC_MODE] = nic_mode_req
6508
      if link:
6509
        nicparams[constants.NIC_LINK] = link
6510

    
6511
      check_params = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
6512
                                      nicparams)
6513
      objects.NIC.CheckParameterSyntax(check_params)
6514
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
6515

    
6516
    # disk checks/pre-build
6517
    self.disks = []
6518
    for disk in self.op.disks:
6519
      mode = disk.get("mode", constants.DISK_RDWR)
6520
      if mode not in constants.DISK_ACCESS_SET:
6521
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
6522
                                   mode, errors.ECODE_INVAL)
6523
      size = disk.get("size", None)
6524
      if size is None:
6525
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
6526
      try:
6527
        size = int(size)
6528
      except (TypeError, ValueError):
6529
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
6530
                                   errors.ECODE_INVAL)
6531
      new_disk = {"size": size, "mode": mode}
6532
      if "adopt" in disk:
6533
        new_disk["adopt"] = disk["adopt"]
6534
      self.disks.append(new_disk)
6535

    
6536
    if self.op.mode == constants.INSTANCE_IMPORT:
6537

    
6538
      # Check that the new instance doesn't have less disks than the export
6539
      instance_disks = len(self.disks)
6540
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
6541
      if instance_disks < export_disks:
6542
        raise errors.OpPrereqError("Not enough disks to import."
6543
                                   " (instance: %d, export: %d)" %
6544
                                   (instance_disks, export_disks),
6545
                                   errors.ECODE_INVAL)
6546

    
6547
      disk_images = []
6548
      for idx in range(export_disks):
6549
        option = 'disk%d_dump' % idx
6550
        if export_info.has_option(constants.INISECT_INS, option):
6551
          # FIXME: are the old os-es, disk sizes, etc. useful?
6552
          export_name = export_info.get(constants.INISECT_INS, option)
6553
          image = utils.PathJoin(self.op.src_path, export_name)
6554
          disk_images.append(image)
6555
        else:
6556
          disk_images.append(False)
6557

    
6558
      self.src_images = disk_images
6559

    
6560
      old_name = export_info.get(constants.INISECT_INS, 'name')
6561
      try:
6562
        exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
6563
      except (TypeError, ValueError), err:
6564
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
6565
                                   " an integer: %s" % str(err),
6566
                                   errors.ECODE_STATE)
6567
      if self.op.instance_name == old_name:
6568
        for idx, nic in enumerate(self.nics):
6569
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
6570
            nic_mac_ini = 'nic%d_mac' % idx
6571
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
6572

    
6573
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
6574

    
6575
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
6576
    if self.op.ip_check:
6577
      if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
6578
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
6579
                                   (self.check_ip, self.op.instance_name),
6580
                                   errors.ECODE_NOTUNIQUE)
6581

    
6582
    #### mac address generation
6583
    # By generating here the mac address both the allocator and the hooks get
6584
    # the real final mac address rather than the 'auto' or 'generate' value.
6585
    # There is a race condition between the generation and the instance object
6586
    # creation, which means that we know the mac is valid now, but we're not
6587
    # sure it will be when we actually add the instance. If things go bad
6588
    # adding the instance will abort because of a duplicate mac, and the
6589
    # creation job will fail.
6590
    for nic in self.nics:
6591
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6592
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
6593

    
6594
    #### allocator run
6595

    
6596
    if self.op.iallocator is not None:
6597
      self._RunAllocator()
6598

    
6599
    #### node related checks
6600

    
6601
    # check primary node
6602
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
6603
    assert self.pnode is not None, \
6604
      "Cannot retrieve locked node %s" % self.op.pnode
6605
    if pnode.offline:
6606
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
6607
                                 pnode.name, errors.ECODE_STATE)
6608
    if pnode.drained:
6609
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
6610
                                 pnode.name, errors.ECODE_STATE)
6611

    
6612
    self.secondaries = []
6613

    
6614
    # mirror node verification
6615
    if self.op.disk_template in constants.DTS_NET_MIRROR:
6616
      if self.op.snode is None:
6617
        raise errors.OpPrereqError("The networked disk templates need"
6618
                                   " a mirror node", errors.ECODE_INVAL)
6619
      if self.op.snode == pnode.name:
6620
        raise errors.OpPrereqError("The secondary node cannot be the"
6621
                                   " primary node.", errors.ECODE_INVAL)
6622
      _CheckNodeOnline(self, self.op.snode)
6623
      _CheckNodeNotDrained(self, self.op.snode)
6624
      self.secondaries.append(self.op.snode)
6625

    
6626
    nodenames = [pnode.name] + self.secondaries
6627

    
6628
    req_size = _ComputeDiskSize(self.op.disk_template,
6629
                                self.disks)
6630

    
6631
    # Check lv size requirements, if not adopting
6632
    if req_size is not None and not self.adopt_disks:
6633
      _CheckNodesFreeDisk(self, nodenames, req_size)
6634

    
6635
    if self.adopt_disks: # instead, we must check the adoption data
6636
      all_lvs = set([i["adopt"] for i in self.disks])
6637
      if len(all_lvs) != len(self.disks):
6638
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
6639
                                   errors.ECODE_INVAL)
6640
      for lv_name in all_lvs:
6641
        try:
6642
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
6643
        except errors.ReservationError:
6644
          raise errors.OpPrereqError("LV named %s used by another instance" %
6645
                                     lv_name, errors.ECODE_NOTUNIQUE)
6646

    
6647
      node_lvs = self.rpc.call_lv_list([pnode.name],
6648
                                       self.cfg.GetVGName())[pnode.name]
6649
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
6650
      node_lvs = node_lvs.payload
6651
      delta = all_lvs.difference(node_lvs.keys())
6652
      if delta:
6653
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
6654
                                   utils.CommaJoin(delta),
6655
                                   errors.ECODE_INVAL)
6656
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
6657
      if online_lvs:
6658
        raise errors.OpPrereqError("Online logical volumes found, cannot"
6659
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
6660
                                   errors.ECODE_STATE)
6661
      # update the size of disk based on what is found
6662
      for dsk in self.disks:
6663
        dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
6664

    
6665
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
6666

    
6667
    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
6668

    
6669
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
6670

    
6671
    # memory check on primary node
6672
    if self.op.start:
6673
      _CheckNodeFreeMemory(self, self.pnode.name,
6674
                           "creating instance %s" % self.op.instance_name,
6675
                           self.be_full[constants.BE_MEMORY],
6676
                           self.op.hypervisor)
6677

    
6678
    self.dry_run_result = list(nodenames)
6679

    
6680
  def Exec(self, feedback_fn):
    """Create the instance and register it in the cluster configuration.

    The disks are either created or, when adopting, renamed from the
    existing LVs; the instance is then added to the configuration, the
    node locks are released (except the source node when importing), the
    disk sync status is checked, the OS create/import scripts are run and
    finally the instance is started if requested.

    @param feedback_fn: function used to send progress messages
    @return: the list of nodes of the new instance

    """
    inst_name = self.op.instance_name
    primary = self.pnode.name

    # a network port is allocated only for the hypervisors listed in
    # HTS_REQ_PORT
    port = None
    if self.op.hypervisor in constants.HTS_REQ_PORT:
      port = self.cfg.AllocatePort()

    storage_dir = ""
    if constants.ENABLE_FILE_STORAGE:
      # the path join helper is not given None, so an unset directory is
      # mapped to the empty string
      subdir = self.op.file_storage_dir
      if subdir is None:
        subdir = ""

      # build the full file storage dir path
      storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(), subdir,
                                   inst_name)

    new_disks = _GenerateDiskTemplate(self, self.op.disk_template, inst_name,
                                      primary, self.secondaries, self.disks,
                                      storage_dir, self.op.file_driver, 0)

    new_inst = objects.Instance(name=inst_name,
                                os=self.op.os_type,
                                primary_node=primary,
                                nics=self.nics,
                                disks=new_disks,
                                disk_template=self.op.disk_template,
                                admin_up=False,
                                network_port=port,
                                beparams=self.op.beparams,
                                hvparams=self.op.hvparams,
                                hypervisor=self.op.hypervisor)

    if not self.adopt_disks:
      feedback_fn("* creating instance disks...")
      try:
        _CreateDisks(self, new_inst)
      except errors.OpExecError:
        self.LogWarning("Device creation failed, reverting...")
        try:
          _RemoveDisks(self, new_inst)
        finally:
          # release the minors and propagate the failure
          self.cfg.ReleaseDRBDMinors(inst_name)
          raise
    else:
      # Adoption: the existing LVs are renamed to the freshly generated
      # names. For that, copies of the new disks are made which carry the
      # old (adopted) LV name as logical id.
      lv_copies = [objects.Disk.FromDict(dsk.ToDict()) for dsk in new_disks]
      final_ids = []
      for lv_copy, spec in zip(lv_copies, self.disks):
        generated_id = lv_copy.logical_id
        final_ids.append(generated_id)
        lv_copy.logical_id = (generated_id[0], spec["adopt"])
        self.cfg.SetDiskID(lv_copy, primary)
      rename_res = self.rpc.call_blockdev_rename(primary,
                                                 zip(lv_copies, final_ids))
      rename_res.Raise("Failed to rename adoped LVs")

    feedback_fn("adding instance %s to cluster config" % inst_name)

    self.cfg.AddInstance(new_inst, self.proc.GetECId())

    # The instance is in the config now, so its lock must no longer be
    # removed at the end of the LU
    del self.remove_locks[locking.LEVEL_INSTANCE]

    # Release the node locks; in import mode the source node is kept
    if self.op.mode != constants.INSTANCE_IMPORT:
      self.context.glm.release(locking.LEVEL_NODE)
      del self.acquired_locks[locking.LEVEL_NODE]
    else:
      source = self.op.src_node
      kept = [source]
      to_release = [name for name in self.acquired_locks[locking.LEVEL_NODE]
                    if name != source]
      self.context.glm.release(locking.LEVEL_NODE, to_release)
      self.acquired_locks[locking.LEVEL_NODE] = kept

    degraded = False
    if self.op.wait_for_sync:
      degraded = not _WaitForSync(self, new_inst)
    elif new_inst.disk_template in constants.DTS_NET_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      time.sleep(15)
      feedback_fn("* checking mirrors status")
      degraded = not _WaitForSync(self, new_inst, oneshot=True)

    if degraded:
      # roll back: drop the disks and the config entry again
      _RemoveDisks(self, new_inst)
      self.cfg.RemoveInstance(new_inst.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = new_inst.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    # OS scripts are skipped for diskless instances and for adopted disks
    skip_os_scripts = (new_inst.disk_template == constants.DT_DISKLESS or
                       self.adopt_disks)
    if not skip_os_scripts:
      op_mode = self.op.mode
      if op_mode == constants.INSTANCE_CREATE:
        if not self.op.no_install:
          feedback_fn("* running the instance OS create scripts...")
          # FIXME: pass debug option from opcode to backend
          add_res = self.rpc.call_instance_os_add(primary, new_inst, False,
                                                  self.op.debug_level)
          add_res.Raise("Could not add os for instance %s"
                        " on node %s" % (inst_name, primary))

      elif op_mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")
        # FIXME: pass debug option from opcode to backend
        imp_res = self.rpc.call_instance_os_import(primary, new_inst,
                                                   self.op.src_node,
                                                   self.src_images,
                                                   self.cfg.GetClusterName(),
                                                   self.op.debug_level)
        failure = imp_res.fail_msg
        if failure:
          # an import failure is only reported, it does not abort the job
          self.LogWarning("Error while importing the disk images for instance"
                          " %s on node %s: %s" %
                          (inst_name, primary, failure))
      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    if self.op.start:
      new_inst.admin_up = True
      self.cfg.Update(new_inst, feedback_fn)
      logging.info("Starting instance %s on node %s", inst_name, primary)
      feedback_fn("* starting instance...")
      start_res = self.rpc.call_instance_start(primary, new_inst, None, None)
      start_res.Raise("Could not start instance")

    return list(new_inst.all_nodes)
6825

    
6826

    
6827
class LUConnectConsole(NoHooksLU):
  """Compute the command needed to reach an instance's console.

  Unlike most logical units this one does not act on the instance; it
  only returns the command line which has to be run on the master node
  in order to connect to the console.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the instance name and declare its lock.

    """
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    The instance must be part of the cluster and its primary node must
    pass the online check.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = inst
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, inst.primary_node)

  def Exec(self, feedback_fn):
    """Build the ssh command line for the instance console.

    @return: the command built by the ssh runner for the primary node
    @raise errors.OpExecError: if the instance is not running

    """
    inst = self.instance
    pnode = inst.primary_node

    # ask the primary node which instances it runs under this hypervisor
    running = self.rpc.call_instance_list([pnode], [inst.hypervisor])[pnode]
    running.Raise("Can't get node information from %s" % pnode)

    if inst.name not in running.payload:
      raise errors.OpExecError("Instance %s is not running." % inst.name)

    logging.debug("Connecting to console of %s on %s", inst.name, pnode)

    hv_class = hypervisor.GetHypervisor(inst.hypervisor)
    cluster = self.cfg.GetClusterInfo()
    # The filled hvparams/beparams are handed over separately, so that
    # the instance object is not edited and the defaults cannot end up
    # being saved in the instance itself.
    console_cmd = hv_class.GetShellCommandForConsole(inst,
                                                     cluster.FillHV(inst),
                                                     cluster.FillBE(inst))

    # build ssh cmdline
    return self.ssh.BuildCmd(pnode, "root", console_cmd, batch=True, tty=True)
6878

    
6879

    
6880
class LUReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  The actual work is delegated to a L{TLReplaceDisks} tasklet; this class
  handles the opcode arguments, the locking and the hooks environment.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "mode", "disks"]
  REQ_BGL = False

  def CheckArguments(self):
    """Set defaults for the optional arguments and validate them.

    """
    optional_args = (
      ("remote_node", None),
      ("iallocator", None),
      ("early_release", False),
      )
    for attr_name, default in optional_args:
      if not hasattr(self.op, attr_name):
        setattr(self.op, attr_name, default)

    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    """Declare the needed locks and create the tasklet.

    """
    self._ExpandAndLockInstance()

    if self.op.iallocator is not None:
      # the new secondary is not known yet, so all nodes are requested
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    elif self.op.remote_node is not None:
      new_secondary = _ExpandNodeName(self.cfg, self.op.remote_node)
      self.op.remote_node = new_secondary

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [new_secondary]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks, False, self.op.early_release)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    """Declare the instance's node locks unless all nodes are locked.

    """
    if level != locking.LEVEL_NODE:
      return
    if self.needed_locks[locking.LEVEL_NODE] is locking.ALL_SET:
      # every node is requested already, nothing to add
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list contains the master, the instance's primary node and,
    if one was given, the new secondary node.

    """
    inst = self.replacer.instance
    new_secondary = self.op.remote_node

    env = {}
    env["MODE"] = self.op.mode
    env["NEW_SECONDARY"] = new_secondary
    env["OLD_SECONDARY"] = inst.secondary_nodes[0]
    env.update(_BuildInstanceHookEnvByObject(self, inst))

    hook_nodes = [self.cfg.GetMasterNode(), inst.primary_node]
    if new_secondary is not None:
      hook_nodes.append(new_secondary)

    # the same node list is returned twice
    return env, hook_nodes, hook_nodes
6954

    
6955

    
6956
class LUEvacuateNode(LogicalUnit):
  """Relocate the secondary instances from a node.

  One L{TLReplaceDisks} tasklet in secondary-change mode is created for
  each instance having the given node as secondary.

  """
  HPATH = "node-evacuate"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    """Set defaults for the optional arguments and validate them.

    """
    optional_args = (
      ("remote_node", None),
      ("iallocator", None),
      ("early_release", False),
      )
    for attr_name, default in optional_args:
      if not hasattr(self.op, attr_name):
        setattr(self.op, attr_name, default)

    TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
                                  self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    """Declare the node and instance locks and create the tasklets.

    @raise errors.OpPrereqError: if neither an iallocator nor a new
        secondary node was given

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    self.needed_locks = {}

    # Declare node locks
    if self.op.iallocator is None and self.op.remote_node is None:
      raise errors.OpPrereqError("Invalid parameters", errors.ECODE_INVAL)

    if self.op.iallocator is not None:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      new_secondary = _ExpandNodeName(self.cfg, self.op.remote_node)
      self.op.remote_node = new_secondary

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [new_secondary]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    # One disk-replacing tasklet for every instance which has this node
    # as secondary
    secondary_names = []
    replacers = []

    for sec_inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
      logging.debug("Replacing disks for instance %s", sec_inst.name)
      secondary_names.append(sec_inst.name)
      replacers.append(TLReplaceDisks(self, sec_inst.name,
                                      constants.REPLACE_DISK_CHG,
                                      self.op.iallocator,
                                      self.op.remote_node, [],
                                      True, self.op.early_release))

    self.tasklets = replacers
    self.instance_names = secondary_names

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = secondary_names

  def DeclareLocks(self, level):
    """Declare the instances' node locks unless all nodes are locked.

    """
    if level != locking.LEVEL_NODE:
      return
    if self.needed_locks[locking.LEVEL_NODE] is locking.ALL_SET:
      # every node is requested already, nothing to add
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list contains the master and, if one was given, the new
    secondary node.

    """
    env = {"NODE_NAME": self.op.node_name}
    hook_nodes = [self.cfg.GetMasterNode()]

    new_secondary = self.op.remote_node
    if new_secondary is not None:
      env["NEW_SECONDARY"] = new_secondary
      hook_nodes.append(new_secondary)

    # the same node list is returned twice
    return (env, hook_nodes, hook_nodes)
7043

    
7044

    
7045
class TLReplaceDisks(Tasklet):
  """Replaces disks for an instance.

  Depending on the mode, either the local storage below the DRBD8 devices
  is replaced on one of the instance's two nodes (see
  L{_ExecDrbd8DiskOnly}), or the secondary node itself is changed (see
  L{_ExecDrbd8Secondary}).

  Note: Locking is not within the scope of this class.

  """
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
               disks, delay_iallocator, early_release):
    """Initializes this class.

    @param lu: the logical unit on whose behalf the tasklet runs
    @param instance_name: name of the instance whose disks are replaced
    @param mode: one of the C{constants.REPLACE_DISK_*} modes
    @param iallocator_name: name of the iallocator used to choose the new
        secondary node, or None
    @param remote_node: name of the new secondary node, or None
    @param disks: indices of the disks to replace; if empty, all disks are
        replaced in the non-automatic modes
    @param delay_iallocator: if true, the second part of the prerequisite
        checks (which includes running the iallocator) is postponed from
        L{CheckPrereq} to L{Exec}
    @param early_release: if true, the old storage is removed and the node
        locks are released before waiting for the disks to sync

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.mode = mode
    self.iallocator_name = iallocator_name
    self.remote_node = remote_node
    self.disks = disks
    self.delay_iallocator = delay_iallocator
    self.early_release = early_release

    # Runtime data, filled in by CheckPrereq/_CheckPrereq2
    self.instance = None
    self.new_node = None
    self.target_node = None
    self.other_node = None
    self.remote_node_info = None
    self.node_secondary_ip = None

  @staticmethod
  def CheckArguments(mode, remote_node, iallocator):
    """Helper function for users of this class.

    Verifies that the new node and iallocator options are consistent with
    the replacement mode: when changing the secondary exactly one of them
    must be given, in every other mode neither may be given.

    @param mode: one of the C{constants.REPLACE_DISK_*} modes
    @param remote_node: name of the new secondary node, or None
    @param iallocator: name of the iallocator, or None
    @raise errors.OpPrereqError: if the combination is invalid

    """
    # check for valid parameter combination
    if mode == constants.REPLACE_DISK_CHG:
      if remote_node is None and iallocator is None:
        raise errors.OpPrereqError("When changing the secondary either an"
                                   " iallocator script must be used or the"
                                   " new node given", errors.ECODE_INVAL)

      if remote_node is not None and iallocator is not None:
        raise errors.OpPrereqError("Give either the iallocator or the new"
                                   " secondary, not both", errors.ECODE_INVAL)

    elif remote_node is not None or iallocator is not None:
      # Not replacing the secondary
      raise errors.OpPrereqError("The iallocator and new node options can"
                                 " only be used when changing the"
                                 " secondary node", errors.ECODE_INVAL)

  @staticmethod
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
    """Compute a new secondary node using an IAllocator.

    @param lu: logical unit providing the configuration, the RPC runner
        and the logging functions
    @param iallocator_name: name of the iallocator to run
    @param instance_name: name of the instance to relocate
    @param relocate_from: nodes the instance should be moved away from
    @return: the name of the selected node (first entry of the result)
    @raise errors.OpPrereqError: if the iallocator fails or returns an
        unexpected number of nodes

    """
    ial = IAllocator(lu.cfg, lu.rpc,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     name=instance_name,
                     relocate_from=relocate_from)

    ial.Run(iallocator_name)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
                                 " %s" % (iallocator_name, ial.info),
                                 errors.ECODE_NORES)

    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (iallocator_name,
                                  len(ial.result), ial.required_nodes),
                                 errors.ECODE_FAULT)

    remote_node_name = ial.result[0]

    lu.LogInfo("Selected new secondary for instance '%s': %s",
               instance_name, remote_node_name)

    return remote_node_name

  def _FindFaultyDisks(self, node_name):
    """Find the faulty disks of the instance on the given node.

    Thin wrapper around C{_FindFaultyInstanceDisks}; the result is used as
    the list of disks to replace in automatic mode (presumably a list of
    disk indices -- confirm against the helper).

    """
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
                                    node_name, True)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, that it is DRBD8-based
    and that it has exactly one secondary node. Unless the iallocator run
    is delayed, the remaining checks (L{_CheckPrereq2}) are run as well.

    @raise errors.OpPrereqError: if the instance is not suitable

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.instance_name

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
                                 " instances", errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("The instance has a strange layout,"
                                 " expected one secondary but found %d" %
                                 len(instance.secondary_nodes),
                                 errors.ECODE_FAULT)

    if not self.delay_iallocator:
      self._CheckPrereq2()

  def _CheckPrereq2(self):
    """Check prerequisites, second part.

    This function should always be part of CheckPrereq. It was separated and is
    now called from Exec because during node evacuation iallocator was only
    called with an unmodified cluster model, not taking planned changes into
    account.

    On success the runtime attributes C{target_node}, C{other_node},
    C{new_node} (only when changing the secondary), C{disks},
    C{remote_node_info} and C{node_secondary_ip} are set.

    @raise errors.OpPrereqError: if the requested replacement is not
        possible
    @raise errors.ProgrammerError: if the mode is not a known one

    """
    instance = self.instance
    secondary_node = instance.secondary_nodes[0]

    # The new node is either given explicitly or chosen by the iallocator
    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is not None:
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node
    else:
      self.remote_node_info = None

    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance.", errors.ECODE_INVAL)

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance.",
                                 errors.ECODE_INVAL)

    # An explicit disk list only makes sense for the primary/secondary modes
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 errors.ECODE_INVAL)

    if self.mode == constants.REPLACE_DISK_AUTO:
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

      if faulty_primary:
        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        # Nothing is faulty; Exec will report that no disks need replacement
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        # The old secondary is deliberately not checked for being online
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)

        old_node_info = self.cfg.GetNodeInfo(secondary_node)
        assert old_node_info is not None
        if old_node_info.offline and not self.early_release:
          # doesn't make sense to delay the release
          self.early_release = True
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
                          " early-release mode", secondary_node)

      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)

      # If not specified all disks should be replaced
      if not self.disks:
        self.disks = range(len(self.instance.disks))

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    # Check whether disks are valid
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    node_2nd_ip = {}

    for node_name in [self.target_node, self.other_node, self.new_node]:
      if node_name is not None:
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip

    self.node_secondary_ip = node_2nd_ip

  def Exec(self, feedback_fn):
    """Execute disk replacement.

    This dispatches the disk replacement to the appropriate handler. If the
    instance is not marked as up, its disks are activated for the duration
    of the replacement and shut down again afterwards.

    @param feedback_fn: function used to send feedback to the caller
    @return: the result of the selected handler, or None if no disks need
        replacement

    """
    if self.delay_iallocator:
      self._CheckPrereq2()

    if not self.disks:
      feedback_fn("No disks need replacement")
      return

    feedback_fn("Replacing disk(s) %s for %s" %
                (utils.CommaJoin(self.disks), self.instance.name))

    activate_disks = (not self.instance.admin_up)

    # Activate the instance disks if we're replacing them on a down instance
    if activate_disks:
      _StartInstanceDisks(self.lu, self.instance, True)

    try:
      # Should we replace the secondary node?
      if self.new_node is not None:
        fn = self._ExecDrbd8Secondary
      else:
        fn = self._ExecDrbd8DiskOnly

      return fn(feedback_fn)

    finally:
      # Deactivate the instance disks if we're replacing them on a
      # down instance
      if activate_disks:
        _SafeShutdownInstanceDisks(self.lu, self.instance)

  def _CheckVolumeGroup(self, nodes):
    """Check that the cluster's volume group exists on all given nodes.

    @param nodes: names of the nodes to check
    @raise errors.OpExecError: if the volume groups can't be listed or the
        volume group is missing on a node

    """
    self.lu.LogInfo("Checking volume groups")

    vgname = self.cfg.GetVGName()

    # Make sure volume group exists on all involved nodes
    results = self.rpc.call_vg_list(nodes)
    if not results:
      raise errors.OpExecError("Can't list volume groups on the nodes")

    for node in nodes:
      res = results[node]
      res.Raise("Error checking node %s" % node)
      if vgname not in res.payload:
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
                                 (vgname, node))

  def _CheckDisksExistence(self, nodes):
    """Check that the disks to be replaced can be found on the given nodes.

    @param nodes: names of the nodes to check
    @raise errors.OpExecError: if a disk can't be found on a node

    """
    # Check disk existence
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        msg = result.fail_msg
        if msg or not result.payload:
          # The call succeeded but returned no device
          if not msg:
            msg = "disk not found"
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
                                   (idx, node, msg))

  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
    """Check the consistency of the disks to be replaced on one node.

    @param node_name: name of the node to check
    @param on_primary: whether the node is the instance's primary node
    @param ldisk: passed through to C{_CheckDiskConsistency}
    @raise errors.OpExecError: if a disk is reported as not consistent

    """
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
                      (idx, node_name))

      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
                                   ldisk=ldisk):
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
                                 " replace disks for instance %s" %
                                 (node_name, self.instance.name))

  def _CreateNewStorage(self, node_name):
    """Create new data and meta LVs for the disks to be replaced.

    @param node_name: name of the node on which the LVs are created
    @return: dict mapping each replaced disk's C{iv_name} to a tuple of
        (drbd device, list of old LVs, list of new LVs)

    """
    vgname = self.cfg.GetVGName()
    iv_names = {}

    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))

      self.cfg.SetDiskID(dev, node_name)

      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
      names = _GenerateUniqueNames(self.lu, lv_names)

      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
                             logical_id=(vgname, names[0]))
      # NOTE(review): fixed size for the meta volume, presumably in MiB
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                             logical_id=(vgname, names[1]))

      new_lvs = [lv_data, lv_meta]
      old_lvs = dev.children
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)

      # we pass force_create=True to force the LVM creation
      for new_lv in new_lvs:
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    return iv_names

  def _CheckDevices(self, node_name, iv_names):
    """Check that the replaced DRBD devices exist and are not degraded.

    @param node_name: name of the node on which the devices are checked
    @param iv_names: dict whose values are tuples starting with the device
        to check; the keys are only used in error messages
    @raise errors.OpExecError: if a device can't be found or is degraded

    """
    for name, (dev, _, _) in iv_names.iteritems():
      self.cfg.SetDiskID(dev, node_name)

      result = self.rpc.call_blockdev_find(node_name, dev)

      msg = result.fail_msg
      if msg or not result.payload:
        if not msg:
          msg = "disk not found"
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
                                 (name, msg))

      if result.payload.is_degraded:
        raise errors.OpExecError("DRBD device %s is degraded!" % name)

  def _RemoveOldStorage(self, node_name, iv_names):
    """Remove the old logical volumes from the given node.

    Failures are only logged as warnings, they don't abort the operation.

    @param node_name: name of the node holding the old LVs
    @param iv_names: dict whose values are tuples with the list of old LVs
        as second element

    """
    for name, (_, old_lvs, _) in iv_names.iteritems():
      self.lu.LogInfo("Remove logical volumes for %s" % name)

      for lv in old_lvs:
        self.cfg.SetDiskID(lv, node_name)

        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
        if msg:
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
                             hint="remove unused LVs manually")

  def _ReleaseNodeLock(self, node_name):
    """Releases the node-level lock(s) for the given node(s).

    @param node_name: passed unchanged to the lock manager's C{release};
        the callers in this class pass a list of node names

    """
    self.lu.context.glm.release(locking.LEVEL_NODE, node_name)

  def _ExecDrbd8DiskOnly(self, feedback_fn):
    """Replace a disk on the primary or secondary for DRBD 8.

    The algorithm for replace is quite complicated:

      1. for each disk to be replaced:

        1. create new LVs on the target node with unique names
        1. detach old LVs from the drbd device
        1. rename old LVs to name_replaced-<time_t>
        1. rename new LVs to old LVs
        1. attach the new LVs (with the old names now) to the drbd device

      1. wait for sync across all devices

      1. for each modified disk:

        1. remove old LVs (which have the name name_replaced-<time_t>)

    Failures are not very well handled.

    @param feedback_fn: function used to send feedback to the caller

    """
    steps_total = 6

    def _ReplacedId(disk, suffix):
      """Compute the ID an old LV gets once it has been renamed away."""
      return (disk.physical_id[0],
              disk.physical_id[1] + "_replaced-%s" % suffix)

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.other_node, self.target_node])
    self._CheckVolumeGroup([self.target_node, self.other_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.other_node,
                                self.other_node == self.instance.primary_node,
                                False)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    iv_names = self._CreateNewStorage(self.target_node)

    # Step: for each lv, detach+rename*2+attach
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    for dev, old_lvs, new_lvs in iv_names.itervalues():
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)

      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
                                                     old_lvs)
      result.Raise("Can't detach drbd from local storage on node"
                   " %s for device %s" % (self.target_node, dev.iv_name))

      # ok, we created the new LVs, so now we know we have the needed
      # storage; as such, we proceed on the target node to rename
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
      # using the assumption that logical_id == physical_id (which in
      # turn is the unique_id on that node)

      # FIXME(iustin): use a better name for the replaced LVs
      temp_suffix = int(time.time())

      # Build the rename list based on what LVs exist on the node
      rename_old_to_new = []
      for to_ren in old_lvs:
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
        if not result.fail_msg and result.payload:
          # device exists
          rename_old_to_new.append((to_ren, _ReplacedId(to_ren, temp_suffix)))

      self.lu.LogInfo("Renaming the old LVs on the target node")
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_old_to_new)
      result.Raise("Can't rename old LVs on node %s" % self.target_node)

      # Now we rename the new LVs to the old LVs
      self.lu.LogInfo("Renaming the new LVs on the target node")
      rename_new_to_old = [(new, old.physical_id)
                           for old, new in zip(old_lvs, new_lvs)]
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_new_to_old)
      result.Raise("Can't rename new LVs on node %s" % self.target_node)

      # Make the in-memory objects reflect the renames done on the node
      for old, new in zip(old_lvs, new_lvs):
        new.logical_id = old.logical_id
        self.cfg.SetDiskID(new, self.target_node)

      for disk in old_lvs:
        disk.logical_id = _ReplacedId(disk, temp_suffix)
        self.cfg.SetDiskID(disk, self.target_node)

      # Now that the new lvs have the old name, we can add them to the device
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
                                                  new_lvs)
      msg = result.fail_msg
      if msg:
        # Try to remove the new LVs again before giving up
        for new_lv in new_lvs:
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
                                               new_lv).fail_msg
          if msg2:
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
                                     " volumes"))
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)

      dev.children = new_lvs

      self.cfg.Update(self.instance, feedback_fn)

    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release both node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      self._ReleaseNodeLock([self.target_node, self.other_node])

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    cstep += 1
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)

  def _ExecDrbd8Secondary(self, feedback_fn):
    """Replace the secondary node for DRBD 8.

    The algorithm for replace is quite complicated:
      - for all disks of the instance:
        - create new LVs on the new node with same names
        - shutdown the drbd device on the old secondary
        - disconnect the drbd network on the primary
        - create the drbd device on the new secondary
        - network attach the drbd on the primary, using an artifice:
          the drbd code for Attach() will connect to the network if it
          finds a device which is connected to the good local disks but
          not network enabled
      - wait for sync across all devices
      - remove all disks from the old secondary

    Failures are not very well handled.

    @param feedback_fn: function used to send feedback to the caller

    """
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.instance.primary_node])
    self._CheckVolumeGroup([self.instance.primary_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.instance.primary_node, True, True)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
                      (self.new_node, idx))
      # we pass force_create=True to force LVM creation
      for new_lv in dev.children:
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    # Step 4: drbd minors and drbd setups changes
    # after this, we must manually remove the drbd minors on both the
    # error and the success paths
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    minors = self.cfg.AllocateDRBDMinor([self.new_node
                                         for dev in self.instance.disks],
                                        self.instance.name)
    logging.debug("Allocated minors %r", minors)

    iv_names = {}
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
                      (self.new_node, idx))
      # create new devices on new_node; note that we create two IDs:
      # one without port, so the drbd will be activated without
      # networking information on the new node at this stage, and one
      # with network, for the latter activation in step 4
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
      if self.instance.primary_node == o_node1:
        p_minor = o_minor1
      else:
        assert self.instance.primary_node == o_node2, "Three-node instance?"
        p_minor = o_minor2

      new_alone_id = (self.instance.primary_node, self.new_node, None,
                      p_minor, new_minor, o_secret)
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
                    p_minor, new_minor, o_secret)

      iv_names[idx] = (dev, dev.children, new_net_id)
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
                    new_net_id)
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                              logical_id=new_alone_id,
                              children=dev.children,
                              size=dev.size)
      try:
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
                              _GetInstanceInfoText(self.instance), False)
      except errors.GenericError:
        # Give back the minors allocated above before propagating the error
        self.cfg.ReleaseDRBDMinors(self.instance.name)
        raise

    # We have new devices, shutdown the drbd on the old secondary
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
      self.cfg.SetDiskID(dev, self.target_node)
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
      if msg:
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           " node: %s" % (idx, msg),
                           hint=("Please cleanup this device manually as"
                                 " soon as possible"))

    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
    all_results = self.rpc.call_drbd_disconnect_net(
      [self.instance.primary_node], self.node_secondary_ip,
      self.instance.disks)
    result = all_results[self.instance.primary_node]

    msg = result.fail_msg
    if msg:
      # detaches didn't succeed (unlikely)
      self.cfg.ReleaseDRBDMinors(self.instance.name)
      raise errors.OpExecError("Can't detach the disks from the network on"
                               " old node: %s" % (msg,))

    # if we managed to detach at least one, we update all the disks of
    # the instance to point to the new secondary
    self.lu.LogInfo("Updating instance configuration")
    for dev, _, new_logical_id in iv_names.itervalues():
      dev.logical_id = new_logical_id
      self.cfg.SetDiskID(dev, self.instance.primary_node)

    self.cfg.Update(self.instance, feedback_fn)

    # and now perform the drbd attach
    self.lu.LogInfo("Attaching primary drbds to new secondary"
                    " (standalone => connected)")
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
                                            self.new_node],
                                           self.node_secondary_ip,
                                           self.instance.disks,
                                           self.instance.name,
                                           False)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
                           to_node, msg,
                           hint=("please do a gnt-instance info to see the"
                                 " status of disks"))
    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release all node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      self._ReleaseNodeLock([self.instance.primary_node,
                             self.target_node,
                             self.new_node])

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    cstep += 1
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)
7712

    
7713

    
7714
class LURepairNodeStorage(NoHooksLU):
7715
  """Repairs the volume group on a node.
7716

7717
  """
7718
  _OP_REQP = ["node_name"]
7719
  REQ_BGL = False
7720

    
7721
  def CheckArguments(self):
7722
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7723

    
7724
    _CheckStorageType(self.op.storage_type)
7725

    
7726
  def ExpandNames(self):
7727
    self.needed_locks = {
7728
      locking.LEVEL_NODE: [self.op.node_name],
7729
      }
7730

    
7731
  def _CheckFaultyDisks(self, instance, node_name):
7732
    """Ensure faulty disks abort the opcode or at least warn."""
7733
    try:
7734
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
7735
                                  node_name, True):
7736
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
7737
                                   " node '%s'" % (instance.name, node_name),
7738
                                   errors.ECODE_STATE)
7739
    except errors.OpPrereqError, err:
7740
      if self.op.ignore_consistency:
7741
        self.proc.LogWarning(str(err.args[0]))
7742
      else:
7743
        raise
7744

    
7745
  def CheckPrereq(self):
7746
    """Check prerequisites.
7747

7748
    """
7749
    storage_type = self.op.storage_type
7750

    
7751
    if (constants.SO_FIX_CONSISTENCY not in
7752
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
7753
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
7754
                                 " repaired" % storage_type,
7755
                                 errors.ECODE_INVAL)
7756

    
7757
    # Check whether any instance on this node has faulty disks
7758
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
7759
      if not inst.admin_up:
7760
        continue
7761
      check_nodes = set(inst.all_nodes)
7762
      check_nodes.discard(self.op.node_name)
7763
      for inst_node_name in check_nodes:
7764
        self._CheckFaultyDisks(inst, inst_node_name)
7765

    
7766
  def Exec(self, feedback_fn):
7767
    feedback_fn("Repairing storage unit '%s' on %s ..." %
7768
                (self.op.name, self.op.node_name))
7769

    
7770
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
7771
    result = self.rpc.call_storage_execute(self.op.node_name,
7772
                                           self.op.storage_type, st_args,
7773
                                           self.op.name,
7774
                                           constants.SO_FIX_CONSISTENCY)
7775
    result.Raise("Failed to repair storage unit '%s' on %s" %
7776
                 (self.op.name, self.op.node_name))
7777

    
7778

    
7779
class LUNodeEvacuationStrategy(NoHooksLU):
  """Computes the node evacuation strategy.

  With an explicit remote node the result is a list of
  C{[instance_name, remote_node]} pairs; otherwise the result of the
  iallocator run is returned unchanged.

  """
  _OP_REQP = ["nodes"]
  REQ_BGL = False

  def CheckArguments(self):
    """Default the optional parameters and reject conflicting ones.

    @raise errors.OpPrereqError: if both an iallocator and a remote
        node were given

    """
    for attr_name in ("remote_node", "iallocator"):
      if not hasattr(self.op, attr_name):
        setattr(self.op, attr_name, None)

    both_given = (self.op.remote_node is not None and
                  self.op.iallocator is not None)
    if both_given:
      raise errors.OpPrereqError("Give either the iallocator or the new"
                                 " secondary, not both", errors.ECODE_INVAL)

  def ExpandNames(self):
    """Expand the node names and compute the node locks.

    Without a remote node all node locks are requested; otherwise only
    the evacuated nodes plus the remote node.

    """
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
    self.needed_locks = {}
    if self.op.remote_node is None:
      node_locks = locking.ALL_SET
    else:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      node_locks = self.op.nodes + [self.op.remote_node]
    self.needed_locks[locking.LEVEL_NODE] = node_locks

  def CheckPrereq(self):
    """No prerequisites to check.

    """
    pass

  def Exec(self, feedback_fn):
    """Compute the evacuation strategy.

    @param feedback_fn: unused
    @return: list of C{[instance_name, node]} pairs when a remote node
        was given, otherwise the iallocator result
    @raise errors.OpPrereqError: if the remote node is the primary node
        of one of the affected instances
    @raise errors.OpExecError: if the iallocator found no solution

    """
    remote_node = self.op.remote_node

    if remote_node is None:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=constants.IALLOCATOR_MODE_MEVAC,
                       evac_nodes=self.op.nodes)
      ial.Run(self.op.iallocator, validate=True)
      if not ial.success:
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
                                 errors.ECODE_NORES)
      return ial.result

    # Gather all secondary instances first, then validate and pair them
    affected = []
    for node_name in self.op.nodes:
      affected.extend(_GetNodeSecondaryInstances(self.cfg, node_name))

    pairs = []
    for inst in affected:
      if inst.primary_node == remote_node:
        raise errors.OpPrereqError("Node %s is the primary node of"
                                   " instance %s, cannot use it as"
                                   " secondary" %
                                   (remote_node, inst.name),
                                   errors.ECODE_INVAL)
      pairs.append([inst.name, remote_node])
    return pairs
7831

    
7832

    
7833
class LUGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance and prepare the node-level lock declaration.

    """
    self._ExpandAndLockInstance()
    node_level = locking.LEVEL_NODE
    self.needed_locks[node_level] = []
    self.recalculate_locks[node_level] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Declare the locks of the instance's nodes at node level.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    @return: tuple of (environment dict, pre-hook nodes, post-hook nodes)

    """
    env = dict(DISK=self.op.disk, AMOUNT=self.op.amount)
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    node_list = [self.cfg.GetMasterNode()]
    node_list.extend(self.instance.all_nodes)
    return env, node_list, node_list

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, that all its nodes
    are online, that its disk template can be grown and, except for
    file-based instances, that the nodes have enough free disk space.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node_names = list(inst.all_nodes)
    for node_name in node_names:
      _CheckNodeOnline(self, node_name)

    self.instance = inst

    if inst.disk_template not in constants.DTS_GROWABLE:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing.", errors.ECODE_INVAL)

    self.disk = inst.FindDisk(self.op.disk)

    # TODO: check the free disk space for file, when that feature will be
    # supported
    if inst.disk_template != constants.DT_FILE:
      _CheckNodesFreeDisk(self, node_names, self.op.amount)

  def Exec(self, feedback_fn):
    """Execute disk grow.

    @param feedback_fn: passed on to the configuration update

    """
    inst = self.instance
    disk = self.disk
    amount = self.op.amount

    for node_name in inst.all_nodes:
      self.cfg.SetDiskID(disk, node_name)
      grow_result = self.rpc.call_blockdev_grow(node_name, disk, amount)
      grow_result.Raise("Grow request failed to node %s" % node_name)

      # TODO: Rewrite code to work properly
      # DRBD goes into sync mode for a short amount of time after executing the
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
      # calling "resize" in sync mode fails. Sleeping for a short amount of
      # time is a work-around.
      time.sleep(5)

    disk.RecordGrow(amount)
    self.cfg.Update(inst, feedback_fn)

    if self.op.wait_for_sync and not _WaitForSync(self, inst):
      self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
                           " status.\nPlease check the instance.")
7917

    
7918

    
7919
class LUQueryInstanceData(NoHooksLU):
  """Query runtime instance data.

  """
  _OP_REQP = ["instances", "static"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the requested instance names and declare shared locks.

    An empty instance list means all instances are locked.

    @raise errors.OpPrereqError: if 'instances' is not a list

    """
    self.needed_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

    requested = self.op.instances
    if not isinstance(requested, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'",
                                 errors.ECODE_INVAL)

    if requested:
      self.wanted_names = [_ExpandInstanceName(self.cfg, name)
                           for name in requested]
      instance_locks = self.wanted_names
    else:
      self.wanted_names = None
      instance_locks = locking.ALL_SET
    self.needed_locks[locking.LEVEL_INSTANCE] = instance_locks

    node_level = locking.LEVEL_NODE
    self.needed_locks[node_level] = []
    self.recalculate_locks[node_level] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Declare the locks of the instances' nodes at node level.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    # No explicit list was given: use whatever instance locks we hold
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    get_info = self.cfg.GetInstanceInfo
    self.wanted_instances = [get_info(name) for name in self.wanted_names]
    return

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device

    @param node: name of the node to query; a false value yields None
    @param instance_name: instance name, only used in the error message
    @param dev: the disk object to look up
    @return: None for static queries, offline nodes or missing status,
        otherwise a tuple of (dev_path, major, minor, sync_percent,
        estimated_time, is_degraded, ldisk_status)

    """
    if not node or self.op.static:
      return None

    self.cfg.SetDiskID(dev, node)

    find_result = self.rpc.call_blockdev_find(node, dev)
    if find_result.offline:
      return None

    find_result.Raise("Can't compute disk status for %s" % instance_name)

    bdev = find_result.payload
    if bdev is None:
      return None

    return (bdev.dev_path, bdev.major, bdev.minor,
            bdev.sync_percent, bdev.estimated_time,
            bdev.is_degraded, bdev.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    @param instance: the instance owning the device
    @param snode: secondary node to use; replaced by the peer from the
        logical id for DRBD devices
    @param dev: the disk object, processed recursively with its children
    @return: dict describing the device and its children

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      first_node = dev.logical_id[0]
      if first_node == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = first_node

    primary_status = self._ComputeBlockdevStatus(instance.primary_node,
                                                 instance.name, dev)
    secondary_status = self._ComputeBlockdevStatus(snode, instance.name, dev)

    child_status = []
    for child in dev.children or []:
      child_status.append(self._ComputeDiskStatus(instance, snode, child))

    return {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": primary_status,
      "sstatus": secondary_status,
      "children": child_status,
      "mode": dev.mode,
      "size": dev.size,
      }

  def Exec(self, feedback_fn):
    """Gather and return data"""
    cluster = self.cfg.GetClusterInfo()
    result = {}

    for instance in self.wanted_instances:
      if self.op.static:
        run_state = None
      else:
        info_result = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        info_result.Raise("Error checking node %s" % instance.primary_node)
        info = info_result.payload
        # a non-empty payload with a "state" key counts as running
        if info and "state" in info:
          run_state = "up"
        else:
          run_state = "down"

      if instance.admin_up:
        config_state = "up"
      else:
        config_state = "down"

      disks = []
      for device in instance.disks:
        disks.append(self._ComputeDiskStatus(instance, None, device))

      result[instance.name] = {
        "name": instance.name,
        "config_state": config_state,
        "run_state": run_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

    return result
8074

    
8075

    
8076
class LUSetInstanceParams(LogicalUnit):
8077
  """Modifies an instances's parameters.
8078

8079
  """
8080
  HPATH = "instance-modify"
8081
  HTYPE = constants.HTYPE_INSTANCE
8082
  _OP_REQP = ["instance_name"]
8083
  REQ_BGL = False
8084

    
8085
  def CheckArguments(self):
8086
    if not hasattr(self.op, 'nics'):
8087
      self.op.nics = []
8088
    if not hasattr(self.op, 'disks'):
8089
      self.op.disks = []
8090
    if not hasattr(self.op, 'beparams'):
8091
      self.op.beparams = {}
8092
    if not hasattr(self.op, 'hvparams'):
8093
      self.op.hvparams = {}
8094
    if not hasattr(self.op, "disk_template"):
8095
      self.op.disk_template = None
8096
    if not hasattr(self.op, "remote_node"):
8097
      self.op.remote_node = None
8098
    if not hasattr(self.op, "os_name"):
8099
      self.op.os_name = None
8100
    if not hasattr(self.op, "force_variant"):
8101
      self.op.force_variant = False
8102
    self.op.force = getattr(self.op, "force", False)
8103
    if not (self.op.nics or self.op.disks or self.op.disk_template or
8104
            self.op.hvparams or self.op.beparams or self.op.os_name):
8105
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8106

    
8107
    if self.op.hvparams:
8108
      _CheckGlobalHvParams(self.op.hvparams)
8109

    
8110
    # Disk validation
8111
    disk_addremove = 0
8112
    for disk_op, disk_dict in self.op.disks:
8113
      if disk_op == constants.DDM_REMOVE:
8114
        disk_addremove += 1
8115
        continue
8116
      elif disk_op == constants.DDM_ADD:
8117
        disk_addremove += 1
8118
      else:
8119
        if not isinstance(disk_op, int):
8120
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8121
        if not isinstance(disk_dict, dict):
8122
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8123
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8124

    
8125
      if disk_op == constants.DDM_ADD:
8126
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8127
        if mode not in constants.DISK_ACCESS_SET:
8128
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8129
                                     errors.ECODE_INVAL)
8130
        size = disk_dict.get('size', None)
8131
        if size is None:
8132
          raise errors.OpPrereqError("Required disk parameter size missing",
8133
                                     errors.ECODE_INVAL)
8134
        try:
8135
          size = int(size)
8136
        except (TypeError, ValueError), err:
8137
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8138
                                     str(err), errors.ECODE_INVAL)
8139
        disk_dict['size'] = size
8140
      else:
8141
        # modification of disk
8142
        if 'size' in disk_dict:
8143
          raise errors.OpPrereqError("Disk size change not possible, use"
8144
                                     " grow-disk", errors.ECODE_INVAL)
8145

    
8146
    if disk_addremove > 1:
8147
      raise errors.OpPrereqError("Only one disk add or remove operation"
8148
                                 " supported at a time", errors.ECODE_INVAL)
8149

    
8150
    if self.op.disks and self.op.disk_template is not None:
8151
      raise errors.OpPrereqError("Disk template conversion and other disk"
8152
                                 " changes not supported at the same time",
8153
                                 errors.ECODE_INVAL)
8154

    
8155
    if self.op.disk_template:
8156
      _CheckDiskTemplate(self.op.disk_template)
8157
      if (self.op.disk_template in constants.DTS_NET_MIRROR and
8158
          self.op.remote_node is None):
8159
        raise errors.OpPrereqError("Changing the disk template to a mirrored"
8160
                                   " one requires specifying a secondary node",
8161
                                   errors.ECODE_INVAL)
8162

    
8163
    # NIC validation
8164
    nic_addremove = 0
8165
    for nic_op, nic_dict in self.op.nics:
8166
      if nic_op == constants.DDM_REMOVE:
8167
        nic_addremove += 1
8168
        continue
8169
      elif nic_op == constants.DDM_ADD:
8170
        nic_addremove += 1
8171
      else:
8172
        if not isinstance(nic_op, int):
8173
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8174
        if not isinstance(nic_dict, dict):
8175
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8176
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8177

    
8178
      # nic_dict should be a dict
8179
      nic_ip = nic_dict.get('ip', None)
8180
      if nic_ip is not None:
8181
        if nic_ip.lower() == constants.VALUE_NONE:
8182
          nic_dict['ip'] = None
8183
        else:
8184
          if not utils.IsValidIP(nic_ip):
8185
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8186
                                       errors.ECODE_INVAL)
8187

    
8188
      nic_bridge = nic_dict.get('bridge', None)
8189
      nic_link = nic_dict.get('link', None)
8190
      if nic_bridge and nic_link:
8191
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8192
                                   " at the same time", errors.ECODE_INVAL)
8193
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8194
        nic_dict['bridge'] = None
8195
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8196
        nic_dict['link'] = None
8197

    
8198
      if nic_op == constants.DDM_ADD:
8199
        nic_mac = nic_dict.get('mac', None)
8200
        if nic_mac is None:
8201
          nic_dict['mac'] = constants.VALUE_AUTO
8202

    
8203
      if 'mac' in nic_dict:
8204
        nic_mac = nic_dict['mac']
8205
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8206
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8207

    
8208
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8209
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8210
                                     " modifying an existing nic",
8211
                                     errors.ECODE_INVAL)
8212

    
8213
    if nic_addremove > 1:
8214
      raise errors.OpPrereqError("Only one NIC add or remove operation"
8215
                                 " supported at a time", errors.ECODE_INVAL)
8216

    
8217
  def ExpandNames(self):
    """Lock the instance and prepare the node-level lock declaration.

    """
    self._ExpandAndLockInstance()
    node_level = locking.LEVEL_NODE
    self.needed_locks[node_level] = []
    self.recalculate_locks[node_level] = constants.LOCKS_REPLACE
8221

    
8222
  def DeclareLocks(self, level):
    """Declare node-level locks.

    Besides the instance's own nodes, the new secondary node is added to
    the node locks when a disk template conversion names one.

    """
    if level != locking.LEVEL_NODE:
      return

    self._LockInstancesNodes()
    if not (self.op.disk_template and self.op.remote_node):
      return

    expanded = _ExpandNodeName(self.cfg, self.op.remote_node)
    self.op.remote_node = expanded
    self.needed_locks[locking.LEVEL_NODE].append(expanded)
8228

    
8229
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, primary and secondaries.

    The environment reflects the state after the modification: new
    memory/vcpus values and the resulting NIC list override the values
    taken from the instance object.

    @return: tuple of (environment dict, pre-hook nodes, post-hook nodes)

    """
    overrides = {}
    if constants.BE_MEMORY in self.be_new:
      overrides['memory'] = self.be_new[constants.BE_MEMORY]
    if constants.BE_VCPUS in self.be_new:
      overrides['vcpus'] = self.be_new[constants.BE_VCPUS]

    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
    # information at all.
    if self.op.nics:
      nic_list = overrides['nics'] = []
      nic_override = dict(self.op.nics)
      cluster_defaults = self.cluster.nicparams[constants.PP_DEFAULT]

      for idx, nic in enumerate(self.instance.nics):
        changes = nic_override.get(idx, {})
        ip = changes.get('ip', nic.ip)
        mac = changes.get('mac', nic.mac)
        if idx in self.nic_pnew:
          nicparams = self.nic_pnew[idx]
        else:
          nicparams = objects.FillDict(cluster_defaults, nic.nicparams)
        nic_list.append((ip, mac, nicparams[constants.NIC_MODE],
                         nicparams[constants.NIC_LINK]))

      if constants.DDM_ADD in nic_override:
        added = nic_override[constants.DDM_ADD]
        ip = added.get('ip', None)
        mac = added['mac']
        nicparams = self.nic_pnew[constants.DDM_ADD]
        nic_list.append((ip, mac, nicparams[constants.NIC_MODE],
                         nicparams[constants.NIC_LINK]))
      elif constants.DDM_REMOVE in nic_override:
        # the last entry of the list is dropped on removal
        del nic_list[-1]

    env = _BuildInstanceHookEnvByObject(self, self.instance,
                                        override=overrides)
    if self.op.disk_template:
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template

    node_list = [self.cfg.GetMasterNode()]
    node_list.extend(self.instance.all_nodes)
    return env, node_list, node_list
8281

    
8282
  @staticmethod
  def _GetUpdatedParams(old_params, update_dict,
                        default_values, parameter_types):
    """Return the new params dict for the given params.

    The old parameters are not modified; the changes are applied to a
    deep copy of them.

    @type old_params: dict
    @param old_params: old parameters
    @type update_dict: dict
    @param update_dict: dict containing new parameter values,
                        or constants.VALUE_DEFAULT to reset the
                        parameter to its default value
    @type default_values: dict
    @param default_values: default values for the filled parameters
    @type parameter_types: dict
    @param parameter_types: dict mapping target dict keys to types
                            in constants.ENFORCEABLE_TYPES
    @rtype: (dict, dict)
    @return: (new_parameters, filled_parameters)

    """
    new_params = copy.deepcopy(old_params)
    for name, value in update_dict.iteritems():
      if value == constants.VALUE_DEFAULT:
        # resetting to default means dropping the explicit setting, if any
        new_params.pop(name, None)
      else:
        new_params[name] = value

    utils.ForceDictType(new_params, parameter_types)
    filled_params = objects.FillDict(default_values, new_params)
    return (new_params, filled_params)
8314

    
8315
  def CheckPrereq(self):
    """Check prerequisites.

    This validates the requested changes against the current instance and
    cluster configuration, in this order: disk template conversion,
    hypervisor parameters, backend parameters (including a free-memory
    check unless forced), NIC changes, disk changes and the OS change.

    Besides validating, it stores the computed values for later use:
    C{self.instance}, C{self.cluster}, C{self.hv_new}/C{self.hv_inst},
    C{self.be_new}/C{self.be_inst}, C{self.nic_pnew}/C{self.nic_pinst}
    and the list of warnings C{self.warn}. MAC addresses given in the NIC
    changes are generated or reserved here.

    @raise errors.OpPrereqError: if any of the requested changes is not
        valid for this instance

    """
    self.force = self.op.force

    # checking the new params on the primary/secondary nodes

    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    cluster = self.cluster = self.cfg.GetClusterInfo()
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    pnode = instance.primary_node
    nodelist = list(instance.all_nodes)

    if self.op.disk_template:
      if instance.disk_template == self.op.disk_template:
        raise errors.OpPrereqError("Instance already has disk template %s" %
                                   instance.disk_template, errors.ECODE_INVAL)

      if (instance.disk_template,
          self.op.disk_template) not in self._DISK_CONVERSIONS:
        raise errors.OpPrereqError("Unsupported disk template conversion from"
                                   " %s to %s" % (instance.disk_template,
                                                  self.op.disk_template),
                                   errors.ECODE_INVAL)
      if self.op.disk_template in constants.DTS_NET_MIRROR:
        _CheckNodeOnline(self, self.op.remote_node)
        _CheckNodeNotDrained(self, self.op.remote_node)
        disks = [{"size": d.size} for d in instance.disks]
        required = _ComputeDiskSize(self.op.disk_template, disks)
        _CheckNodesFreeDisk(self, [self.op.remote_node], required)
        # NOTE(review): the instance-down check only runs for conversions
        # to a network-mirrored template - confirm this is intended
        _CheckInstanceDown(self, instance, "cannot change disk template")

    # hvparams processing
    if self.op.hvparams:
      i_hvdict, hv_new = self._GetUpdatedParams(
                             instance.hvparams, self.op.hvparams,
                             cluster.hvparams[instance.hypervisor],
                             constants.HVS_PARAMETER_TYPES)
      # local check
      hypervisor.GetHypervisor(
        instance.hypervisor).CheckParameterSyntax(hv_new)
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
      self.hv_new = hv_new # the new actual values
      self.hv_inst = i_hvdict # the new dict (without defaults)
    else:
      self.hv_new = self.hv_inst = {}

    # beparams processing
    if self.op.beparams:
      i_bedict, be_new = self._GetUpdatedParams(
                             instance.beparams, self.op.beparams,
                             cluster.beparams[constants.PP_DEFAULT],
                             constants.BES_PARAMETER_TYPES)
      self.be_new = be_new # the new actual values
      self.be_inst = i_bedict # the new dict (without defaults)
    else:
      self.be_new = self.be_inst = {}

    self.warn = []

    # be_new is always bound here: a memory change implies non-empty beparams
    if constants.BE_MEMORY in self.op.beparams and not self.force:
      mem_check_list = [pnode]
      if be_new[constants.BE_AUTO_BALANCE]:
        # either we changed auto_balance to yes or it was from before
        mem_check_list.extend(instance.secondary_nodes)
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
                                                  instance.hypervisor)
      nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
                                         instance.hypervisor)
      pninfo = nodeinfo[pnode]
      msg = pninfo.fail_msg
      if msg:
        # Assume the primary node is unreachable and go ahead
        self.warn.append("Can't get info from primary node %s: %s" %
                         (pnode, msg))
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
        self.warn.append("Node data from primary node %s doesn't contain"
                         " free memory information" % pnode)
      elif instance_info.fail_msg:
        self.warn.append("Can't get instance runtime information: %s" %
                         instance_info.fail_msg)
      else:
        if instance_info.payload:
          current_mem = int(instance_info.payload['memory'])
        else:
          # Assume instance not running
          # (there is a slight race condition here, but it's not very probable,
          # and we have no other way to check)
          current_mem = 0
        # memory the node lacks once the instance's current usage is freed
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
                    pninfo.payload['memory_free'])
        if miss_mem > 0:
          raise errors.OpPrereqError("This change will prevent the instance"
                                     " from starting, due to %d MB of memory"
                                     " missing on its primary node" % miss_mem,
                                     errors.ECODE_NORES)

      if be_new[constants.BE_AUTO_BALANCE]:
        # problems on secondary nodes only produce warnings
        for node, nres in nodeinfo.items():
          if node not in instance.secondary_nodes:
            continue
          msg = nres.fail_msg
          if msg:
            self.warn.append("Can't get info from secondary node %s: %s" %
                             (node, msg))
          elif not isinstance(nres.payload.get('memory_free', None), int):
            self.warn.append("Secondary node %s didn't return free"
                             " memory information" % node)
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
            self.warn.append("Not enough memory to failover instance to"
                             " secondary node %s" % node)

    # NIC processing
    self.nic_pnew = {}
    self.nic_pinst = {}
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        if not instance.nics:
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
                                     errors.ECODE_INVAL)
        continue
      if nic_op != constants.DDM_ADD:
        # an existing nic
        if not instance.nics:
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
                                     " no NICs" % nic_op,
                                     errors.ECODE_INVAL)
        if nic_op < 0 or nic_op >= len(instance.nics):
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
                                     " are 0 to %d" %
                                     (nic_op, len(instance.nics) - 1),
                                     errors.ECODE_INVAL)
        old_nic_params = instance.nics[nic_op].nicparams
        old_nic_ip = instance.nics[nic_op].ip
      else:
        old_nic_params = {}
        old_nic_ip = None

      update_params_dict = dict([(key, nic_dict[key])
                                 for key in constants.NICS_PARAMETERS
                                 if key in nic_dict])

      # 'bridge' is stored as the NIC link parameter
      if 'bridge' in nic_dict:
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']

      new_nic_params, new_filled_nic_params = \
          self._GetUpdatedParams(old_nic_params, update_params_dict,
                                 cluster.nicparams[constants.PP_DEFAULT],
                                 constants.NICS_PARAMETER_TYPES)
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
      self.nic_pinst[nic_op] = new_nic_params
      self.nic_pnew[nic_op] = new_filled_nic_params
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]

      if new_nic_mode == constants.NIC_MODE_BRIDGED:
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
        if msg:
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
          if self.force:
            self.warn.append(msg)
          else:
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
      if new_nic_mode == constants.NIC_MODE_ROUTED:
        if 'ip' in nic_dict:
          nic_ip = nic_dict['ip']
        else:
          nic_ip = old_nic_ip
        if nic_ip is None:
          raise errors.OpPrereqError('Cannot set the nic ip to None'
                                     ' on a routed nic', errors.ECODE_INVAL)
      if 'mac' in nic_dict:
        nic_mac = nic_dict['mac']
        if nic_mac is None:
          raise errors.OpPrereqError('Cannot set the nic mac to None',
                                     errors.ECODE_INVAL)
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          # otherwise generate the mac
          nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
        else:
          # or validate/reserve the current one
          try:
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
          except errors.ReservationError:
            raise errors.OpPrereqError("MAC address %s already in use"
                                       " in cluster" % nic_mac,
                                       errors.ECODE_NOTUNIQUE)

    # DISK processing
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Disk operations not supported for"
                                 " diskless instances",
                                 errors.ECODE_INVAL)
    for disk_op, _ in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        if len(instance.disks) == 1:
          raise errors.OpPrereqError("Cannot remove the last disk of"
                                     " an instance", errors.ECODE_INVAL)
        _CheckInstanceDown(self, instance, "cannot remove disks")

      # the limit applies to the number of disks (not NICs) of the instance
      if (disk_op == constants.DDM_ADD and
          len(instance.disks) >= constants.MAX_DISKS):
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
                                   " add more" % constants.MAX_DISKS,
                                   errors.ECODE_STATE)
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
        # an existing disk
        if disk_op < 0 or disk_op >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
                                     " are 0 to %d" %
                                     (disk_op, len(instance.disks) - 1),
                                     errors.ECODE_INVAL)

    # OS change
    if self.op.os_name and not self.op.force:
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
                      self.op.force_variant)

    return
8538

    
8539
  def _ConvertPlainToDrbd(self, feedback_fn):
    """Converts an instance from plain to drbd.

    New disk objects are generated for the requested template, the
    volumes which do not exist yet are created on both nodes, the
    current volumes are renamed to the logical id of the first child of
    each new disk and then the new top-level devices are created.

    @param feedback_fn: callable used to report progress
    @raise errors.OpExecError: if the disks are still degraded after
        waiting for the sync

    """
    feedback_fn("Converting template to drbd")
    instance = self.instance
    primary = instance.primary_node
    secondary = self.op.remote_node

    # _GenerateDiskTemplate wants a disk specification, so fake one
    # from the disks the instance currently has
    disk_specs = []
    for cur_disk in instance.disks:
      disk_specs.append({"size": cur_disk.size, "mode": cur_disk.mode})
    drbd_disks = _GenerateDiskTemplate(self, self.op.disk_template,
                                       instance.name, primary, [secondary],
                                       disk_specs, None, None, 0)
    info_text = _GetInstanceInfoText(instance)

    feedback_fn("Creating aditional volumes...")
    # first, create the missing data and meta devices: on the primary
    # node only the second child is created (the first one is obtained
    # by renaming the current volume, below), on the secondary node
    # all children are
    for drbd_dev in drbd_disks:
      _CreateSingleBlockDev(self, primary, instance, drbd_dev.children[1],
                            info_text, True)
      for child_dev in drbd_dev.children:
        _CreateSingleBlockDev(self, secondary, instance, child_dev,
                              info_text, True)

    # at this stage, all new LVs have been created, we can rename the
    # old ones
    feedback_fn("Renaming original volumes...")
    renames = []
    for old_dev, drbd_dev in zip(instance.disks, drbd_disks):
      renames.append((old_dev, drbd_dev.children[0].logical_id))
    rename_result = self.rpc.call_blockdev_rename(primary, renames)
    rename_result.Raise("Failed to rename original LVs")

    feedback_fn("Initializing DRBD devices...")
    # all child devices are in place, we can now create the DRBD
    # devices; the last argument is true on the primary node only
    for drbd_dev in drbd_disks:
      for node in (primary, secondary):
        _CreateSingleBlockDev(self, node, instance, drbd_dev, info_text,
                              node == primary)

    # at this point, the instance has been modified
    instance.disk_template = constants.DT_DRBD8
    instance.disks = drbd_disks
    self.cfg.Update(instance, feedback_fn)

    # disks are created, waiting for sync
    if not _WaitForSync(self, instance):
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance, please cleanup manually")
8587

    
8588
  def _ConvertDrbdToPlain(self, feedback_fn):
    """Converts an instance from drbd to plain.

    The first child of every current disk becomes the new disk of the
    instance; the configuration is updated first and only afterwards
    the devices which are no longer needed are removed, on a best-effort
    basis (failures are only logged as warnings).

    @param feedback_fn: callable used to report progress

    """
    instance = self.instance
    assert len(instance.secondary_nodes) == 1
    primary = instance.primary_node
    secondary = instance.secondary_nodes[0]
    feedback_fn("Converting template to plain")

    drbd_disks = instance.disks
    plain_disks = []
    for drbd_dev in drbd_disks:
      plain_disks.append(drbd_dev.children[0])

    # copy over size and mode
    for drbd_dev, plain_dev in zip(drbd_disks, plain_disks):
      plain_dev.size = drbd_dev.size
      plain_dev.mode = drbd_dev.mode

    # update instance structure
    instance.disks = plain_disks
    instance.disk_template = constants.DT_PLAIN
    self.cfg.Update(instance, feedback_fn)

    feedback_fn("Removing volumes on the secondary node...")
    for drbd_dev in drbd_disks:
      self.cfg.SetDiskID(drbd_dev, secondary)
      remove_result = self.rpc.call_blockdev_remove(secondary, drbd_dev)
      err_msg = remove_result.fail_msg
      if err_msg:
        self.LogWarning("Could not remove block device %s on node %s,"
                        " continuing anyway: %s", drbd_dev.iv_name,
                        secondary, err_msg)

    feedback_fn("Removing unneeded volumes on the primary node...")
    for disk_idx, drbd_dev in enumerate(drbd_disks):
      # the first child is kept as the new plain disk, so only the
      # metadata device is removed here
      meta_dev = drbd_dev.children[1]
      self.cfg.SetDiskID(meta_dev, primary)
      remove_result = self.rpc.call_blockdev_remove(primary, meta_dev)
      err_msg = remove_result.fail_msg
      if err_msg:
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
                        " continuing anyway: %s", disk_idx, primary, err_msg)
8627

    
8628

    
8629
  def Exec(self, feedback_fn):
8630
    """Modifies an instance.
8631

8632
    All parameters take effect only at the next restart of the instance.
8633

8634
    """
8635
    # Process here the warnings from CheckPrereq, as we don't have a
8636
    # feedback_fn there.
8637
    for warn in self.warn:
8638
      feedback_fn("WARNING: %s" % warn)
8639

    
8640
    result = []
8641
    instance = self.instance
8642
    # disk changes
8643
    for disk_op, disk_dict in self.op.disks:
8644
      if disk_op == constants.DDM_REMOVE:
8645
        # remove the last disk
8646
        device = instance.disks.pop()
8647
        device_idx = len(instance.disks)
8648
        for node, disk in device.ComputeNodeTree(instance.primary_node):
8649
          self.cfg.SetDiskID(disk, node)
8650
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
8651
          if msg:
8652
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
8653
                            " continuing anyway", device_idx, node, msg)
8654
        result.append(("disk/%d" % device_idx, "remove"))
8655
      elif disk_op == constants.DDM_ADD:
8656
        # add a new disk
8657
        if instance.disk_template == constants.DT_FILE:
8658
          file_driver, file_path = instance.disks[0].logical_id
8659
          file_path = os.path.dirname(file_path)
8660
        else:
8661
          file_driver = file_path = None
8662
        disk_idx_base = len(instance.disks)
8663
        new_disk = _GenerateDiskTemplate(self,
8664
                                         instance.disk_template,
8665
                                         instance.name, instance.primary_node,
8666
                                         instance.secondary_nodes,
8667
                                         [disk_dict],
8668
                                         file_path,
8669
                                         file_driver,
8670
                                         disk_idx_base)[0]
8671
        instance.disks.append(new_disk)
8672
        info = _GetInstanceInfoText(instance)
8673

    
8674
        logging.info("Creating volume %s for instance %s",
8675
                     new_disk.iv_name, instance.name)
8676
        # Note: this needs to be kept in sync with _CreateDisks
8677
        #HARDCODE
8678
        for node in instance.all_nodes:
8679
          f_create = node == instance.primary_node
8680
          try:
8681
            _CreateBlockDev(self, node, instance, new_disk,
8682
                            f_create, info, f_create)
8683
          except errors.OpExecError, err:
8684
            self.LogWarning("Failed to create volume %s (%s) on"
8685
                            " node %s: %s",
8686
                            new_disk.iv_name, new_disk, node, err)
8687
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
8688
                       (new_disk.size, new_disk.mode)))
8689
      else:
8690
        # change a given disk
8691
        instance.disks[disk_op].mode = disk_dict['mode']
8692
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
8693

    
8694
    if self.op.disk_template:
8695
      r_shut = _ShutdownInstanceDisks(self, instance)
8696
      if not r_shut:
8697
        raise errors.OpExecError("Cannot shutdow instance disks, unable to"
8698
                                 " proceed with disk template conversion")
8699
      mode = (instance.disk_template, self.op.disk_template)
8700
      try:
8701
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
8702
      except:
8703
        self.cfg.ReleaseDRBDMinors(instance.name)
8704
        raise
8705
      result.append(("disk_template", self.op.disk_template))
8706

    
8707
    # NIC changes
8708
    for nic_op, nic_dict in self.op.nics:
8709
      if nic_op == constants.DDM_REMOVE:
8710
        # remove the last nic
8711
        del instance.nics[-1]
8712
        result.append(("nic.%d" % len(instance.nics), "remove"))
8713
      elif nic_op == constants.DDM_ADD:
8714
        # mac and bridge should be set, by now
8715
        mac = nic_dict['mac']
8716
        ip = nic_dict.get('ip', None)
8717
        nicparams = self.nic_pinst[constants.DDM_ADD]
8718
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
8719
        instance.nics.append(new_nic)
8720
        result.append(("nic.%d" % (len(instance.nics) - 1),
8721
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
8722
                       (new_nic.mac, new_nic.ip,
8723
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
8724
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
8725
                       )))
8726
      else:
8727
        for key in 'mac', 'ip':
8728
          if key in nic_dict:
8729
            setattr(instance.nics[nic_op], key, nic_dict[key])
8730
        if nic_op in self.nic_pinst:
8731
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
8732
        for key, val in nic_dict.iteritems():
8733
          result.append(("nic.%s/%d" % (key, nic_op), val))
8734

    
8735
    # hvparams changes
8736
    if self.op.hvparams:
8737
      instance.hvparams = self.hv_inst
8738
      for key, val in self.op.hvparams.iteritems():
8739
        result.append(("hv/%s" % key, val))
8740

    
8741
    # beparams changes
8742
    if self.op.beparams:
8743
      instance.beparams = self.be_inst
8744
      for key, val in self.op.beparams.iteritems():
8745
        result.append(("be/%s" % key, val))
8746

    
8747
    # OS change
8748
    if self.op.os_name:
8749
      instance.os = self.op.os_name
8750

    
8751
    self.cfg.Update(instance, feedback_fn)
8752

    
8753
    return result
8754

    
8755
  # Dispatch table for disk template conversions, keyed by the pair
  # (current template, requested template); the values are called as
  # fn(self, feedback_fn)
  _DISK_CONVERSIONS = {
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    }
8759

    
8760
class LUQueryExports(NoHooksLU):
  """Query the exports list

  """
  _OP_REQP = ['nodes']
  REQ_BGL = False

  def ExpandNames(self):
    """Declare shared node locks, for the given nodes or for all of them.

    """
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      # no nodes given means all nodes are queried
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.

    The nodes to query are the ones whose locks have been acquired.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node; for nodes which could not be queried the value
        is False instead.

    """
    answers = self.rpc.call_export_list(self.nodes)
    exports = {}
    for node in answers:
      node_answer = answers[node]
      if node_answer.fail_msg:
        exports[node] = False
      else:
        exports[node] = node_answer.payload

    return exports
8800

    
8801

    
8802
class LUExportInstance(LogicalUnit):
  """Export an instance to an image in the cluster.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "target_node", "shutdown"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    The shutdown timeout is optional in the opcode; when missing the
    default one is used.

    """
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    """Lock the instance and all nodes.

    """
    self._ExpandAndLockInstance()
    # FIXME: lock only instance primary and destination node
    #
    # Sad but true, for now we have to lock all nodes, as we don't know where
    # the previous export might be, and in this LU we search for it and
    # remove it from its current node. In the future we could fix this by:
    #  - making a tasklet to search (share-lock all), then create the new one,
    #    then one to remove, after
    #  - removing the removal operation altogether
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # All nodes are locked anyway, so nothing to do here.

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    @return: tuple of (environment dict, node list, node list); the same
        node list is used twice

    """
    env = {
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node,
          self.op.target_node]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and node names are valid.

    @raise errors.OpPrereqError: if the instance has file-based disks

    """
    instance_name = self.op.instance_name
    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
    assert self.dst_node is not None

    _CheckNodeOnline(self, self.dst_node.name)
    _CheckNodeNotDrained(self, self.dst_node.name)

    # instance disk type verification
    for disk in self.instance.disks:
      if disk.dev_type == constants.LD_FILE:
        raise errors.OpPrereqError("Export not supported for instances with"
                                   " file-based disks", errors.ECODE_INVAL)

  def _CreateSnapshots(self, feedback_fn):
    """Creates an LVM snapshot for every disk of the instance.

    A failure to snapshot a disk is only logged as a warning; the list
    then contains C{False} at the index of that disk.

    @return: List of snapshots as L{objects.Disk} instances

    """
    instance = self.instance
    src_node = instance.primary_node

    vgname = self.cfg.GetVGName()

    snap_disks = []

    for idx, disk in enumerate(instance.disks):
      feedback_fn("Creating a snapshot of disk/%s on node %s" %
                  (idx, src_node))

      # result.payload will be a snapshot of an lvm leaf of the one we
      # passed
      result = self.rpc.call_blockdev_snapshot(src_node, disk)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not snapshot disk/%s on node %s: %s",
                        idx, src_node, msg)
        # keep the list aligned with instance.disks
        snap_disks.append(False)
      else:
        disk_id = (vgname, result.payload)
        new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
                               logical_id=disk_id, physical_id=disk_id,
                               iv_name=disk.iv_name)
        snap_disks.append(new_dev)

    return snap_disks

  def _RemoveSnapshot(self, feedback_fn, snap_disks, disk_index):
    """Removes an LVM snapshot.

    @type snap_disks: list
    @param snap_disks: The list of all snapshots as returned by
                       L{_CreateSnapshots}
    @type disk_index: number
    @param disk_index: Index of the snapshot to be removed
    @rtype: bool
    @return: Whether removal was successful or not

    """
    disk = snap_disks[disk_index]
    # entries for disks which could not be snapshotted are False
    if disk:
      src_node = self.instance.primary_node

      feedback_fn("Removing snapshot of disk/%s on node %s" %
                  (disk_index, src_node))

      result = self.rpc.call_blockdev_remove(src_node, disk)
      if not result.fail_msg:
        return True

      self.LogWarning("Could not remove snapshot for disk/%d from node"
                      " %s: %s", disk_index, src_node, result.fail_msg)

    return False

  def _CleanupExports(self, feedback_fn):
    """Removes exports of current instance from all other nodes.

    If an instance in a cluster with nodes A..D was exported to node C, its
    exports will be removed from the nodes A, B and D.

    """
    nodelist = self.cfg.GetNodeList()
    nodelist.remove(self.dst_node.name)

    # on one-node clusters nodelist will be empty after the removal
    # if we proceed the backup would be removed because OpQueryExports
    # substitutes an empty list with the full cluster node list.
    iname = self.instance.name
    if nodelist:
      feedback_fn("Removing old exports for instance %s" % iname)
      exportlist = self.rpc.call_export_list(nodelist)
      for node in exportlist:
        if exportlist[node].fail_msg:
          # nodes which cannot be queried are silently skipped
          continue
        if iname in exportlist[node].payload:
          msg = self.rpc.call_export_remove(node, iname).fail_msg
          if msg:
            self.LogWarning("Could not remove older export for instance %s"
                            " on node %s: %s", iname, node, msg)

  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    @param feedback_fn: callable used to report progress
    @rtype: tuple
    @return: (whether finalizing the export succeeded, list with one
        boolean per disk telling whether that disk was exported)
    @raise errors.OpExecError: if the instance cannot be started again
        after the snapshots have been taken

    """
    instance = self.instance
    dst_node = self.dst_node
    src_node = instance.primary_node

    if self.op.shutdown:
      # shutdown the instance, but not the disks
      feedback_fn("Shutting down instance %s" % instance.name)
      result = self.rpc.call_instance_shutdown(src_node, instance,
                                               self.shutdown_timeout)
      result.Raise("Could not shutdown instance %s on"
                   " node %s" % (instance.name, src_node))

    # set the disks ID correctly since call_instance_start needs the
    # correct drbd minor to create the symlinks
    for disk in instance.disks:
      self.cfg.SetDiskID(disk, src_node)

    activate_disks = (not instance.admin_up)

    if activate_disks:
      # Activate the instance disks if we're exporting a stopped instance
      feedback_fn("Activating disks for %s" % instance.name)
      _StartInstanceDisks(self, instance, None)

    try:
      # per-disk results
      dresults = []
      removed_snaps = [False] * len(instance.disks)

      snap_disks = None
      try:
        try:
          snap_disks = self._CreateSnapshots(feedback_fn)
        finally:
          # the instance is started again whether or not the snapshots
          # could be created
          if self.op.shutdown and instance.admin_up:
            feedback_fn("Starting instance %s" % instance.name)
            result = self.rpc.call_instance_start(src_node, instance,
                                                  None, None)
            msg = result.fail_msg
            if msg:
              _ShutdownInstanceDisks(self, instance)
              raise errors.OpExecError("Could not start instance: %s" % msg)

        assert len(snap_disks) == len(instance.disks)
        assert len(removed_snaps) == len(instance.disks)

        # TODO: check for size

        cluster_name = self.cfg.GetClusterName()
        for idx, dev in enumerate(snap_disks):
          feedback_fn("Exporting snapshot %s from %s to %s" %
                      (idx, src_node, dst_node.name))
          if dev:
            # FIXME: pass debug from opcode to backend
            result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
                                                   instance, cluster_name,
                                                   idx, self.op.debug_level)
            msg = result.fail_msg
            if msg:
              self.LogWarning("Could not export disk/%s from node %s to"
                              " node %s: %s", idx, src_node, dst_node.name, msg)
              dresults.append(False)
            else:
              dresults.append(True)

            # Remove snapshot
            if self._RemoveSnapshot(feedback_fn, snap_disks, idx):
              removed_snaps[idx] = True
          else:
            # this disk could not be snapshotted
            dresults.append(False)

        assert len(dresults) == len(instance.disks)

        # Check for backwards compatibility
        assert compat.all(isinstance(i, bool) for i in dresults), \
               "Not all results are boolean: %r" % dresults

        feedback_fn("Finalizing export on %s" % dst_node.name)
        result = self.rpc.call_finalize_export(dst_node.name, instance,
                                               snap_disks)
        msg = result.fail_msg
        fin_resu = not msg
        if msg:
          self.LogWarning("Could not finalize export for instance %s"
                          " on node %s: %s", instance.name, dst_node.name, msg)

      finally:
        # Remove all snapshots which are still around. If an exception
        # was raised before the snapshot list was assigned, there is
        # nothing to index; trying anyway would raise a TypeError which
        # would replace the original error.
        assert len(removed_snaps) == len(instance.disks)
        if snap_disks is not None:
          for idx, removed in enumerate(removed_snaps):
            if not removed:
              self._RemoveSnapshot(feedback_fn, snap_disks, idx)

    finally:
      if activate_disks:
        feedback_fn("Deactivating disks for %s" % instance.name)
        _ShutdownInstanceDisks(self, instance)

    self._CleanupExports(feedback_fn)

    return fin_resu, dresults
9068

    
9069

    
9070
class LURemoveExport(NoHooksLU):
  """Remove exports related to the named instance.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Lock all nodes.

    """
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks = {}
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.

    There is nothing to check for this LU.

    """
    pass

  def Exec(self, feedback_fn):
    """Remove any export.

    All locked nodes are asked for their exports and the export of the
    instance is removed from every node which has it.

    """
    expanded_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = not expanded_name
    instance_name = expanded_name or self.op.instance_name

    node_names = self.acquired_locks[locking.LEVEL_NODE]
    exportlist = self.rpc.call_export_list(node_names)
    found = False
    for node in exportlist:
      node_exports = exportlist[node]
      query_msg = node_exports.fail_msg
      if query_msg:
        self.LogWarning("Failed to query node %s (continuing): %s",
                        node, query_msg)
        continue
      if instance_name not in node_exports.payload:
        continue
      found = True
      remove_msg = self.rpc.call_export_remove(node, instance_name).fail_msg
      if remove_msg:
        logging.error("Could not remove export for instance %s"
                      " on node %s: %s", instance_name, node, remove_msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")
9121

    
9122

    
9123
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """

  def ExpandNames(self):
    """Expand the object name and lock the object.

    Node and instance names are expanded and the respective object is
    locked; no locks are declared for any other kind.

    """
    self.needed_locks = {}
    kind = self.op.kind
    if kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name

  def CheckPrereq(self):
    """Check prerequisites.

    This looks up the object the tags belong to and stores it in
    self.target.

    @raise errors.OpPrereqError: if the tag kind is not a known one

    """
    kind = self.op.kind
    if kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(kind), errors.ECODE_INVAL)
9152

    
9153

    
9154
class LUGetTags(TagsLU):
  """Returns the tags of a given object.

  """
  _OP_REQP = ["kind", "name"]
  REQ_BGL = False

  def Exec(self, feedback_fn):
    """Returns the tag list.

    @rtype: list
    @return: the tags of the target object, as a new list

    """
    current_tags = self.target.GetTags()
    return list(current_tags)
9166

    
9167

    
9168
class LUSearchTags(NoHooksLU):
9169
  """Searches the tags for a given pattern.
9170

9171
  """
9172
  _OP_REQP = ["pattern"]
9173
  REQ_BGL = False
9174

    
9175
  def ExpandNames(self):
9176
    self.needed_locks = {}
9177

    
9178
  def CheckPrereq(self):
9179
    """Check prerequisites.
9180

9181
    This checks the pattern passed for validity by compiling it.
9182

9183
    """
9184
    try:
9185
      self.re = re.compile(self.op.pattern)
9186
    except re.error, err:
9187
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
9188
                                 (self.op.pattern, err), errors.ECODE_INVAL)
9189

    
9190
  def Exec(self, feedback_fn):
9191
    """Returns the tag list.
9192

9193
    """
9194
    cfg = self.cfg
9195
    tgts = [("/cluster", cfg.GetClusterInfo())]
9196
    ilist = cfg.GetAllInstancesInfo().values()
9197
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
9198
    nlist = cfg.GetAllNodesInfo().values()
9199
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
9200
    results = []
9201
    for path, target in tgts:
9202
      for tag in target.GetTags():
9203
        if self.re.search(tag):
9204
          results.append((path, tag))
9205
    return results
9206

    
9207

    
9208
class LUAddTags(TagsLU):
9209
  """Sets a tag on a given object.
9210

9211
  """
9212
  _OP_REQP = ["kind", "name", "tags"]
9213
  REQ_BGL = False
9214

    
9215
  def CheckPrereq(self):
9216
    """Check prerequisites.
9217

9218
    This checks the type and length of the tag name and value.
9219

9220
    """
9221
    TagsLU.CheckPrereq(self)
9222
    for tag in self.op.tags:
9223
      objects.TaggableObject.ValidateTag(tag)
9224

    
9225
  def Exec(self, feedback_fn):
9226
    """Sets the tag.
9227

9228
    """
9229
    try:
9230
      for tag in self.op.tags:
9231
        self.target.AddTag(tag)
9232
    except errors.TagError, err:
9233
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
9234
    self.cfg.Update(self.target, feedback_fn)
9235

    
9236

    
9237
class LUDelTags(TagsLU):
  """Delete a list of tags from a given object.

  """
  _OP_REQP = ["kind", "name", "tags"]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    @raise errors.OpPrereqError: if any of the tags is not set on the
        target object

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    wanted = frozenset(self.op.tags)
    present = self.target.GetTags()
    if wanted <= present:
      # all tags to be removed exist
      return
    # report the missing tags in a stable order
    missing = sorted(["'%s'" % tag for tag in (wanted - present)])
    raise errors.OpPrereqError("Tag(s) %s not found" %
                               (",".join(missing)), errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    All tags are removed from the target object, which is then written
    back to the configuration.

    """
    target = self.target
    for old_tag in self.op.tags:
      target.RemoveTag(old_tag)
    self.cfg.Update(target, feedback_fn)
9269

    
9270

    
9271
class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  _OP_REQP = ["duration", "on_master", "on_nodes"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if not self.op.on_nodes:
      return
    # _GetWantedNodes can be used here, but is not always appropriate to use
    # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
    # more information.
    self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
    self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    There is nothing to check for this LU.

    """

  def Exec(self, feedback_fn):
    """Do the actual sleep.

    @raise errors.OpExecError: if the delay on the master fails

    """
    duration = self.op.duration
    if self.op.on_master and not utils.TestDelay(duration):
      raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      node_results = self.rpc.call_test_delay(self.op.on_nodes, duration)
      for node, node_result in node_results.items():
        node_result.Raise("Failure during rpc call to node %s" % node)
9311

    
9312

    
9313
class IAllocator(object):
9314
  """IAllocator framework.
9315

9316
  An IAllocator instance has three sets of attributes:
9317
    - cfg that is needed to query the cluster
9318
    - input data (all members of the _KEYS class attribute are required)
9319
    - four buffer attributes (in|out_data|text), that represent the
9320
      input (to the external script) in text and data structure format,
9321
      and the output from it, again in two formats
9322
    - the result variables from the script (success, info, nodes) for
9323
      easy usage
9324

9325
  """
9326
  # pylint: disable-msg=R0902
9327
  # lots of instance attributes
9328
  _ALLO_KEYS = [
9329
    "name", "mem_size", "disks", "disk_template",
9330
    "os", "tags", "nics", "vcpus", "hypervisor",
9331
    ]
9332
  _RELO_KEYS = [
9333
    "name", "relocate_from",
9334
    ]
9335
  _EVAC_KEYS = [
9336
    "evac_nodes",
9337
    ]
9338

    
9339
  def __init__(self, cfg, rpc, mode, **kwargs):
9340
    self.cfg = cfg
9341
    self.rpc = rpc
9342
    # init buffer variables
9343
    self.in_text = self.out_text = self.in_data = self.out_data = None
9344
    # init all input fields so that pylint is happy
9345
    self.mode = mode
9346
    self.mem_size = self.disks = self.disk_template = None
9347
    self.os = self.tags = self.nics = self.vcpus = None
9348
    self.hypervisor = None
9349
    self.relocate_from = None
9350
    self.name = None
9351
    self.evac_nodes = None
9352
    # computed fields
9353
    self.required_nodes = None
9354
    # init result fields
9355
    self.success = self.info = self.result = None
9356
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
9357
      keyset = self._ALLO_KEYS
9358
      fn = self._AddNewInstance
9359
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
9360
      keyset = self._RELO_KEYS
9361
      fn = self._AddRelocateInstance
9362
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
9363
      keyset = self._EVAC_KEYS
9364
      fn = self._AddEvacuateNodes
9365
    else:
9366
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
9367
                                   " IAllocator" % self.mode)
9368
    for key in kwargs:
9369
      if key not in keyset:
9370
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
9371
                                     " IAllocator" % key)
9372
      setattr(self, key, kwargs[key])
9373

    
9374
    for key in keyset:
9375
      if key not in kwargs:
9376
        raise errors.ProgrammerError("Missing input parameter '%s' to"
9377
                                     " IAllocator" % key)
9378
    self._BuildInputData(fn)
9379

    
9380
  def _ComputeClusterData(self):
9381
    """Compute the generic allocator input data.
9382

9383
    This is the data that is independent of the actual operation.
9384

9385
    """
9386
    cfg = self.cfg
9387
    cluster_info = cfg.GetClusterInfo()
9388
    # cluster data
9389
    data = {
9390
      "version": constants.IALLOCATOR_VERSION,
9391
      "cluster_name": cfg.GetClusterName(),
9392
      "cluster_tags": list(cluster_info.GetTags()),
9393
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
9394
      # we don't have job IDs
9395
      }
9396
    iinfo = cfg.GetAllInstancesInfo().values()
9397
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
9398

    
9399
    # node data
9400
    node_results = {}
9401
    node_list = cfg.GetNodeList()
9402

    
9403
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
9404
      hypervisor_name = self.hypervisor
9405
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
9406
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
9407
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
9408
      hypervisor_name = cluster_info.enabled_hypervisors[0]
9409

    
9410
    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
9411
                                        hypervisor_name)
9412
    node_iinfo = \
9413
      self.rpc.call_all_instances_info(node_list,
9414
                                       cluster_info.enabled_hypervisors)
9415
    for nname, nresult in node_data.items():
9416
      # first fill in static (config-based) values
9417
      ninfo = cfg.GetNodeInfo(nname)
9418
      pnr = {
9419
        "tags": list(ninfo.GetTags()),
9420
        "primary_ip": ninfo.primary_ip,
9421
        "secondary_ip": ninfo.secondary_ip,
9422
        "offline": ninfo.offline,
9423
        "drained": ninfo.drained,
9424
        "master_candidate": ninfo.master_candidate,
9425
        }
9426

    
9427
      if not (ninfo.offline or ninfo.drained):
9428
        nresult.Raise("Can't get data for node %s" % nname)
9429
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
9430
                                nname)
9431
        remote_info = nresult.payload
9432

    
9433
        for attr in ['memory_total', 'memory_free', 'memory_dom0',
9434
                     'vg_size', 'vg_free', 'cpu_total']:
9435
          if attr not in remote_info:
9436
            raise errors.OpExecError("Node '%s' didn't return attribute"
9437
                                     " '%s'" % (nname, attr))
9438
          if not isinstance(remote_info[attr], int):
9439
            raise errors.OpExecError("Node '%s' returned invalid value"
9440
                                     " for '%s': %s" %
9441
                                     (nname, attr, remote_info[attr]))
9442
        # compute memory used by primary instances
9443
        i_p_mem = i_p_up_mem = 0
9444
        for iinfo, beinfo in i_list:
9445
          if iinfo.primary_node == nname:
9446
            i_p_mem += beinfo[constants.BE_MEMORY]
9447
            if iinfo.name not in node_iinfo[nname].payload:
9448
              i_used_mem = 0
9449
            else:
9450
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
9451
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
9452
            remote_info['memory_free'] -= max(0, i_mem_diff)
9453

    
9454
            if iinfo.admin_up:
9455
              i_p_up_mem += beinfo[constants.BE_MEMORY]
9456

    
9457
        # compute memory used by instances
9458
        pnr_dyn = {
9459
          "total_memory": remote_info['memory_total'],
9460
          "reserved_memory": remote_info['memory_dom0'],
9461
          "free_memory": remote_info['memory_free'],
9462
          "total_disk": remote_info['vg_size'],
9463
          "free_disk": remote_info['vg_free'],
9464
          "total_cpus": remote_info['cpu_total'],
9465
          "i_pri_memory": i_p_mem,
9466
          "i_pri_up_memory": i_p_up_mem,
9467
          }
9468
        pnr.update(pnr_dyn)
9469

    
9470
      node_results[nname] = pnr
9471
    data["nodes"] = node_results
9472

    
9473
    # instance data
9474
    instance_data = {}
9475
    for iinfo, beinfo in i_list:
9476
      nic_data = []
9477
      for nic in iinfo.nics:
9478
        filled_params = objects.FillDict(
9479
            cluster_info.nicparams[constants.PP_DEFAULT],
9480
            nic.nicparams)
9481
        nic_dict = {"mac": nic.mac,
9482
                    "ip": nic.ip,
9483
                    "mode": filled_params[constants.NIC_MODE],
9484
                    "link": filled_params[constants.NIC_LINK],
9485
                   }
9486
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
9487
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
9488
        nic_data.append(nic_dict)
9489
      pir = {
9490
        "tags": list(iinfo.GetTags()),
9491
        "admin_up": iinfo.admin_up,
9492
        "vcpus": beinfo[constants.BE_VCPUS],
9493
        "memory": beinfo[constants.BE_MEMORY],
9494
        "os": iinfo.os,
9495
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
9496
        "nics": nic_data,
9497
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
9498
        "disk_template": iinfo.disk_template,
9499
        "hypervisor": iinfo.hypervisor,
9500
        }
9501
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
9502
                                                 pir["disks"])
9503
      instance_data[iinfo.name] = pir
9504

    
9505
    data["instances"] = instance_data
9506

    
9507
    self.in_data = data
9508

    
9509
  def _AddNewInstance(self):
9510
    """Add new instance data to allocator structure.
9511

9512
    This in combination with _AllocatorGetClusterData will create the
9513
    correct structure needed as input for the allocator.
9514

9515
    The checks for the completeness of the opcode must have already been
9516
    done.
9517

9518
    """
9519
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)
9520

    
9521
    if self.disk_template in constants.DTS_NET_MIRROR:
9522
      self.required_nodes = 2
9523
    else:
9524
      self.required_nodes = 1
9525
    request = {
9526
      "name": self.name,
9527
      "disk_template": self.disk_template,
9528
      "tags": self.tags,
9529
      "os": self.os,
9530
      "vcpus": self.vcpus,
9531
      "memory": self.mem_size,
9532
      "disks": self.disks,
9533
      "disk_space_total": disk_space,
9534
      "nics": self.nics,
9535
      "required_nodes": self.required_nodes,
9536
      }
9537
    return request
9538

    
9539
  def _AddRelocateInstance(self):
9540
    """Add relocate instance data to allocator structure.
9541

9542
    This in combination with _IAllocatorGetClusterData will create the
9543
    correct structure needed as input for the allocator.
9544

9545
    The checks for the completeness of the opcode must have already been
9546
    done.
9547

9548
    """
9549
    instance = self.cfg.GetInstanceInfo(self.name)
9550
    if instance is None:
9551
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
9552
                                   " IAllocator" % self.name)
9553

    
9554
    if instance.disk_template not in constants.DTS_NET_MIRROR:
9555
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
9556
                                 errors.ECODE_INVAL)
9557

    
9558
    if len(instance.secondary_nodes) != 1:
9559
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
9560
                                 errors.ECODE_STATE)
9561

    
9562
    self.required_nodes = 1
9563
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
9564
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
9565

    
9566
    request = {
9567
      "name": self.name,
9568
      "disk_space_total": disk_space,
9569
      "required_nodes": self.required_nodes,
9570
      "relocate_from": self.relocate_from,
9571
      }
9572
    return request
9573

    
9574
  def _AddEvacuateNodes(self):
9575
    """Add evacuate nodes data to allocator structure.
9576

9577
    """
9578
    request = {
9579
      "evac_nodes": self.evac_nodes
9580
      }
9581
    return request
9582

    
9583
  def _BuildInputData(self, fn):
9584
    """Build input data structures.
9585

9586
    """
9587
    self._ComputeClusterData()
9588

    
9589
    request = fn()
9590
    request["type"] = self.mode
9591
    self.in_data["request"] = request
9592

    
9593
    self.in_text = serializer.Dump(self.in_data)
9594

    
9595
  def Run(self, name, validate=True, call_fn=None):
9596
    """Run an instance allocator and return the results.
9597

9598
    """
9599
    if call_fn is None:
9600
      call_fn = self.rpc.call_iallocator_runner
9601

    
9602
    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
9603
    result.Raise("Failure while running the iallocator script")
9604

    
9605
    self.out_text = result.payload
9606
    if validate:
9607
      self._ValidateResult()
9608

    
9609
  def _ValidateResult(self):
9610
    """Process the allocator results.
9611

9612
    This will process and if successful save the result in
9613
    self.out_data and the other parameters.
9614

9615
    """
9616
    try:
9617
      rdict = serializer.Load(self.out_text)
9618
    except Exception, err:
9619
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
9620

    
9621
    if not isinstance(rdict, dict):
9622
      raise errors.OpExecError("Can't parse iallocator results: not a dict")
9623

    
9624
    # TODO: remove backwards compatiblity in later versions
9625
    if "nodes" in rdict and "result" not in rdict:
9626
      rdict["result"] = rdict["nodes"]
9627
      del rdict["nodes"]
9628

    
9629
    for key in "success", "info", "result":
9630
      if key not in rdict:
9631
        raise errors.OpExecError("Can't parse iallocator results:"
9632
                                 " missing key '%s'" % key)
9633
      setattr(self, key, rdict[key])
9634

    
9635
    if not isinstance(rdict["result"], list):
9636
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
9637
                               " is not a list")
9638
    self.out_data = rdict
9639

    
9640

    
9641
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU builds the allocator input for the requested mode and either
  returns that input text or runs the named allocator on it and returns
  the allocator's unvalidated output text.

  """
  _OP_REQP = ["direction", "mode", "name"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    of the test.

    @raise errors.OpPrereqError: if a parameter required by the mode or
        direction is missing or malformed, if the instance to allocate
        already exists, or if the mode or direction is unknown

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["name", "mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      # an allocation test must use a name that is not yet in the cluster
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      for row in self.op.nics:
        if (not isinstance(row, dict) or
            "mac" not in row or
            "ip" not in row or
            "bridge" not in row):
          raise errors.OpPrereqError("Invalid contents of the 'nics'"
                                     " parameter", errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      # fall back to the hypervisor type reported by the configuration
      if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      if not hasattr(self.op, "name"):
        raise errors.OpPrereqError("Missing attribute 'name' on opcode input",
                                   errors.ECODE_INVAL)
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      # the relocation test moves the instance away from its secondaries
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      # the allocator is only executed for the "out" direction
      if not hasattr(self.op, "allocator") or self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    @return: the allocator input text for the "in" direction, otherwise
        the unvalidated output text of the allocator run
    @raise errors.ProgrammerError: if the mode is not one of the modes
        handled here

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      # the mode must be interpolated with "%"; passing it as a second
      # exception argument would leave a literal "%s" in the message
      raise errors.ProgrammerError("Uncatched mode %s in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result