root / lib / cmdlib.py @ 7672a621

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable-msg=W0201
25

    
26
# W0201 since most LU attributes are defined in CheckPrereq or similar
27
# functions
28

    
29
import os
30
import os.path
31
import time
32
import re
33
import platform
34
import logging
35
import copy
36
import OpenSSL
37

    
38
from ganeti import ssh
39
from ganeti import utils
40
from ganeti import errors
41
from ganeti import hypervisor
42
from ganeti import locking
43
from ganeti import constants
44
from ganeti import objects
45
from ganeti import serializer
46
from ganeti import ssconf
47
from ganeti import uidpool
48
from ganeti import compat
49

    
50

    
51
class LogicalUnit(object):
52
  """Logical Unit base class.
53

54
  Subclasses must follow these rules:
55
    - implement ExpandNames
56
    - implement CheckPrereq (except when tasklets are used)
57
    - implement Exec (except when tasklets are used)
58
    - implement BuildHooksEnv
59
    - redefine HPATH and HTYPE
60
    - optionally redefine their run requirements:
61
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
62

63
  Note that all commands require root permissions.
64

65
  @ivar dry_run_result: the value (if any) that will be returned to the caller
66
      in dry-run mode (signalled by opcode dry_run parameter)
67

68
  """
69
  HPATH = None
70
  HTYPE = None
71
  _OP_REQP = []
72
  REQ_BGL = True
73

    
74
  def __init__(self, processor, op, context, rpc):
75
    """Constructor for LogicalUnit.
76

77
    This needs to be overridden in derived classes in order to check op
78
    validity.
79

80
    """
81
    self.proc = processor
82
    self.op = op
83
    self.cfg = context.cfg
84
    self.context = context
85
    self.rpc = rpc
86
    # Dicts used to declare locking needs to mcpu
87
    self.needed_locks = None
88
    self.acquired_locks = {}
89
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
90
    self.add_locks = {}
91
    self.remove_locks = {}
92
    # Used to force good behavior when calling helper functions
93
    self.recalculate_locks = {}
94
    self.__ssh = None
95
    # logging
96
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
97
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
98
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
99
    # support for dry-run
100
    self.dry_run_result = None
101
    # support for generic debug attribute
102
    if (not hasattr(self.op, "debug_level") or
103
        not isinstance(self.op.debug_level, int)):
104
      self.op.debug_level = 0
105

    
106
    # Tasklets
107
    self.tasklets = None
108

    
109
    for attr_name in self._OP_REQP:
110
      attr_val = getattr(op, attr_name, None)
111
      if attr_val is None:
112
        raise errors.OpPrereqError("Required parameter '%s' missing" %
113
                                   attr_name, errors.ECODE_INVAL)
114

    
115
    self.CheckArguments()
116

    
117
  def __GetSSH(self):
118
    """Returns the SshRunner object
119

120
    """
121
    if not self.__ssh:
122
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
123
    return self.__ssh
124

    
125
  ssh = property(fget=__GetSSH)
126

    
127
  def CheckArguments(self):
128
    """Check syntactic validity for the opcode arguments.
129

130
    This method is for doing a simple syntactic check and ensuring the
131
    validity of opcode parameters, without any cluster-related
132
    checks. While the same can be accomplished in ExpandNames and/or
133
    CheckPrereq, doing these separately is better because:
134

135
      - ExpandNames is left as purely a lock-related function
136
      - CheckPrereq is run after we have acquired locks (and possibly
137
        waited for them)
138

139
    The function is allowed to change the self.op attribute so that
140
    later methods no longer need to worry about missing parameters.
141

142
    """
143
    pass
144

    
145
  def ExpandNames(self):
146
    """Expand names for this LU.
147

148
    This method is called before starting to execute the opcode, and it should
149
    update all the parameters of the opcode to their canonical form (e.g. a
150
    short node name must be fully expanded after this method has successfully
151
    completed). This way locking, hooks, logging, etc. can work correctly.
152

153
    LUs which implement this method must also populate the self.needed_locks
154
    member, as a dict with lock levels as keys, and a list of needed lock names
155
    as values. Rules:
156

157
      - use an empty dict if you don't need any lock
158
      - if you don't need any lock at a particular level omit that level
159
      - don't put anything for the BGL level
160
      - if you want all locks at a level use locking.ALL_SET as a value
161

162
    If you need to share locks (rather than acquire them exclusively) at one
163
    level you can modify self.share_locks, setting a true value (usually 1) for
164
    that level. By default locks are not shared.
165

166
    This function can also define a list of tasklets, which then will be
167
    executed in order instead of the usual LU-level CheckPrereq and Exec
168
    functions, if those are not defined by the LU.
169

170
    Examples::
171

172
      # Acquire all nodes and one instance
173
      self.needed_locks = {
174
        locking.LEVEL_NODE: locking.ALL_SET,
175
        locking.LEVEL_INSTANCE: ['instance1.example.tld'],
176
      }
177
      # Acquire just two nodes
178
      self.needed_locks = {
179
        locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
180
      }
181
      # Acquire no locks
182
      self.needed_locks = {} # No, you can't leave it to the default value None
183

184
    """
185
    # The implementation of this method is mandatory only if the new LU is
186
    # concurrent, so that old LUs don't need to be changed all at the same
187
    # time.
188
    if self.REQ_BGL:
189
      self.needed_locks = {} # Exclusive LUs don't need locks.
190
    else:
191
      raise NotImplementedError
192

    
193
  def DeclareLocks(self, level):
194
    """Declare LU locking needs for a level
195

196
    While most LUs can just declare their locking needs at ExpandNames time,
197
    sometimes there's the need to calculate some locks after having acquired
198
    the ones before. This function is called just before acquiring locks at a
199
    particular level, but after acquiring the ones at lower levels, and permits
200
    such calculations. It can be used to modify self.needed_locks, and by
201
    default it does nothing.
202

203
    This function is only called if you have something already set in
204
    self.needed_locks for the level.
205

206
    @param level: Locking level which is going to be locked
207
    @type level: member of ganeti.locking.LEVELS
208

209
    """
210

    
211
  def CheckPrereq(self):
212
    """Check prerequisites for this LU.
213

214
    This method should check that the prerequisites for the execution
215
    of this LU are fulfilled. It can do internode communication, but
216
    it should be idempotent - no cluster or system changes are
217
    allowed.
218

219
    The method should raise errors.OpPrereqError in case something is
220
    not fulfilled. Its return value is ignored.
221

222
    This method should also update all the parameters of the opcode to
223
    their canonical form if it hasn't been done by ExpandNames before.
224

225
    """
226
    if self.tasklets is not None:
227
      for (idx, tl) in enumerate(self.tasklets):
228
        logging.debug("Checking prerequisites for tasklet %s/%s",
229
                      idx + 1, len(self.tasklets))
230
        tl.CheckPrereq()
231
    else:
232
      raise NotImplementedError
233

    
234
  def Exec(self, feedback_fn):
235
    """Execute the LU.
236

237
    This method should implement the actual work. It should raise
238
    errors.OpExecError for failures that are somewhat dealt with in
239
    code, or expected.
240

241
    """
242
    if self.tasklets is not None:
243
      for (idx, tl) in enumerate(self.tasklets):
244
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
245
        tl.Exec(feedback_fn)
246
    else:
247
      raise NotImplementedError
248

    
249
  def BuildHooksEnv(self):
250
    """Build hooks environment for this LU.
251

252
    This method should return a three-element tuple consisting of: a dict
253
    containing the environment that will be used for running the
254
    specific hook for this LU, a list of node names on which the hook
255
    should run before the execution, and a list of node names on which
256
    the hook should run after the execution.
257

258
    The keys of the dict must not be prefixed with 'GANETI_' as this will
259
    be handled in the hooks runner. Also note additional keys will be
260
    added by the hooks runner. If the LU doesn't define any
261
    environment, an empty dict (and not None) should be returned.
262

263
    If there are no nodes, an empty list (and not None) should be returned.
264

265
    Note that if the HPATH for a LU class is None, this function will
266
    not be called.
267

268
    """
269
    raise NotImplementedError
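
  # Hedged sketch of the contract above (hypothetical node-level LU; the
  # op.node_name parameter is assumed, not taken from any concrete LU):
  #
  #   env = {"OP_TARGET": self.op.node_name, "NODE_NAME": self.op.node_name}
  #   mn = self.cfg.GetMasterNode()
  #   return env, [mn], [mn, self.op.node_name]
  #
  # i.e. the hook runs on the master before execution and on both the master
  # and the affected node afterwards.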
270

    
271
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
272
    """Notify the LU about the results of its hooks.
273

274
    This method is called every time a hooks phase is executed, and notifies
275
    the Logical Unit about the hooks' result. The LU can then use it to alter
276
    its result based on the hooks.  By default the method does nothing and the
277
    previous result is passed back unchanged but any LU can define it if it
278
    wants to use the local cluster hook-scripts somehow.
279

280
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
281
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
282
    @param hook_results: the results of the multi-node hooks rpc call
283
    @param feedback_fn: function used to send feedback back to the caller
284
    @param lu_result: the previous Exec result this LU had, or None
285
        in the PRE phase
286
    @return: the new Exec result, based on the previous result
287
        and hook results
288

289
    """
290
    # API must be kept, thus we ignore the "unused argument" and "could
291
    # be a function" warnings
292
    # pylint: disable-msg=W0613,R0201
293
    return lu_result
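
  # Illustrative override only (assumed, not taken from any concrete LU): an
  # LU wanting to report on its post-phase hooks could do something like
  #
  #   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
  #     if phase == constants.HOOKS_PHASE_POST:
  #       feedback_fn("Post hooks ran on %d node(s)" % len(hook_results))
  #     return lu_result
  #
  # keeping the signature unchanged so the mcpu-level API is preserved.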
294

    
295
  def _ExpandAndLockInstance(self):
296
    """Helper function to expand and lock an instance.
297

298
    Many LUs that work on an instance take its name in self.op.instance_name
299
    and need to expand it and then declare the expanded name for locking. This
300
    function does it, and then updates self.op.instance_name to the expanded
301
    name. It also initializes needed_locks as a dict, if this hasn't been done
302
    before.
303

304
    """
305
    if self.needed_locks is None:
306
      self.needed_locks = {}
307
    else:
308
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
309
        "_ExpandAndLockInstance called with instance-level locks set"
310
    self.op.instance_name = _ExpandInstanceName(self.cfg,
311
                                                self.op.instance_name)
312
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
313

    
314
  def _LockInstancesNodes(self, primary_only=False):
315
    """Helper function to declare instances' nodes for locking.
316

317
    This function should be called after locking one or more instances to lock
318
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
319
    with all primary or secondary nodes for instances already locked and
320
    present in self.needed_locks[locking.LEVEL_INSTANCE].
321

322
    It should be called from DeclareLocks, and for safety only works if
323
    self.recalculate_locks[locking.LEVEL_NODE] is set.
324

325
    In the future it may grow parameters to just lock some instance's nodes, or
326
    to just lock primaries or secondary nodes, if needed.
327

328
    It should be called from DeclareLocks in a way similar to::
329

330
      if level == locking.LEVEL_NODE:
331
        self._LockInstancesNodes()
332

333
    @type primary_only: boolean
334
    @param primary_only: only lock primary nodes of locked instances
335

336
    """
337
    assert locking.LEVEL_NODE in self.recalculate_locks, \
338
      "_LockInstancesNodes helper function called with no nodes to recalculate"
339

    
340
    # TODO: check if we have really been called with the instance locks held
341

    
342
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
343
    # future we might want to have different behaviors depending on the value
344
    # of self.recalculate_locks[locking.LEVEL_NODE]
345
    wanted_nodes = []
346
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
347
      instance = self.context.cfg.GetInstanceInfo(instance_name)
348
      wanted_nodes.append(instance.primary_node)
349
      if not primary_only:
350
        wanted_nodes.extend(instance.secondary_nodes)
351

    
352
    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
353
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
354
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
355
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
356

    
357
    del self.recalculate_locks[locking.LEVEL_NODE]
358

    
359

    
360
class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
361
  """Simple LU which runs no hooks.
362

363
  This LU is intended as a parent for other LogicalUnits which will
364
  run no hooks, in order to reduce duplicate code.
365

366
  """
367
  HPATH = None
368
  HTYPE = None
369

    
370
  def BuildHooksEnv(self):
371
    """Empty BuildHooksEnv for NoHooksLu.
372

373
    This just raises an error.
374

375
    """
376
    assert False, "BuildHooksEnv called for NoHooksLUs"
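
# The class below is an illustrative sketch only: its name is hypothetical and
# it is not part of this module. It shows how a non-BGL, instance-level LU
# typically combines _ExpandAndLockInstance and _LockInstancesNodes (described
# above) across ExpandNames and DeclareLocks.
class _LUExampleInstanceNoop(NoHooksLU):
  """Example LU which locks one instance plus its nodes and does nothing."""
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    # Expands self.op.instance_name and declares the instance-level lock
    self._ExpandAndLockInstance()
    # Node locks can only be computed once the instance lock is held, so
    # declare an empty list here and let DeclareLocks fill it in
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    # The name was fully expanded in ExpandNames, so a direct lookup works
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)

  def Exec(self, feedback_fn):
    feedback_fn("Would operate on instance %s here" % self.instance.name)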
377

    
378

    
379
class Tasklet:
380
  """Tasklet base class.
381

382
  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
383
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
384
  tasklets know nothing about locks.
385

386
  Subclasses must follow these rules:
387
    - Implement CheckPrereq
388
    - Implement Exec
389

390
  """
391
  def __init__(self, lu):
392
    self.lu = lu
393

    
394
    # Shortcuts
395
    self.cfg = lu.cfg
396
    self.rpc = lu.rpc
397

    
398
  def CheckPrereq(self):
399
    """Check prerequisites for this tasklets.
400

401
    This method should check whether the prerequisites for the execution of
402
    this tasklet are fulfilled. It can do internode communication, but it
403
    should be idempotent - no cluster or system changes are allowed.
404

405
    The method should raise errors.OpPrereqError in case something is not
406
    fulfilled. Its return value is ignored.
407

408
    This method should also update all parameters to their canonical form if it
409
    hasn't been done before.
410

411
    """
412
    raise NotImplementedError
413

    
414
  def Exec(self, feedback_fn):
415
    """Execute the tasklet.
416

417
    This method should implement the actual work. It should raise
418
    errors.OpExecError for failures that are somewhat dealt with in code, or
419
    expected.
420

421
    """
422
    raise NotImplementedError
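
# Illustrative sketch only: the tasklet below is hypothetical and not part of
# this module. The owning LU would typically create such objects in
# ExpandNames, e.g. self.tasklets = [_ExampleNoopTasklet(self, name) for name
# in names], after which the base-class CheckPrereq and Exec above run them in
# order.
class _ExampleNoopTasklet(Tasklet):
  """Example tasklet validating an instance name and reporting it."""
  def __init__(self, lu, instance_name):
    Tasklet.__init__(self, lu)
    self.instance_name = instance_name

  def CheckPrereq(self):
    # Canonicalize the name; locking must already have been done by the LU
    self.instance_name = _ExpandInstanceName(self.cfg, self.instance_name)

  def Exec(self, feedback_fn):
    feedback_fn("Nothing to do for instance %s" % self.instance_name)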
423

    
424

    
425
def _GetWantedNodes(lu, nodes):
426
  """Returns list of checked and expanded node names.
427

428
  @type lu: L{LogicalUnit}
429
  @param lu: the logical unit on whose behalf we execute
430
  @type nodes: list
431
  @param nodes: non-empty list of node names
432
  @rtype: list
433
  @return: the list of nodes, sorted
434
  @raise errors.ProgrammerError: if the nodes parameter is wrong type
435

436
  """
437
  if not isinstance(nodes, list):
438
    raise errors.OpPrereqError("Invalid argument type 'nodes'",
439
                               errors.ECODE_INVAL)
440

    
441
  if not nodes:
442
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
443
      " non-empty list of nodes whose name is to be expanded.")
444

    
445
  wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
446
  return utils.NiceSort(wanted)
447

    
448

    
449
def _GetWantedInstances(lu, instances):
450
  """Returns list of checked and expanded instance names.
451

452
  @type lu: L{LogicalUnit}
453
  @param lu: the logical unit on whose behalf we execute
454
  @type instances: list
455
  @param instances: list of instance names or None for all instances
456
  @rtype: list
457
  @return: the list of instances, sorted
458
  @raise errors.OpPrereqError: if the instances parameter is wrong type
459
  @raise errors.OpPrereqError: if any of the passed instances is not found
460

461
  """
462
  if not isinstance(instances, list):
463
    raise errors.OpPrereqError("Invalid argument type 'instances'",
464
                               errors.ECODE_INVAL)
465

    
466
  if instances:
467
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
468
  else:
469
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
470
  return wanted
471

    
472

    
473
def _CheckOutputFields(static, dynamic, selected):
474
  """Checks whether all selected fields are valid.
475

476
  @type static: L{utils.FieldSet}
477
  @param static: static fields set
478
  @type dynamic: L{utils.FieldSet}
479
  @param dynamic: dynamic fields set
480

481
  """
482
  f = utils.FieldSet()
483
  f.Extend(static)
484
  f.Extend(dynamic)
485

    
486
  delta = f.NonMatching(selected)
487
  if delta:
488
    raise errors.OpPrereqError("Unknown output fields selected: %s"
489
                               % ",".join(delta), errors.ECODE_INVAL)
490

    
491

    
492
def _CheckBooleanOpField(op, name):
493
  """Validates boolean opcode parameters.
494

495
  This will ensure that an opcode parameter is either a boolean value,
496
  or None (but that it always exists).
497

498
  """
499
  val = getattr(op, name, None)
500
  if not (val is None or isinstance(val, bool)):
501
    raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
502
                               (name, str(val)), errors.ECODE_INVAL)
503
  setattr(op, name, val)
504

    
505

    
506
def _CheckGlobalHvParams(params):
507
  """Validates that given hypervisor params are not global ones.
508

509
  This will ensure that instances don't get customised versions of
510
  global params.
511

512
  """
513
  used_globals = constants.HVC_GLOBALS.intersection(params)
514
  if used_globals:
515
    msg = ("The following hypervisor parameters are global and cannot"
516
           " be customized at instance level, please modify them at"
517
           " cluster level: %s" % utils.CommaJoin(used_globals))
518
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
519

    
520

    
521
def _CheckNodeOnline(lu, node):
522
  """Ensure that a given node is online.
523

524
  @param lu: the LU on behalf of which we make the check
525
  @param node: the node to check
526
  @raise errors.OpPrereqError: if the node is offline
527

528
  """
529
  if lu.cfg.GetNodeInfo(node).offline:
530
    raise errors.OpPrereqError("Can't use offline node %s" % node,
531
                               errors.ECODE_INVAL)
532

    
533

    
534
def _CheckNodeNotDrained(lu, node):
535
  """Ensure that a given node is not drained.
536

537
  @param lu: the LU on behalf of which we make the check
538
  @param node: the node to check
539
  @raise errors.OpPrereqError: if the node is drained
540

541
  """
542
  if lu.cfg.GetNodeInfo(node).drained:
543
    raise errors.OpPrereqError("Can't use drained node %s" % node,
544
                               errors.ECODE_INVAL)
545

    
546

    
547
def _CheckNodeHasOS(lu, node, os_name, force_variant):
548
  """Ensure that a node supports a given OS.
549

550
  @param lu: the LU on behalf of which we make the check
551
  @param node: the node to check
552
  @param os_name: the OS to query about
553
  @param force_variant: whether to ignore variant errors
554
  @raise errors.OpPrereqError: if the node is not supporting the OS
555

556
  """
557
  result = lu.rpc.call_os_get(node, os_name)
558
  result.Raise("OS '%s' not in supported OS list for node %s" %
559
               (os_name, node),
560
               prereq=True, ecode=errors.ECODE_INVAL)
561
  if not force_variant:
562
    _CheckOSVariant(result.payload, os_name)
563

    
564

    
565
def _RequireFileStorage():
566
  """Checks that file storage is enabled.
567

568
  @raise errors.OpPrereqError: when file storage is disabled
569

570
  """
571
  if not constants.ENABLE_FILE_STORAGE:
572
    raise errors.OpPrereqError("File storage disabled at configure time",
573
                               errors.ECODE_INVAL)
574

    
575

    
576
def _CheckDiskTemplate(template):
577
  """Ensure a given disk template is valid.
578

579
  """
580
  if template not in constants.DISK_TEMPLATES:
581
    msg = ("Invalid disk template name '%s', valid templates are: %s" %
582
           (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
583
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
584
  if template == constants.DT_FILE:
585
    _RequireFileStorage()
586

    
587

    
588
def _CheckStorageType(storage_type):
589
  """Ensure a given storage type is valid.
590

591
  """
592
  if storage_type not in constants.VALID_STORAGE_TYPES:
593
    raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
594
                               errors.ECODE_INVAL)
595
  if storage_type == constants.ST_FILE:
596
    _RequireFileStorage()
597

    
598

    
599

    
600
def _CheckInstanceDown(lu, instance, reason):
601
  """Ensure that an instance is not running."""
602
  if instance.admin_up:
603
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
604
                               (instance.name, reason), errors.ECODE_STATE)
605

    
606
  pnode = instance.primary_node
607
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
608
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
609
              prereq=True, ecode=errors.ECODE_ENVIRON)
610

    
611
  if instance.name in ins_l.payload:
612
    raise errors.OpPrereqError("Instance %s is running, %s" %
613
                               (instance.name, reason), errors.ECODE_STATE)
614

    
615

    
616
def _ExpandItemName(fn, name, kind):
617
  """Expand an item name.
618

619
  @param fn: the function to use for expansion
620
  @param name: requested item name
621
  @param kind: text description ('Node' or 'Instance')
622
  @return: the resolved (full) name
623
  @raise errors.OpPrereqError: if the item is not found
624

625
  """
626
  full_name = fn(name)
627
  if full_name is None:
628
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
629
                               errors.ECODE_NOENT)
630
  return full_name
631

    
632

    
633
def _ExpandNodeName(cfg, name):
634
  """Wrapper over L{_ExpandItemName} for nodes."""
635
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
636

    
637

    
638
def _ExpandInstanceName(cfg, name):
639
  """Wrapper over L{_ExpandItemName} for instance."""
640
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
641

    
642

    
643
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
644
                          memory, vcpus, nics, disk_template, disks,
645
                          bep, hvp, hypervisor_name):
646
  """Builds instance related env variables for hooks
647

648
  This builds the hook environment from individual variables.
649

650
  @type name: string
651
  @param name: the name of the instance
652
  @type primary_node: string
653
  @param primary_node: the name of the instance's primary node
654
  @type secondary_nodes: list
655
  @param secondary_nodes: list of secondary nodes as strings
656
  @type os_type: string
657
  @param os_type: the name of the instance's OS
658
  @type status: boolean
659
  @param status: the should_run status of the instance
660
  @type memory: string
661
  @param memory: the memory size of the instance
662
  @type vcpus: string
663
  @param vcpus: the count of VCPUs the instance has
664
  @type nics: list
665
  @param nics: list of tuples (ip, mac, mode, link) representing
666
      the NICs the instance has
667
  @type disk_template: string
668
  @param disk_template: the disk template of the instance
669
  @type disks: list
670
  @param disks: the list of (size, mode) pairs
671
  @type bep: dict
672
  @param bep: the backend parameters for the instance
673
  @type hvp: dict
674
  @param hvp: the hypervisor parameters for the instance
675
  @type hypervisor_name: string
676
  @param hypervisor_name: the hypervisor for the instance
677
  @rtype: dict
678
  @return: the hook environment for this instance
679

680
  """
681
  if status:
682
    str_status = "up"
683
  else:
684
    str_status = "down"
685
  env = {
686
    "OP_TARGET": name,
687
    "INSTANCE_NAME": name,
688
    "INSTANCE_PRIMARY": primary_node,
689
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
690
    "INSTANCE_OS_TYPE": os_type,
691
    "INSTANCE_STATUS": str_status,
692
    "INSTANCE_MEMORY": memory,
693
    "INSTANCE_VCPUS": vcpus,
694
    "INSTANCE_DISK_TEMPLATE": disk_template,
695
    "INSTANCE_HYPERVISOR": hypervisor_name,
696
  }
697

    
698
  if nics:
699
    nic_count = len(nics)
700
    for idx, (ip, mac, mode, link) in enumerate(nics):
701
      if ip is None:
702
        ip = ""
703
      env["INSTANCE_NIC%d_IP" % idx] = ip
704
      env["INSTANCE_NIC%d_MAC" % idx] = mac
705
      env["INSTANCE_NIC%d_MODE" % idx] = mode
706
      env["INSTANCE_NIC%d_LINK" % idx] = link
707
      if mode == constants.NIC_MODE_BRIDGED:
708
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
709
  else:
710
    nic_count = 0
711

    
712
  env["INSTANCE_NIC_COUNT"] = nic_count
713

    
714
  if disks:
715
    disk_count = len(disks)
716
    for idx, (size, mode) in enumerate(disks):
717
      env["INSTANCE_DISK%d_SIZE" % idx] = size
718
      env["INSTANCE_DISK%d_MODE" % idx] = mode
719
  else:
720
    disk_count = 0
721

    
722
  env["INSTANCE_DISK_COUNT"] = disk_count
723

    
724
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
725
    for key, value in source.items():
726
      env["INSTANCE_%s_%s" % (kind, key)] = value
727

    
728
  return env
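
# For illustration only (hypothetical single-NIC, single-disk instance), the
# dict returned above contains keys along the lines of:
#
#   OP_TARGET, INSTANCE_NAME, INSTANCE_PRIMARY, INSTANCE_SECONDARIES,
#   INSTANCE_OS_TYPE, INSTANCE_STATUS, INSTANCE_MEMORY, INSTANCE_VCPUS,
#   INSTANCE_DISK_TEMPLATE, INSTANCE_HYPERVISOR,
#   INSTANCE_NIC_COUNT, INSTANCE_NIC0_IP, INSTANCE_NIC0_MAC,
#   INSTANCE_NIC0_MODE, INSTANCE_NIC0_LINK (plus INSTANCE_NIC0_BRIDGE for
#   bridged NICs), INSTANCE_DISK_COUNT, INSTANCE_DISK0_SIZE,
#   INSTANCE_DISK0_MODE,
#
# plus one INSTANCE_BE_* and INSTANCE_HV_* entry per backend and hypervisor
# parameter; the hooks runner later prefixes every key with GANETI_.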
729

    
730

    
731
def _NICListToTuple(lu, nics):
732
  """Build a list of nic information tuples.
733

734
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
735
  value in LUQueryInstanceData.
736

737
  @type lu:  L{LogicalUnit}
738
  @param lu: the logical unit on whose behalf we execute
739
  @type nics: list of L{objects.NIC}
740
  @param nics: list of nics to convert to hooks tuples
741

742
  """
743
  hooks_nics = []
744
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]
745
  for nic in nics:
746
    ip = nic.ip
747
    mac = nic.mac
748
    filled_params = objects.FillDict(c_nicparams, nic.nicparams)
749
    mode = filled_params[constants.NIC_MODE]
750
    link = filled_params[constants.NIC_LINK]
751
    hooks_nics.append((ip, mac, mode, link))
752
  return hooks_nics
753

    
754

    
755
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
756
  """Builds instance related env variables for hooks from an object.
757

758
  @type lu: L{LogicalUnit}
759
  @param lu: the logical unit on whose behalf we execute
760
  @type instance: L{objects.Instance}
761
  @param instance: the instance for which we should build the
762
      environment
763
  @type override: dict
764
  @param override: dictionary with key/values that will override
765
      our values
766
  @rtype: dict
767
  @return: the hook environment dictionary
768

769
  """
770
  cluster = lu.cfg.GetClusterInfo()
771
  bep = cluster.FillBE(instance)
772
  hvp = cluster.FillHV(instance)
773
  args = {
774
    'name': instance.name,
775
    'primary_node': instance.primary_node,
776
    'secondary_nodes': instance.secondary_nodes,
777
    'os_type': instance.os,
778
    'status': instance.admin_up,
779
    'memory': bep[constants.BE_MEMORY],
780
    'vcpus': bep[constants.BE_VCPUS],
781
    'nics': _NICListToTuple(lu, instance.nics),
782
    'disk_template': instance.disk_template,
783
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
784
    'bep': bep,
785
    'hvp': hvp,
786
    'hypervisor_name': instance.hypervisor,
787
  }
788
  if override:
789
    args.update(override)
790
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
791

    
792

    
793
def _AdjustCandidatePool(lu, exceptions):
794
  """Adjust the candidate pool after node operations.
795

796
  """
797
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
798
  if mod_list:
799
    lu.LogInfo("Promoted nodes to master candidate role: %s",
800
               utils.CommaJoin(node.name for node in mod_list))
801
    for name in mod_list:
802
      lu.context.ReaddNode(name)
803
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
804
  if mc_now > mc_max:
805
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
806
               (mc_now, mc_max))
807

    
808

    
809
def _DecideSelfPromotion(lu, exceptions=None):
810
  """Decide whether I should promote myself as a master candidate.
811

812
  """
813
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
814
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
815
  # the new node will increase mc_max by one, so:
816
  mc_should = min(mc_should + 1, cp_size)
817
  return mc_now < mc_should
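
# Worked example of the check above (numbers purely illustrative): with
# candidate_pool_size = 10 and GetMasterCandidateStats reporting mc_now = 3,
# mc_should = 3, the target becomes min(3 + 1, 10) = 4, so 3 < 4 holds and the
# new node should promote itself to master candidate.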
818

    
819

    
820
def _CheckNicsBridgesExist(lu, target_nics, target_node,
821
                               profile=constants.PP_DEFAULT):
822
  """Check that the brigdes needed by a list of nics exist.
823

824
  """
825
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[profile]
826
  paramslist = [objects.FillDict(c_nicparams, nic.nicparams)
827
                for nic in target_nics]
828
  brlist = [params[constants.NIC_LINK] for params in paramslist
829
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
830
  if brlist:
831
    result = lu.rpc.call_bridges_exist(target_node, brlist)
832
    result.Raise("Error checking bridges on destination node '%s'" %
833
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
834

    
835

    
836
def _CheckInstanceBridgesExist(lu, instance, node=None):
837
  """Check that the brigdes needed by an instance exist.
838

839
  """
840
  if node is None:
841
    node = instance.primary_node
842
  _CheckNicsBridgesExist(lu, instance.nics, node)
843

    
844

    
845
def _CheckOSVariant(os_obj, name):
846
  """Check whether an OS name conforms to the os variants specification.
847

848
  @type os_obj: L{objects.OS}
849
  @param os_obj: OS object to check
850
  @type name: string
851
  @param name: OS name passed by the user, to check for validity
852

853
  """
854
  if not os_obj.supported_variants:
855
    return
856
  try:
857
    variant = name.split("+", 1)[1]
858
  except IndexError:
859
    raise errors.OpPrereqError("OS name must include a variant",
860
                               errors.ECODE_INVAL)
861

    
862
  if variant not in os_obj.supported_variants:
863
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
864

    
865

    
866
def _GetNodeInstancesInner(cfg, fn):
867
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
868

    
869

    
870
def _GetNodeInstances(cfg, node_name):
871
  """Returns a list of all primary and secondary instances on a node.
872

873
  """
874

    
875
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
876

    
877

    
878
def _GetNodePrimaryInstances(cfg, node_name):
879
  """Returns primary instances on a node.
880

881
  """
882
  return _GetNodeInstancesInner(cfg,
883
                                lambda inst: node_name == inst.primary_node)
884

    
885

    
886
def _GetNodeSecondaryInstances(cfg, node_name):
887
  """Returns secondary instances on a node.
888

889
  """
890
  return _GetNodeInstancesInner(cfg,
891
                                lambda inst: node_name in inst.secondary_nodes)
892

    
893

    
894
def _GetStorageTypeArgs(cfg, storage_type):
895
  """Returns the arguments for a storage type.
896

897
  """
898
  # Special case for file storage
899
  if storage_type == constants.ST_FILE:
900
    # storage.FileStorage wants a list of storage directories
901
    return [[cfg.GetFileStorageDir()]]
902

    
903
  return []
904

    
905

    
906
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
907
  faulty = []
908

    
909
  for dev in instance.disks:
910
    cfg.SetDiskID(dev, node_name)
911

    
912
  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
913
  result.Raise("Failed to get disk status from node %s" % node_name,
914
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
915

    
916
  for idx, bdev_status in enumerate(result.payload):
917
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
918
      faulty.append(idx)
919

    
920
  return faulty
921

    
922

    
923
def _FormatTimestamp(secs):
924
  """Formats a Unix timestamp with the local timezone.
925

926
  """
927
  return time.strftime("%F %T %Z", time.gmtime(secs))
928

    
929

    
930
class LUPostInitCluster(LogicalUnit):
931
  """Logical unit for running hooks after cluster initialization.
932

933
  """
934
  HPATH = "cluster-init"
935
  HTYPE = constants.HTYPE_CLUSTER
936
  _OP_REQP = []
937

    
938
  def BuildHooksEnv(self):
939
    """Build hooks env.
940

941
    """
942
    env = {"OP_TARGET": self.cfg.GetClusterName()}
943
    mn = self.cfg.GetMasterNode()
944
    return env, [], [mn]
945

    
946
  def CheckPrereq(self):
947
    """No prerequisites to check.
948

949
    """
950
    return True
951

    
952
  def Exec(self, feedback_fn):
953
    """Nothing to do.
954

955
    """
956
    return True
957

    
958

    
959
class LUDestroyCluster(LogicalUnit):
960
  """Logical unit for destroying the cluster.
961

962
  """
963
  HPATH = "cluster-destroy"
964
  HTYPE = constants.HTYPE_CLUSTER
965
  _OP_REQP = []
966

    
967
  def BuildHooksEnv(self):
968
    """Build hooks env.
969

970
    """
971
    env = {"OP_TARGET": self.cfg.GetClusterName()}
972
    return env, [], []
973

    
974
  def CheckPrereq(self):
975
    """Check prerequisites.
976

977
    This checks whether the cluster is empty.
978

979
    Any errors are signaled by raising errors.OpPrereqError.
980

981
    """
982
    master = self.cfg.GetMasterNode()
983

    
984
    nodelist = self.cfg.GetNodeList()
985
    if len(nodelist) != 1 or nodelist[0] != master:
986
      raise errors.OpPrereqError("There are still %d node(s) in"
987
                                 " this cluster." % (len(nodelist) - 1),
988
                                 errors.ECODE_INVAL)
989
    instancelist = self.cfg.GetInstanceList()
990
    if instancelist:
991
      raise errors.OpPrereqError("There are still %d instance(s) in"
992
                                 " this cluster." % len(instancelist),
993
                                 errors.ECODE_INVAL)
994

    
995
  def Exec(self, feedback_fn):
996
    """Destroys the cluster.
997

998
    """
999
    master = self.cfg.GetMasterNode()
1000
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
1001

    
1002
    # Run post hooks on master node before it's removed
1003
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
1004
    try:
1005
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
1006
    except:
1007
      # pylint: disable-msg=W0702
1008
      self.LogWarning("Errors occurred running hooks on %s" % master)
1009

    
1010
    result = self.rpc.call_node_stop_master(master, False)
1011
    result.Raise("Could not disable the master role")
1012

    
1013
    if modify_ssh_setup:
1014
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
1015
      utils.CreateBackup(priv_key)
1016
      utils.CreateBackup(pub_key)
1017

    
1018
    return master
1019

    
1020

    
1021
def _VerifyCertificateInner(filename, expired, not_before, not_after, now,
1022
                            warn_days=constants.SSL_CERT_EXPIRATION_WARN,
1023
                            error_days=constants.SSL_CERT_EXPIRATION_ERROR):
1024
  """Verifies certificate details for LUVerifyCluster.
1025

1026
  """
1027
  if expired:
1028
    msg = "Certificate %s is expired" % filename
1029

    
1030
    if not_before is not None and not_after is not None:
1031
      msg += (" (valid from %s to %s)" %
1032
              (_FormatTimestamp(not_before),
1033
               _FormatTimestamp(not_after)))
1034
    elif not_before is not None:
1035
      msg += " (valid from %s)" % _FormatTimestamp(not_before)
1036
    elif not_after is not None:
1037
      msg += " (valid until %s)" % _FormatTimestamp(not_after)
1038

    
1039
    return (LUVerifyCluster.ETYPE_ERROR, msg)
1040

    
1041
  elif not_before is not None and not_before > now:
1042
    return (LUVerifyCluster.ETYPE_WARNING,
1043
            "Certificate %s not yet valid (valid from %s)" %
1044
            (filename, _FormatTimestamp(not_before)))
1045

    
1046
  elif not_after is not None:
1047
    remaining_days = int((not_after - now) / (24 * 3600))
1048

    
1049
    msg = ("Certificate %s expires in %d days" % (filename, remaining_days))
1050

    
1051
    if remaining_days <= error_days:
1052
      return (LUVerifyCluster.ETYPE_ERROR, msg)
1053

    
1054
    if remaining_days <= warn_days:
1055
      return (LUVerifyCluster.ETYPE_WARNING, msg)
1056

    
1057
  return (None, None)
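
# Sketch of the thresholds above, assuming purely for illustration that
# warn_days is 30 and error_days is 7: a certificate expiring in 60 days gives
# (None, None), one expiring in 20 days gives an ETYPE_WARNING, one expiring in
# 5 days or already expired gives an ETYPE_ERROR, and a not-yet-valid
# certificate is reported as an ETYPE_WARNING.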
1058

    
1059

    
1060
def _VerifyCertificate(filename):
1061
  """Verifies a certificate for LUVerifyCluster.
1062

1063
  @type filename: string
1064
  @param filename: Path to PEM file
1065

1066
  """
1067
  try:
1068
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1069
                                           utils.ReadFile(filename))
1070
  except Exception, err: # pylint: disable-msg=W0703
1071
    return (LUVerifyCluster.ETYPE_ERROR,
1072
            "Failed to load X509 certificate %s: %s" % (filename, err))
1073

    
1074
  # Depending on the pyOpenSSL version, this can just return (None, None)
1075
  (not_before, not_after) = utils.GetX509CertValidity(cert)
1076

    
1077
  return _VerifyCertificateInner(filename, cert.has_expired(),
1078
                                 not_before, not_after, time.time())
1079

    
1080

    
1081
class LUVerifyCluster(LogicalUnit):
1082
  """Verifies the cluster status.
1083

1084
  """
1085
  HPATH = "cluster-verify"
1086
  HTYPE = constants.HTYPE_CLUSTER
1087
  _OP_REQP = ["skip_checks", "verbose", "error_codes", "debug_simulate_errors"]
1088
  REQ_BGL = False
1089

    
1090
  TCLUSTER = "cluster"
1091
  TNODE = "node"
1092
  TINSTANCE = "instance"
1093

    
1094
  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1095
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1096
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1097
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1098
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1099
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1101
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1102
  ENODEDRBD = (TNODE, "ENODEDRBD")
1103
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1104
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
1105
  ENODEHV = (TNODE, "ENODEHV")
1106
  ENODELVM = (TNODE, "ENODELVM")
1107
  ENODEN1 = (TNODE, "ENODEN1")
1108
  ENODENET = (TNODE, "ENODENET")
1109
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1110
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1111
  ENODERPC = (TNODE, "ENODERPC")
1112
  ENODESSH = (TNODE, "ENODESSH")
1113
  ENODEVERSION = (TNODE, "ENODEVERSION")
1114
  ENODESETUP = (TNODE, "ENODESETUP")
1115
  ENODETIME = (TNODE, "ENODETIME")
1116

    
1117
  ETYPE_FIELD = "code"
1118
  ETYPE_ERROR = "ERROR"
1119
  ETYPE_WARNING = "WARNING"
1120

    
1121
  class NodeImage(object):
1122
    """A class representing the logical and physical status of a node.
1123

1124
    @ivar volumes: a structure as returned from
1125
        L{ganeti.backend.GetVolumeList} (runtime)
1126
    @ivar instances: a list of running instances (runtime)
1127
    @ivar pinst: list of configured primary instances (config)
1128
    @ivar sinst: list of configured secondary instances (config)
1129
    @ivar sbp: dictionary of {secondary-node: list of instances} of all peers
1130
        of this node (config)
1131
    @ivar mfree: free memory, as reported by hypervisor (runtime)
1132
    @ivar dfree: free disk, as reported by the node (runtime)
1133
    @ivar offline: the offline status (config)
1134
    @type rpc_fail: boolean
1135
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
1136
        not whether the individual keys were correct) (runtime)
1137
    @type lvm_fail: boolean
1138
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1139
    @type hyp_fail: boolean
1140
    @ivar hyp_fail: whether the RPC call didn't return the instance list
1141
    @type ghost: boolean
1142
    @ivar ghost: whether this is a known node or not (config)
1143

1144
    """
1145
    def __init__(self, offline=False):
1146
      self.volumes = {}
1147
      self.instances = []
1148
      self.pinst = []
1149
      self.sinst = []
1150
      self.sbp = {}
1151
      self.mfree = 0
1152
      self.dfree = 0
1153
      self.offline = offline
1154
      self.rpc_fail = False
1155
      self.lvm_fail = False
1156
      self.hyp_fail = False
1157
      self.ghost = False
1158

    
1159
  def ExpandNames(self):
1160
    self.needed_locks = {
1161
      locking.LEVEL_NODE: locking.ALL_SET,
1162
      locking.LEVEL_INSTANCE: locking.ALL_SET,
1163
    }
1164
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1165

    
1166
  def _Error(self, ecode, item, msg, *args, **kwargs):
1167
    """Format an error message.
1168

1169
    Based on the opcode's error_codes parameter, either format a
1170
    parseable error code, or a simpler error string.
1171

1172
    This must be called only from Exec and functions called from Exec.
1173

1174
    """
1175
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1176
    itype, etxt = ecode
1177
    # first complete the msg
1178
    if args:
1179
      msg = msg % args
1180
    # then format the whole message
1181
    if self.op.error_codes:
1182
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1183
    else:
1184
      if item:
1185
        item = " " + item
1186
      else:
1187
        item = ""
1188
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1189
    # and finally report it via the feedback_fn
1190
    self._feedback_fn("  - %s" % msg)
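
  # For illustration (hypothetical node name and message): with the
  # error_codes opcode parameter set, the line above is emitted in the
  # machine-parseable form
  #   - ERROR:ENODELVM:node:node1.example.com:unable to check volume groups
  # while without it the same error reads
  #   - ERROR: node node1.example.com: unable to check volume groups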
1191

    
1192
  def _ErrorIf(self, cond, *args, **kwargs):
1193
    """Log an error message if the passed condition is True.
1194

1195
    """
1196
    cond = bool(cond) or self.op.debug_simulate_errors
1197
    if cond:
1198
      self._Error(*args, **kwargs)
1199
    # do not mark the operation as failed for WARN cases only
1200
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1201
      self.bad = self.bad or cond
1202

    
1203
  def _VerifyNode(self, ninfo, nresult):
1204
    """Run multiple tests against a node.
1205

1206
    Test list:
1207

1208
      - compares ganeti version
1209
      - checks vg existence and size > 20G
1210
      - checks config file checksum
1211
      - checks ssh to other nodes
1212

1213
    @type ninfo: L{objects.Node}
1214
    @param ninfo: the node to check
1215
    @param nresult: the results from the node
1216
    @rtype: boolean
1217
    @return: whether overall this call was successful (and we can expect
1218
         reasonable values in the response)
1219

1220
    """
1221
    node = ninfo.name
1222
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1223

    
1224
    # main result, nresult should be a non-empty dict
1225
    test = not nresult or not isinstance(nresult, dict)
1226
    _ErrorIf(test, self.ENODERPC, node,
1227
                  "unable to verify node: no data returned")
1228
    if test:
1229
      return False
1230

    
1231
    # compares ganeti version
1232
    local_version = constants.PROTOCOL_VERSION
1233
    remote_version = nresult.get("version", None)
1234
    test = not (remote_version and
1235
                isinstance(remote_version, (list, tuple)) and
1236
                len(remote_version) == 2)
1237
    _ErrorIf(test, self.ENODERPC, node,
1238
             "connection to node returned invalid data")
1239
    if test:
1240
      return False
1241

    
1242
    test = local_version != remote_version[0]
1243
    _ErrorIf(test, self.ENODEVERSION, node,
1244
             "incompatible protocol versions: master %s,"
1245
             " node %s", local_version, remote_version[0])
1246
    if test:
1247
      return False
1248

    
1249
    # node seems compatible, we can actually try to look into its results
1250

    
1251
    # full package version
1252
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1253
                  self.ENODEVERSION, node,
1254
                  "software version mismatch: master %s, node %s",
1255
                  constants.RELEASE_VERSION, remote_version[1],
1256
                  code=self.ETYPE_WARNING)
1257

    
1258
    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1259
    if isinstance(hyp_result, dict):
1260
      for hv_name, hv_result in hyp_result.iteritems():
1261
        test = hv_result is not None
1262
        _ErrorIf(test, self.ENODEHV, node,
1263
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1264

    
1265

    
1266
    test = nresult.get(constants.NV_NODESETUP,
1267
                           ["Missing NODESETUP results"])
1268
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1269
             "; ".join(test))
1270

    
1271
    return True
1272

    
1273
  def _VerifyNodeTime(self, ninfo, nresult,
1274
                      nvinfo_starttime, nvinfo_endtime):
1275
    """Check the node time.
1276

1277
    @type ninfo: L{objects.Node}
1278
    @param ninfo: the node to check
1279
    @param nresult: the remote results for the node
1280
    @param nvinfo_starttime: the start time of the RPC call
1281
    @param nvinfo_endtime: the end time of the RPC call
1282

1283
    """
1284
    node = ninfo.name
1285
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1286

    
1287
    ntime = nresult.get(constants.NV_TIME, None)
1288
    try:
1289
      ntime_merged = utils.MergeTime(ntime)
1290
    except (ValueError, TypeError):
1291
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1292
      return
1293

    
1294
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1295
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1296
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1297
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1298
    else:
1299
      ntime_diff = None
1300

    
1301
    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1302
             "Node time diverges by at least %s from master node time",
1303
             ntime_diff)
1304

    
1305
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1306
    """Check the node time.
1307

1308
    @type ninfo: L{objects.Node}
1309
    @param ninfo: the node to check
1310
    @param nresult: the remote results for the node
1311
    @param vg_name: the configured VG name
1312

1313
    """
1314
    if vg_name is None:
1315
      return
1316

    
1317
    node = ninfo.name
1318
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1319

    
1320
    # checks vg existence and size > 20G
1321
    vglist = nresult.get(constants.NV_VGLIST, None)
1322
    test = not vglist
1323
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1324
    if not test:
1325
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1326
                                            constants.MIN_VG_SIZE)
1327
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1328

    
1329
    # check pv names
1330
    pvlist = nresult.get(constants.NV_PVLIST, None)
1331
    test = pvlist is None
1332
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1333
    if not test:
1334
      # check that ':' is not present in PV names, since it's a
1335
      # special character for lvcreate (denotes the range of PEs to
1336
      # use on the PV)
1337
      for _, pvname, owner_vg in pvlist:
1338
        test = ":" in pvname
1339
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1340
                 " '%s' of VG '%s'", pvname, owner_vg)
1341

    
1342
  def _VerifyNodeNetwork(self, ninfo, nresult):
1343
    """Check the node time.
1344

1345
    @type ninfo: L{objects.Node}
1346
    @param ninfo: the node to check
1347
    @param nresult: the remote results for the node
1348

1349
    """
1350
    node = ninfo.name
1351
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1352

    
1353
    test = constants.NV_NODELIST not in nresult
1354
    _ErrorIf(test, self.ENODESSH, node,
1355
             "node hasn't returned node ssh connectivity data")
1356
    if not test:
1357
      if nresult[constants.NV_NODELIST]:
1358
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1359
          _ErrorIf(True, self.ENODESSH, node,
1360
                   "ssh communication with node '%s': %s", a_node, a_msg)
1361

    
1362
    test = constants.NV_NODENETTEST not in nresult
1363
    _ErrorIf(test, self.ENODENET, node,
1364
             "node hasn't returned node tcp connectivity data")
1365
    if not test:
1366
      if nresult[constants.NV_NODENETTEST]:
1367
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1368
        for anode in nlist:
1369
          _ErrorIf(True, self.ENODENET, node,
1370
                   "tcp communication with node '%s': %s",
1371
                   anode, nresult[constants.NV_NODENETTEST][anode])
1372

    
1373
  def _VerifyInstance(self, instance, instanceconfig, node_image):
1374
    """Verify an instance.
1375

1376
    This function checks to see if the required block devices are
1377
    available on the instance's node.
1378

1379
    """
1380
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1381
    node_current = instanceconfig.primary_node
1382

    
1383
    node_vol_should = {}
1384
    instanceconfig.MapLVsByNode(node_vol_should)
1385

    
1386
    for node in node_vol_should:
1387
      n_img = node_image[node]
1388
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1389
        # ignore missing volumes on offline or broken nodes
1390
        continue
1391
      for volume in node_vol_should[node]:
1392
        test = volume not in n_img.volumes
1393
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1394
                 "volume %s missing on node %s", volume, node)
1395

    
1396
    if instanceconfig.admin_up:
1397
      pri_img = node_image[node_current]
1398
      test = instance not in pri_img.instances and not pri_img.offline
1399
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
1400
               "instance not running on its primary node %s",
1401
               node_current)
1402

    
1403
    for node, n_img in node_image.items():
1404
      if node != node_current:
1405
        test = instance in n_img.instances
1406
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1407
                 "instance should not run on node %s", node)
1408

    
1409
  def _VerifyOrphanVolumes(self, node_vol_should, node_image):
1410
    """Verify if there are any unknown volumes in the cluster.
1411

1412
    The .os, .swap and backup volumes are ignored. All other volumes are
1413
    reported as unknown.
1414

1415
    """
1416
    for node, n_img in node_image.items():
1417
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1418
        # skip non-healthy nodes
1419
        continue
1420
      for volume in n_img.volumes:
1421
        test = (node not in node_vol_should or
1422
                volume not in node_vol_should[node])
1423
        self._ErrorIf(test, self.ENODEORPHANLV, node,
1424
                      "volume %s is unknown", volume)
1425

    
1426
  def _VerifyOrphanInstances(self, instancelist, node_image):
1427
    """Verify the list of running instances.
1428

1429
    This checks what instances are running but unknown to the cluster.
1430

1431
    """
1432
    for node, n_img in node_image.items():
1433
      for o_inst in n_img.instances:
1434
        test = o_inst not in instancelist
1435
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1436
                      "instance %s on node %s should not exist", o_inst, node)
1437

    
1438
  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1439
    """Verify N+1 Memory Resilience.
1440

1441
    Check that if one single node dies we can still start all the
1442
    instances it was primary for.
1443

1444
    """
1445
    for node, n_img in node_image.items():
1446
      # This code checks that every node which is now listed as
1447
      # secondary has enough memory to host all instances it is
1448
      # supposed to host, should a single other node in the cluster fail.
1449
      # FIXME: not ready for failover to an arbitrary node
1450
      # FIXME: does not support file-backed instances
1451
      # WARNING: we currently take into account down instances as well
1452
      # as up ones, considering that even if they're down someone
1453
      # might want to start them even in the event of a node failure.
1454
      for prinode, instances in n_img.sbp.items():
1455
        needed_mem = 0
1456
        for instance in instances:
1457
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1458
          if bep[constants.BE_AUTO_BALANCE]:
1459
            needed_mem += bep[constants.BE_MEMORY]
1460
        test = n_img.mfree < needed_mem
1461
        self._ErrorIf(test, self.ENODEN1, node,
1462
                      "not enough memory on to accommodate"
1463
                      " failovers should peer node %s fail", prinode)
1464

    
1465
  def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1466
                       master_files):
1467
    """Verifies and computes the node required file checksums.
1468

1469
    @type ninfo: L{objects.Node}
1470
    @param ninfo: the node to check
1471
    @param nresult: the remote results for the node
1472
    @param file_list: required list of files
1473
    @param local_cksum: dictionary of local files and their checksums
1474
    @param master_files: list of files that only masters should have
1475

1476
    """
1477
    node = ninfo.name
1478
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1479

    
1480
    remote_cksum = nresult.get(constants.NV_FILELIST, None)
1481
    test = not isinstance(remote_cksum, dict)
1482
    _ErrorIf(test, self.ENODEFILECHECK, node,
1483
             "node hasn't returned file checksum data")
1484
    if test:
1485
      return
1486

    
1487
    for file_name in file_list:
1488
      node_is_mc = ninfo.master_candidate
1489
      must_have = (file_name not in master_files) or node_is_mc
1490
      # missing
1491
      test1 = file_name not in remote_cksum
1492
      # invalid checksum
1493
      test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1494
      # existing and good
1495
      test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1496
      _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1497
               "file '%s' missing", file_name)
1498
      _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1499
               "file '%s' has wrong checksum", file_name)
1500
      # not candidate and this is not a must-have file
1501
      _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1502
               "file '%s' should not exist on non master"
1503
               " candidates (and the file is outdated)", file_name)
1504
      # all good, except non-master/non-must have combination
1505
      _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1506
               "file '%s' should not exist"
1507
               " on non master candidates", file_name)
1508

    
1509
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_map):
    """Verifies the node DRBD status.
1511

1512
    @type ninfo: L{objects.Node}
1513
    @param ninfo: the node to check
1514
    @param nresult: the remote results for the node
1515
    @param instanceinfo: the dict of instances
1516
    @param drbd_map: the DRBD map as returned by
1517
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1518

1519
    """
1520
    node = ninfo.name
1521
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1522

    
1523
    # compute the DRBD minors
1524
    node_drbd = {}
1525
    for minor, instance in drbd_map[node].items():
1526
      test = instance not in instanceinfo
1527
      _ErrorIf(test, self.ECLUSTERCFG, None,
1528
               "ghost instance '%s' in temporary DRBD map", instance)
1529
      # ghost instance should not be running, but otherwise we
      # don't give double warnings (both ghost instance and
      # unallocated minor in use)
1532
      if test:
1533
        node_drbd[minor] = (instance, False)
1534
      else:
1535
        instance = instanceinfo[instance]
1536
        node_drbd[minor] = (instance.name, instance.admin_up)
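    # node_drbd now maps each minor expected on this node to
    # (instance name, whether the minor should be in use)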
1537

    
1538
    # and now check them
1539
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
1540
    test = not isinstance(used_minors, (tuple, list))
1541
    _ErrorIf(test, self.ENODEDRBD, node,
1542
             "cannot parse drbd status file: %s", str(used_minors))
1543
    if test:
1544
      # we cannot check drbd status
1545
      return
1546

    
1547
    for minor, (iname, must_exist) in node_drbd.items():
1548
      test = minor not in used_minors and must_exist
1549
      _ErrorIf(test, self.ENODEDRBD, node,
1550
               "drbd minor %d of instance %s is not active", minor, iname)
1551
    for minor in used_minors:
1552
      test = minor not in node_drbd
1553
      _ErrorIf(test, self.ENODEDRBD, node,
1554
               "unallocated drbd minor %d is in use", minor)
1555

    
1556
  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1557
    """Verifies and updates the node volume data.
1558

1559
    This function will update a L{NodeImage}'s internal structures
1560
    with data from the remote call.
1561

1562
    @type ninfo: L{objects.Node}
1563
    @param ninfo: the node to check
1564
    @param nresult: the remote results for the node
1565
    @param nimg: the node image object
1566
    @param vg_name: the configured VG name
1567

1568
    """
1569
    node = ninfo.name
1570
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1571

    
1572
    nimg.lvm_fail = True
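    # assume LVM failure until the LV list below parses cleanly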
1573
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1574
    if vg_name is None:
1575
      pass
1576
    elif isinstance(lvdata, basestring):
1577
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1578
               utils.SafeEncode(lvdata))
1579
    elif not isinstance(lvdata, dict):
1580
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1581
    else:
1582
      nimg.volumes = lvdata
1583
      nimg.lvm_fail = False
1584

    
1585
  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1586
    """Verifies and updates the node instance list.
1587

1588
    If the listing was successful, then updates this node's instance
1589
    list. Otherwise, it marks the RPC call as failed for the instance
1590
    list key.
1591

1592
    @type ninfo: L{objects.Node}
1593
    @param ninfo: the node to check
1594
    @param nresult: the remote results for the node
1595
    @param nimg: the node image object
1596

1597
    """
1598
    idata = nresult.get(constants.NV_INSTANCELIST, None)
1599
    test = not isinstance(idata, list)
1600
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1601
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
1602
    if test:
1603
      nimg.hyp_fail = True
1604
    else:
1605
      nimg.instances = idata
1606

    
1607
  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1608
    """Verifies and computes a node information map
1609

1610
    @type ninfo: L{objects.Node}
1611
    @param ninfo: the node to check
1612
    @param nresult: the remote results for the node
1613
    @param nimg: the node image object
1614
    @param vg_name: the configured VG name
1615

1616
    """
1617
    node = ninfo.name
1618
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1619

    
1620
    # try to read free memory (from the hypervisor)
1621
    hv_info = nresult.get(constants.NV_HVINFO, None)
1622
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1623
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1624
    if not test:
1625
      try:
1626
        nimg.mfree = int(hv_info["memory_free"])
1627
      except (ValueError, TypeError):
1628
        _ErrorIf(True, self.ENODERPC, node,
1629
                 "node returned invalid nodeinfo, check hypervisor")
1630

    
1631
    # FIXME: devise a free space model for file based instances as well
1632
    if vg_name is not None:
1633
      test = (constants.NV_VGLIST not in nresult or
1634
              vg_name not in nresult[constants.NV_VGLIST])
1635
      _ErrorIf(test, self.ENODELVM, node,
1636
               "node didn't return data for the volume group '%s'"
1637
               " - it is either missing or broken", vg_name)
1638
      if not test:
1639
        try:
1640
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
1641
        except (ValueError, TypeError):
1642
          _ErrorIf(True, self.ENODERPC, node,
1643
                   "node returned invalid LVM info, check LVM status")
1644

    
1645
  def CheckPrereq(self):
1646
    """Check prerequisites.
1647

1648
    Transform the list of checks we're going to skip into a set and check that
1649
    all its members are valid.
1650

1651
    """
1652
    self.skip_set = frozenset(self.op.skip_checks)
1653
    if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
1654
      raise errors.OpPrereqError("Invalid checks to be skipped specified",
1655
                                 errors.ECODE_INVAL)
1656

    
1657
  def BuildHooksEnv(self):
1658
    """Build hooks env.
1659

1660
    Cluster-Verify hooks are only run in the post phase; their failure causes
    the output to be logged in the verify output and the verification to fail.
1662

1663
    """
1664
    all_nodes = self.cfg.GetNodeList()
1665
    env = {
1666
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
1667
      }
1668
    for node in self.cfg.GetAllNodesInfo().values():
1669
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
1670

    
1671
    return env, [], all_nodes
1672

    
1673
  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.
1675

1676
    """
1677
    self.bad = False
1678
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1679
    verbose = self.op.verbose
1680
    self._feedback_fn = feedback_fn
1681
    feedback_fn("* Verifying global settings")
1682
    for msg in self.cfg.VerifyConfig():
1683
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)
1684

    
1685
    # Check the cluster certificates
1686
    for cert_filename in constants.ALL_CERT_FILES:
1687
      (errcode, msg) = _VerifyCertificate(cert_filename)
1688
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1689

    
1690
    vg_name = self.cfg.GetVGName()
1691
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
1692
    cluster = self.cfg.GetClusterInfo()
1693
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
1694
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
1695
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
1696
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
1697
                        for iname in instancelist)
1698
    i_non_redundant = [] # Non redundant instances
1699
    i_non_a_balanced = [] # Non auto-balanced instances
1700
    n_offline = 0 # Count of offline nodes
1701
    n_drained = 0 # Count of nodes being drained
1702
    node_vol_should = {}
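    # will map each node to the logical volumes the configuration expects there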
1703

    
1704
    # FIXME: verify OS list
1705
    # do local checksums
1706
    master_files = [constants.CLUSTER_CONF_FILE]
1707

    
1708
    file_names = ssconf.SimpleStore().GetFileList()
1709
    file_names.extend(constants.ALL_CERT_FILES)
1710
    file_names.extend(master_files)
1711
    if cluster.modify_etc_hosts:
1712
      file_names.append(constants.ETC_HOSTS)
1713

    
1714
    local_checksums = utils.FingerprintFiles(file_names)
1715

    
1716
    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
1717
    node_verify_param = {
1718
      constants.NV_FILELIST: file_names,
1719
      constants.NV_NODELIST: [node.name for node in nodeinfo
1720
                              if not node.offline],
1721
      constants.NV_HYPERVISOR: hypervisors,
1722
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
1723
                                  node.secondary_ip) for node in nodeinfo
1724
                                 if not node.offline],
1725
      constants.NV_INSTANCELIST: hypervisors,
1726
      constants.NV_VERSION: None,
1727
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
1728
      constants.NV_NODESETUP: None,
1729
      constants.NV_TIME: None,
1730
      }
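    # the LVM- and DRBD-related checks are only requested from the nodes
    # when a volume group is configured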
1731

    
1732
    if vg_name is not None:
1733
      node_verify_param[constants.NV_VGLIST] = None
1734
      node_verify_param[constants.NV_LVLIST] = vg_name
1735
      node_verify_param[constants.NV_PVLIST] = [vg_name]
1736
      node_verify_param[constants.NV_DRBDLIST] = None
1737

    
1738
    # Build our expected cluster state
1739
    node_image = dict((node.name, self.NodeImage(offline=node.offline))
1740
                      for node in nodeinfo)
1741

    
1742
    for instance in instancelist:
1743
      inst_config = instanceinfo[instance]
1744

    
1745
      for nname in inst_config.all_nodes:
1746
        if nname not in node_image:
1747
          # ghost node
1748
          gnode = self.NodeImage()
1749
          gnode.ghost = True
1750
          node_image[nname] = gnode
1751

    
1752
      inst_config.MapLVsByNode(node_vol_should)
1753

    
1754
      pnode = inst_config.primary_node
1755
      node_image[pnode].pinst.append(instance)
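      # register the instance with each of its secondary nodes, grouped by
      # primary node (nimg.sbp); the N+1 memory check relies on this mapping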
1756

    
1757
      for snode in inst_config.secondary_nodes:
1758
        nimg = node_image[snode]
1759
        nimg.sinst.append(instance)
1760
        if pnode not in nimg.sbp:
1761
          nimg.sbp[pnode] = []
1762
        nimg.sbp[pnode].append(instance)
1763

    
1764
    # At this point, we have the in-memory data structures complete,
1765
    # except for the runtime information, which we'll gather next
1766

    
1767
    # Due to the way our RPC system works, exact response times cannot be
1768
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
1769
    # time before and after executing the request, we can at least have a time
1770
    # window.
1771
    nvinfo_starttime = time.time()
1772
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
1773
                                           self.cfg.GetClusterName())
1774
    nvinfo_endtime = time.time()
1775

    
1776
    master_node = self.cfg.GetMasterNode()
1777
    all_drbd_map = self.cfg.ComputeDRBDMap()
1778

    
1779
    feedback_fn("* Verifying node status")
1780
    for node_i in nodeinfo:
1781
      node = node_i.name
1782
      nimg = node_image[node]
1783

    
1784
      if node_i.offline:
1785
        if verbose:
1786
          feedback_fn("* Skipping offline node %s" % (node,))
1787
        n_offline += 1
1788
        continue
1789

    
1790
      if node == master_node:
1791
        ntype = "master"
1792
      elif node_i.master_candidate:
1793
        ntype = "master candidate"
1794
      elif node_i.drained:
1795
        ntype = "drained"
1796
        n_drained += 1
1797
      else:
1798
        ntype = "regular"
1799
      if verbose:
1800
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
1801

    
1802
      msg = all_nvinfo[node].fail_msg
1803
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
1804
      if msg:
1805
        nimg.rpc_fail = True
1806
        continue
1807

    
1808
      nresult = all_nvinfo[node].payload
1809

    
1810
      nimg.call_ok = self._VerifyNode(node_i, nresult)
1811
      self._VerifyNodeNetwork(node_i, nresult)
1812
      self._VerifyNodeLVM(node_i, nresult, vg_name)
1813
      self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
1814
                            master_files)
1815
      self._VerifyNodeDrbd(node_i, nresult, instanceinfo, all_drbd_map)
1816
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
1817

    
1818
      self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
1819
      self._UpdateNodeInstances(node_i, nresult, nimg)
1820
      self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
1821

    
1822
    feedback_fn("* Verifying instance status")
1823
    for instance in instancelist:
1824
      if verbose:
1825
        feedback_fn("* Verifying instance %s" % instance)
1826
      inst_config = instanceinfo[instance]
1827
      self._VerifyInstance(instance, inst_config, node_image)
1828
      inst_nodes_offline = []
1829

    
1830
      pnode = inst_config.primary_node
1831
      pnode_img = node_image[pnode]
1832
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
1833
               self.ENODERPC, pnode, "instance %s, connection to"
1834
               " primary node failed", instance)
1835

    
1836
      if pnode_img.offline:
1837
        inst_nodes_offline.append(pnode)
1838

    
1839
      # If the instance is non-redundant we cannot survive losing its primary
1840
      # node, so we are not N+1 compliant. On the other hand we have no disk
1841
      # templates with more than one secondary so that situation is not well
1842
      # supported either.
1843
      # FIXME: does not support file-backed instances
1844
      if not inst_config.secondary_nodes:
1845
        i_non_redundant.append(instance)
1846
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
1847
               instance, "instance has multiple secondary nodes: %s",
1848
               utils.CommaJoin(inst_config.secondary_nodes),
1849
               code=self.ETYPE_WARNING)
1850

    
1851
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
1852
        i_non_a_balanced.append(instance)
1853

    
1854
      for snode in inst_config.secondary_nodes:
1855
        s_img = node_image[snode]
1856
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
1857
                 "instance %s, connection to secondary node failed", instance)
1858

    
1859
        if s_img.offline:
1860
          inst_nodes_offline.append(snode)
1861

    
1862
      # warn that the instance lives on offline nodes
1863
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
1864
               "instance lives on offline node(s) %s",
1865
               utils.CommaJoin(inst_nodes_offline))
1866
      # ... or ghost nodes
1867
      for node in inst_config.all_nodes:
1868
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
1869
                 "instance lives on ghost node %s", node)
1870

    
1871
    feedback_fn("* Verifying orphan volumes")
1872
    self._VerifyOrphanVolumes(node_vol_should, node_image)
1873

    
1874
    feedback_fn("* Verifying orphan instances")
1875
    self._VerifyOrphanInstances(instancelist, node_image)
1876

    
1877
    if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
1878
      feedback_fn("* Verifying N+1 Memory redundancy")
1879
      self._VerifyNPlusOneMemory(node_image, instanceinfo)
1880

    
1881
    feedback_fn("* Other Notes")
1882
    if i_non_redundant:
1883
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
1884
                  % len(i_non_redundant))
1885

    
1886
    if i_non_a_balanced:
1887
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
1888
                  % len(i_non_a_balanced))
1889

    
1890
    if n_offline:
1891
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
1892

    
1893
    if n_drained:
1894
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
1895

    
1896
    return not self.bad
1897

    
1898
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
1899
    """Analyze the post-hooks' result
1900

1901
    This method analyses the hook result, handles it, and sends some
1902
    nicely-formatted feedback back to the user.
1903

1904
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
1905
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
1906
    @param hooks_results: the results of the multi-node hooks rpc call
1907
    @param feedback_fn: function used send feedback back to the caller
1908
    @param lu_result: previous Exec result
1909
    @return: the new Exec result, based on the previous result
1910
        and hook results
1911

1912
    """
1913
    # We only really run POST phase hooks, and are only interested in
1914
    # their results
1915
    if phase == constants.HOOKS_PHASE_POST:
1916
      # Used to change hooks' output to proper indentation
1917
      indent_re = re.compile('^', re.M)
1918
      feedback_fn("* Hooks Results")
1919
      assert hooks_results, "invalid result from hooks"
1920

    
1921
      for node_name in hooks_results:
1922
        res = hooks_results[node_name]
1923
        msg = res.fail_msg
1924
        test = msg and not res.offline
1925
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
1926
                      "Communication failure in hooks execution: %s", msg)
1927
        if res.offline or msg:
1928
          # No need to investigate payload if node is offline or gave an error.
1929
          # override manually lu_result here as _ErrorIf only
1930
          # overrides self.bad
1931
          lu_result = 1
1932
          continue
1933
        for script, hkr, output in res.payload:
1934
          test = hkr == constants.HKR_FAIL
1935
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
1936
                        "Script %s failed, output:", script)
1937
          if test:
1938
            output = indent_re.sub('      ', output)
1939
            feedback_fn("%s" % output)
1940
            lu_result = 0
1941

    
1942
      return lu_result


class LUVerifyDisks(NoHooksLU):
1946
  """Verifies the cluster disks status.
1947

1948
  """
1949
  _OP_REQP = []
1950
  REQ_BGL = False
1951

    
1952
  def ExpandNames(self):
1953
    self.needed_locks = {
1954
      locking.LEVEL_NODE: locking.ALL_SET,
1955
      locking.LEVEL_INSTANCE: locking.ALL_SET,
1956
    }
1957
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1958

    
1959
  def CheckPrereq(self):
1960
    """Check prerequisites.
1961

1962
    This has no prerequisites.
1963

1964
    """
1965
    pass
1966

    
1967
  def Exec(self, feedback_fn):
1968
    """Verify integrity of cluster disks.
1969

1970
    @rtype: tuple of three items
1971
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)
1974

1975
    """
1976
    result = res_nodes, res_instances, res_missing = {}, [], {}
1977

    
1978
    vg_name = self.cfg.GetVGName()
1979
    nodes = utils.NiceSort(self.cfg.GetNodeList())
1980
    instances = [self.cfg.GetInstanceInfo(name)
1981
                 for name in self.cfg.GetInstanceList()]
1982

    
1983
    nv_dict = {}
1984
    for inst in instances:
1985
      inst_lvs = {}
1986
      if (not inst.admin_up or
1987
          inst.disk_template not in constants.DTS_NET_MIRROR):
1988
        continue
1989
      inst.MapLVsByNode(inst_lvs)
1990
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
1991
      for node, vol_list in inst_lvs.iteritems():
1992
        for vol in vol_list:
1993
          nv_dict[(node, vol)] = inst
1994

    
1995
    if not nv_dict:
1996
      return result
1997

    
1998
    node_lvs = self.rpc.call_lv_list(nodes, vg_name)
1999

    
2000
    for node in nodes:
2001
      # node_volume
2002
      node_res = node_lvs[node]
2003
      if node_res.offline:
2004
        continue
2005
      msg = node_res.fail_msg
2006
      if msg:
2007
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2008
        res_nodes[node] = msg
2009
        continue
2010

    
2011
      lvs = node_res.payload
2012
      for lv_name, (_, _, lv_online) in lvs.items():
2013
        inst = nv_dict.pop((node, lv_name), None)
2014
        if (not lv_online and inst is not None
2015
            and inst.name not in res_instances):
2016
          res_instances.append(inst.name)
2017

    
2018
    # any leftover items in nv_dict are missing LVs, let's arrange the
2019
    # data better
2020
    for key, inst in nv_dict.iteritems():
2021
      if inst.name not in res_missing:
2022
        res_missing[inst.name] = []
2023
      res_missing[inst.name].append(key)
2024

    
2025
    return result


class LURepairDiskSizes(NoHooksLU):
2029
  """Verifies the cluster disks sizes.
2030

2031
  """
2032
  _OP_REQP = ["instances"]
2033
  REQ_BGL = False
2034

    
2035
  def ExpandNames(self):
2036
    if not isinstance(self.op.instances, list):
2037
      raise errors.OpPrereqError("Invalid argument type 'instances'",
2038
                                 errors.ECODE_INVAL)
2039

    
2040
    if self.op.instances:
2041
      self.wanted_names = []
2042
      for name in self.op.instances:
2043
        full_name = _ExpandInstanceName(self.cfg, name)
2044
        self.wanted_names.append(full_name)
2045
      self.needed_locks = {
2046
        locking.LEVEL_NODE: [],
2047
        locking.LEVEL_INSTANCE: self.wanted_names,
2048
        }
2049
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2050
    else:
2051
      self.wanted_names = None
2052
      self.needed_locks = {
2053
        locking.LEVEL_NODE: locking.ALL_SET,
2054
        locking.LEVEL_INSTANCE: locking.ALL_SET,
2055
        }
2056
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2057

    
2058
  def DeclareLocks(self, level):
2059
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
2060
      self._LockInstancesNodes(primary_only=True)
2061

    
2062
  def CheckPrereq(self):
2063
    """Check prerequisites.
2064

2065
    This only checks the optional instance list against the existing names.
2066

2067
    """
2068
    if self.wanted_names is None:
2069
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2070

    
2071
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2072
                             in self.wanted_names]
2073

    
2074
  def _EnsureChildSizes(self, disk):
2075
    """Ensure children of the disk have the needed disk size.
2076

2077
    This is valid mainly for DRBD8 and fixes an issue where the
2078
    children have a smaller disk size.
2079

2080
    @param disk: an L{ganeti.objects.Disk} object
2081

2082
    """
2083
    if disk.dev_type == constants.LD_DRBD8:
2084
      assert disk.children, "Empty children for DRBD8?"
2085
      fchild = disk.children[0]
2086
      mismatch = fchild.size < disk.size
2087
      if mismatch:
2088
        self.LogInfo("Child disk has size %d, parent %d, fixing",
2089
                     fchild.size, disk.size)
2090
        fchild.size = disk.size
2091

    
2092
      # and we recurse on this child only, not on the metadev
2093
      return self._EnsureChildSizes(fchild) or mismatch
2094
    else:
2095
      return False
2096

    
2097
  def Exec(self, feedback_fn):
2098
    """Verify the size of cluster disks.
2099

2100
    """
2101
    # TODO: check child disks too
2102
    # TODO: check differences in size between primary/secondary nodes
2103
    per_node_disks = {}
2104
    for instance in self.wanted_instances:
2105
      pnode = instance.primary_node
2106
      if pnode not in per_node_disks:
2107
        per_node_disks[pnode] = []
2108
      for idx, disk in enumerate(instance.disks):
2109
        per_node_disks[pnode].append((instance, idx, disk))
2110

    
2111
    changed = []
2112
    for node, dskl in per_node_disks.items():
2113
      newl = [v[2].Copy() for v in dskl]
2114
      for dsk in newl:
2115
        self.cfg.SetDiskID(dsk, node)
2116
      result = self.rpc.call_blockdev_getsizes(node, newl)
2117
      if result.fail_msg:
2118
        self.LogWarning("Failure in blockdev_getsizes call to node"
2119
                        " %s, ignoring", node)
2120
        continue
2121
      if len(result.data) != len(dskl):
2122
        self.LogWarning("Invalid result from node %s, ignoring node results",
2123
                        node)
2124
        continue
2125
      for ((instance, idx, disk), size) in zip(dskl, result.data):
2126
        if size is None:
2127
          self.LogWarning("Disk %d of instance %s did not return size"
2128
                          " information, ignoring", idx, instance.name)
2129
          continue
2130
        if not isinstance(size, (int, long)):
2131
          self.LogWarning("Disk %d of instance %s did not return valid"
2132
                          " size information, ignoring", idx, instance.name)
2133
          continue
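        # sizes are reported by the node in bytes, while the configuration
        # stores them in MiB, hence the conversion below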
2134
        size = size >> 20
2135
        if size != disk.size:
2136
          self.LogInfo("Disk %d of instance %s has mismatched size,"
2137
                       " correcting: recorded %d, actual %d", idx,
2138
                       instance.name, disk.size, size)
2139
          disk.size = size
2140
          self.cfg.Update(instance, feedback_fn)
2141
          changed.append((instance.name, idx, size))
2142
        if self._EnsureChildSizes(disk):
2143
          self.cfg.Update(instance, feedback_fn)
2144
          changed.append((instance.name, idx, disk.size))
2145
    return changed


class LURenameCluster(LogicalUnit):
2149
  """Rename the cluster.
2150

2151
  """
2152
  HPATH = "cluster-rename"
2153
  HTYPE = constants.HTYPE_CLUSTER
2154
  _OP_REQP = ["name"]
2155

    
2156
  def BuildHooksEnv(self):
2157
    """Build hooks env.
2158

2159
    """
2160
    env = {
2161
      "OP_TARGET": self.cfg.GetClusterName(),
2162
      "NEW_NAME": self.op.name,
2163
      }
2164
    mn = self.cfg.GetMasterNode()
2165
    all_nodes = self.cfg.GetNodeList()
2166
    return env, [mn], all_nodes
2167

    
2168
  def CheckPrereq(self):
2169
    """Verify that the passed name is a valid one.
2170

2171
    """
2172
    hostname = utils.GetHostInfo(self.op.name)
2173

    
2174
    new_name = hostname.name
2175
    self.ip = new_ip = hostname.ip
2176
    old_name = self.cfg.GetClusterName()
2177
    old_ip = self.cfg.GetMasterIP()
2178
    if new_name == old_name and new_ip == old_ip:
2179
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
2180
                                 " cluster has changed",
2181
                                 errors.ECODE_INVAL)
2182
    if new_ip != old_ip:
2183
      if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2184
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
2185
                                   " reachable on the network. Aborting." %
2186
                                   new_ip, errors.ECODE_NOTUNIQUE)
2187

    
2188
    self.op.name = new_name
2189

    
2190
  def Exec(self, feedback_fn):
2191
    """Rename the cluster.
2192

2193
    """
2194
    clustername = self.op.name
2195
    ip = self.ip
2196

    
2197
    # shutdown the master IP
2198
    master = self.cfg.GetMasterNode()
2199
    result = self.rpc.call_node_stop_master(master, False)
2200
    result.Raise("Could not disable the master role")
2201

    
2202
    try:
2203
      cluster = self.cfg.GetClusterInfo()
2204
      cluster.cluster_name = clustername
2205
      cluster.master_ip = ip
2206
      self.cfg.Update(cluster, feedback_fn)
2207

    
2208
      # update the known hosts file
2209
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2210
      node_list = self.cfg.GetNodeList()
2211
      try:
2212
        node_list.remove(master)
2213
      except ValueError:
2214
        pass
2215
      result = self.rpc.call_upload_file(node_list,
2216
                                         constants.SSH_KNOWN_HOSTS_FILE)
2217
      for to_node, to_result in result.iteritems():
2218
        msg = to_result.fail_msg
2219
        if msg:
2220
          msg = ("Copy of file %s to node %s failed: %s" %
2221
                 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
2222
          self.proc.LogWarning(msg)
2223

    
2224
    finally:
2225
      result = self.rpc.call_node_start_master(master, False, False)
2226
      msg = result.fail_msg
2227
      if msg:
2228
        self.LogWarning("Could not re-enable the master role on"
2229
                        " the master, please restart manually: %s", msg)


def _RecursiveCheckIfLVMBased(disk):
2233
  """Check if the given disk or its children are lvm-based.
2234

2235
  @type disk: L{objects.Disk}
2236
  @param disk: the disk to check
2237
  @rtype: boolean
2238
  @return: boolean indicating whether a LD_LV dev_type was found or not
2239

2240
  """
2241
  if disk.children:
2242
    for chdisk in disk.children:
2243
      if _RecursiveCheckIfLVMBased(chdisk):
2244
        return True
2245
  return disk.dev_type == constants.LD_LV


class LUSetClusterParams(LogicalUnit):
2249
  """Change the parameters of the cluster.
2250

2251
  """
2252
  HPATH = "cluster-modify"
2253
  HTYPE = constants.HTYPE_CLUSTER
2254
  _OP_REQP = []
2255
  REQ_BGL = False
2256

    
2257
  def CheckArguments(self):
2258
    """Check parameters
2259

2260
    """
2261
    for attr in ["candidate_pool_size",
2262
                 "uid_pool", "add_uids", "remove_uids"]:
2263
      if not hasattr(self.op, attr):
2264
        setattr(self.op, attr, None)
2265

    
2266
    if self.op.candidate_pool_size is not None:
2267
      try:
2268
        self.op.candidate_pool_size = int(self.op.candidate_pool_size)
2269
      except (ValueError, TypeError), err:
2270
        raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
2271
                                   str(err), errors.ECODE_INVAL)
2272
      if self.op.candidate_pool_size < 1:
2273
        raise errors.OpPrereqError("At least one master candidate needed",
2274
                                   errors.ECODE_INVAL)
2275

    
2276
    _CheckBooleanOpField(self.op, "maintain_node_health")
2277

    
2278
    if self.op.uid_pool:
2279
      uidpool.CheckUidPool(self.op.uid_pool)
2280

    
2281
    if self.op.add_uids:
2282
      uidpool.CheckUidPool(self.op.add_uids)
2283

    
2284
    if self.op.remove_uids:
2285
      uidpool.CheckUidPool(self.op.remove_uids)
2286

    
2287
  def ExpandNames(self):
2288
    # FIXME: in the future maybe other cluster params won't require checking on
2289
    # all nodes to be modified.
2290
    self.needed_locks = {
2291
      locking.LEVEL_NODE: locking.ALL_SET,
2292
    }
2293
    self.share_locks[locking.LEVEL_NODE] = 1
2294

    
2295
  def BuildHooksEnv(self):
2296
    """Build hooks env.
2297

2298
    """
2299
    env = {
2300
      "OP_TARGET": self.cfg.GetClusterName(),
2301
      "NEW_VG_NAME": self.op.vg_name,
2302
      }
2303
    mn = self.cfg.GetMasterNode()
2304
    return env, [mn], [mn]
2305

    
2306
  def CheckPrereq(self):
2307
    """Check prerequisites.
2308

2309
    This checks whether the given params don't conflict and
2310
    if the given volume group is valid.
2311

2312
    """
2313
    if self.op.vg_name is not None and not self.op.vg_name:
2314
      instances = self.cfg.GetAllInstancesInfo().values()
2315
      for inst in instances:
2316
        for disk in inst.disks:
2317
          if _RecursiveCheckIfLVMBased(disk):
2318
            raise errors.OpPrereqError("Cannot disable lvm storage while"
2319
                                       " lvm-based instances exist",
2320
                                       errors.ECODE_INVAL)
2321

    
2322
    node_list = self.acquired_locks[locking.LEVEL_NODE]
2323

    
2324
    # if vg_name not None, checks given volume group on all nodes
2325
    if self.op.vg_name:
2326
      vglist = self.rpc.call_vg_list(node_list)
2327
      for node in node_list:
2328
        msg = vglist[node].fail_msg
2329
        if msg:
2330
          # ignoring down node
2331
          self.LogWarning("Error while gathering data on node %s"
2332
                          " (ignoring node): %s", node, msg)
2333
          continue
2334
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2335
                                              self.op.vg_name,
2336
                                              constants.MIN_VG_SIZE)
2337
        if vgstatus:
2338
          raise errors.OpPrereqError("Error on node '%s': %s" %
2339
                                     (node, vgstatus), errors.ECODE_ENVIRON)
2340

    
2341
    self.cluster = cluster = self.cfg.GetClusterInfo()
2342
    # validate params changes
2343
    if self.op.beparams:
2344
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2345
      self.new_beparams = objects.FillDict(
2346
        cluster.beparams[constants.PP_DEFAULT], self.op.beparams)
2347

    
2348
    if self.op.nicparams:
2349
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2350
      self.new_nicparams = objects.FillDict(
2351
        cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
2352
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
2353
      nic_errors = []
2354

    
2355
      # check all instances for consistency
2356
      for instance in self.cfg.GetAllInstancesInfo().values():
2357
        for nic_idx, nic in enumerate(instance.nics):
2358
          params_copy = copy.deepcopy(nic.nicparams)
2359
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
2360

    
2361
          # check parameter syntax
2362
          try:
2363
            objects.NIC.CheckParameterSyntax(params_filled)
2364
          except errors.ConfigurationError, err:
2365
            nic_errors.append("Instance %s, nic/%d: %s" %
2366
                              (instance.name, nic_idx, err))
2367

    
2368
          # if we're moving instances to routed, check that they have an ip
2369
          target_mode = params_filled[constants.NIC_MODE]
2370
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2371
            nic_errors.append("Instance %s, nic/%d: routed NIC with no IP" %
2372
                              (instance.name, nic_idx))
2373
      if nic_errors:
2374
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2375
                                   "\n".join(nic_errors))
2376

    
2377
    # hypervisor list/parameters
2378
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2379
    if self.op.hvparams:
2380
      if not isinstance(self.op.hvparams, dict):
2381
        raise errors.OpPrereqError("Invalid 'hvparams' parameter on input",
2382
                                   errors.ECODE_INVAL)
2383
      for hv_name, hv_dict in self.op.hvparams.items():
2384
        if hv_name not in self.new_hvparams:
2385
          self.new_hvparams[hv_name] = hv_dict
2386
        else:
2387
          self.new_hvparams[hv_name].update(hv_dict)
2388

    
2389
    # os hypervisor parameters
2390
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2391
    if self.op.os_hvp:
2392
      if not isinstance(self.op.os_hvp, dict):
2393
        raise errors.OpPrereqError("Invalid 'os_hvp' parameter on input",
2394
                                   errors.ECODE_INVAL)
2395
      for os_name, hvs in self.op.os_hvp.items():
2396
        if not isinstance(hvs, dict):
2397
          raise errors.OpPrereqError(("Invalid 'os_hvp' parameter on"
2398
                                      " input"), errors.ECODE_INVAL)
2399
        if os_name not in self.new_os_hvp:
2400
          self.new_os_hvp[os_name] = hvs
2401
        else:
2402
          for hv_name, hv_dict in hvs.items():
2403
            if hv_name not in self.new_os_hvp[os_name]:
2404
              self.new_os_hvp[os_name][hv_name] = hv_dict
2405
            else:
2406
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
2407

    
2408
    # changes to the hypervisor list
2409
    if self.op.enabled_hypervisors is not None:
2410
      self.hv_list = self.op.enabled_hypervisors
2411
      if not self.hv_list:
2412
        raise errors.OpPrereqError("Enabled hypervisors list must contain at"
2413
                                   " least one member",
2414
                                   errors.ECODE_INVAL)
2415
      invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
2416
      if invalid_hvs:
2417
        raise errors.OpPrereqError("Enabled hypervisors contains invalid"
2418
                                   " entries: %s" %
2419
                                   utils.CommaJoin(invalid_hvs),
2420
                                   errors.ECODE_INVAL)
2421
      for hv in self.hv_list:
2422
        # if the hypervisor doesn't already exist in the cluster
2423
        # hvparams, we initialize it to empty, and then (in both
2424
        # cases) we make sure to fill the defaults, as we might not
2425
        # have a complete defaults list if the hypervisor wasn't
2426
        # enabled before
2427
        if hv not in new_hvp:
2428
          new_hvp[hv] = {}
2429
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2430
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2431
    else:
2432
      self.hv_list = cluster.enabled_hypervisors
2433

    
2434
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
2435
      # either the enabled list has changed, or the parameters have, validate
2436
      for hv_name, hv_params in self.new_hvparams.items():
2437
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
2438
            (self.op.enabled_hypervisors and
2439
             hv_name in self.op.enabled_hypervisors)):
2440
          # either this is a new hypervisor, or its parameters have changed
2441
          hv_class = hypervisor.GetHypervisor(hv_name)
2442
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2443
          hv_class.CheckParameterSyntax(hv_params)
2444
          _CheckHVParams(self, node_list, hv_name, hv_params)
2445

    
2446
    if self.op.os_hvp:
2447
      # no need to check any newly-enabled hypervisors, since the
2448
      # defaults have already been checked in the above code-block
2449
      for os_name, os_hvp in self.new_os_hvp.items():
2450
        for hv_name, hv_params in os_hvp.items():
2451
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2452
          # we need to fill in the new os_hvp on top of the actual hv_p
2453
          cluster_defaults = self.new_hvparams.get(hv_name, {})
2454
          new_osp = objects.FillDict(cluster_defaults, hv_params)
2455
          hv_class = hypervisor.GetHypervisor(hv_name)
2456
          hv_class.CheckParameterSyntax(new_osp)
2457
          _CheckHVParams(self, node_list, hv_name, new_osp)
2458

    
2459

    
2460
  def Exec(self, feedback_fn):
2461
    """Change the parameters of the cluster.
2462

2463
    """
2464
    if self.op.vg_name is not None:
2465
      new_volume = self.op.vg_name
2466
      if not new_volume:
2467
        new_volume = None
2468
      if new_volume != self.cfg.GetVGName():
2469
        self.cfg.SetVGName(new_volume)
2470
      else:
2471
        feedback_fn("Cluster LVM configuration already in desired"
2472
                    " state, not changing")
2473
    if self.op.hvparams:
2474
      self.cluster.hvparams = self.new_hvparams
2475
    if self.op.os_hvp:
2476
      self.cluster.os_hvp = self.new_os_hvp
2477
    if self.op.enabled_hypervisors is not None:
2478
      self.cluster.hvparams = self.new_hvparams
2479
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2480
    if self.op.beparams:
2481
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2482
    if self.op.nicparams:
2483
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2484

    
2485
    if self.op.candidate_pool_size is not None:
2486
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
2487
      # we need to update the pool size here, otherwise the save will fail
2488
      _AdjustCandidatePool(self, [])
2489

    
2490
    if self.op.maintain_node_health is not None:
2491
      self.cluster.maintain_node_health = self.op.maintain_node_health
2492

    
2493
    if self.op.add_uids is not None:
2494
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2495

    
2496
    if self.op.remove_uids is not None:
2497
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2498

    
2499
    if self.op.uid_pool is not None:
2500
      self.cluster.uid_pool = self.op.uid_pool
2501

    
2502
    self.cfg.Update(self.cluster, feedback_fn)


def _RedistributeAncillaryFiles(lu, additional_nodes=None):
2506
  """Distribute additional files which are part of the cluster configuration.
2507

2508
  ConfigWriter takes care of distributing the config and ssconf files, but
2509
  there are more files which should be distributed to all nodes. This function
2510
  makes sure those are copied.
2511

2512
  @param lu: calling logical unit
2513
  @param additional_nodes: list of nodes not in the config to distribute to
2514

2515
  """
2516
  # 1. Gather target nodes
2517
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2518
  dist_nodes = lu.cfg.GetOnlineNodeList()
2519
  if additional_nodes is not None:
2520
    dist_nodes.extend(additional_nodes)
2521
  if myself.name in dist_nodes:
2522
    dist_nodes.remove(myself.name)
2523

    
2524
  # 2. Gather files to distribute
2525
  dist_files = set([constants.ETC_HOSTS,
2526
                    constants.SSH_KNOWN_HOSTS_FILE,
2527
                    constants.RAPI_CERT_FILE,
2528
                    constants.RAPI_USERS_FILE,
2529
                    constants.CONFD_HMAC_KEY,
2530
                   ])
2531

    
2532
  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2533
  for hv_name in enabled_hypervisors:
2534
    hv_class = hypervisor.GetHypervisor(hv_name)
2535
    dist_files.update(hv_class.GetAncillaryFiles())
2536

    
2537
  # 3. Perform the files upload
2538
  for fname in dist_files:
2539
    if os.path.exists(fname):
2540
      result = lu.rpc.call_upload_file(dist_nodes, fname)
2541
      for to_node, to_result in result.items():
2542
        msg = to_result.fail_msg
2543
        if msg:
2544
          msg = ("Copy of file %s to node %s failed: %s" %
2545
                 (fname, to_node, msg))
2546
          lu.proc.LogWarning(msg)


class LURedistributeConfig(NoHooksLU):
2550
  """Force the redistribution of cluster configuration.
2551

2552
  This is a very simple LU.
2553

2554
  """
2555
  _OP_REQP = []
2556
  REQ_BGL = False
2557

    
2558
  def ExpandNames(self):
2559
    self.needed_locks = {
2560
      locking.LEVEL_NODE: locking.ALL_SET,
2561
    }
2562
    self.share_locks[locking.LEVEL_NODE] = 1
2563

    
2564
  def CheckPrereq(self):
2565
    """Check prerequisites.
2566

2567
    """
2568

    
2569
  def Exec(self, feedback_fn):
2570
    """Redistribute the configuration.
2571

2572
    """
2573
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
2574
    _RedistributeAncillaryFiles(self)


def _WaitForSync(lu, instance, oneshot=False):
2578
  """Sleep and poll for an instance's disk to sync.
2579

2580
  """
2581
  if not instance.disks:
2582
    return True
2583

    
2584
  if not oneshot:
2585
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
2586

    
2587
  node = instance.primary_node
2588

    
2589
  for dev in instance.disks:
2590
    lu.cfg.SetDiskID(dev, node)
2591

    
2592
  # TODO: Convert to utils.Retry
2593

    
2594
  retries = 0
2595
  degr_retries = 10 # in seconds, as we sleep 1 second each time
2596
  while True:
2597
    max_time = 0
2598
    done = True
2599
    cumul_degraded = False
2600
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
2601
    msg = rstats.fail_msg
2602
    if msg:
2603
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
2604
      retries += 1
2605
      if retries >= 10:
2606
        raise errors.RemoteError("Can't contact node %s for mirror data,"
2607
                                 " aborting." % node)
2608
      time.sleep(6)
2609
      continue
2610
    rstats = rstats.payload
2611
    retries = 0
2612
    for i, mstat in enumerate(rstats):
2613
      if mstat is None:
2614
        lu.LogWarning("Can't compute data for node %s/%s",
2615
                           node, instance.disks[i].iv_name)
2616
        continue
2617

    
2618
      cumul_degraded = (cumul_degraded or
2619
                        (mstat.is_degraded and mstat.sync_percent is None))
2620
      if mstat.sync_percent is not None:
2621
        done = False
2622
        if mstat.estimated_time is not None:
2623
          rem_time = "%d estimated seconds remaining" % mstat.estimated_time
2624
          max_time = mstat.estimated_time
2625
        else:
2626
          rem_time = "no time estimate"
2627
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
2628
                        (instance.disks[i].iv_name, mstat.sync_percent,
2629
                         rem_time))
2630

    
2631
    # if we're done but degraded, let's do a few small retries, to
2632
    # make sure we see a stable and not transient situation; therefore
2633
    # we force restart of the loop
2634
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
2635
      logging.info("Degraded disks found, %d retries left", degr_retries)
2636
      degr_retries -= 1
2637
      time.sleep(1)
2638
      continue
2639

    
2640
    if done or oneshot:
2641
      break
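    # not done yet: sleep at most a minute, or less if the estimated
    # remaining time is shorter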
2642

    
2643
    time.sleep(min(60, max_time))
2644

    
2645
  if done:
2646
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
2647
  return not cumul_degraded


def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
2651
  """Check that mirrors are not degraded.
2652

2653
  The ldisk parameter, if True, will change the test from the
2654
  is_degraded attribute (which represents overall non-ok status for
2655
  the device(s)) to the ldisk (representing the local storage status).
2656

2657
  """
2658
  lu.cfg.SetDiskID(dev, node)
2659

    
2660
  result = True
2661

    
2662
  if on_primary or dev.AssembleOnSecondary():
2663
    rstats = lu.rpc.call_blockdev_find(node, dev)
2664
    msg = rstats.fail_msg
2665
    if msg:
2666
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
2667
      result = False
2668
    elif not rstats.payload:
2669
      lu.LogWarning("Can't find disk on node %s", node)
2670
      result = False
2671
    else:
2672
      if ldisk:
2673
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
2674
      else:
2675
        result = result and not rstats.payload.is_degraded
2676

    
2677
  if dev.children:
2678
    for child in dev.children:
2679
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
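      # note: children are always checked with the default (is_degraded)
      # test; the ldisk flag is not propagated to them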
2680

    
2681
  return result


class LUDiagnoseOS(NoHooksLU):
2685
  """Logical unit for OS diagnose/query.
2686

2687
  """
2688
  _OP_REQP = ["output_fields", "names"]
2689
  REQ_BGL = False
2690
  _FIELDS_STATIC = utils.FieldSet()
2691
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants")
2692
  # Fields that need calculation of global os validity
2693
  _FIELDS_NEEDVALID = frozenset(["valid", "variants"])
2694

    
2695
  def ExpandNames(self):
2696
    if self.op.names:
2697
      raise errors.OpPrereqError("Selective OS query not supported",
2698
                                 errors.ECODE_INVAL)
2699

    
2700
    _CheckOutputFields(static=self._FIELDS_STATIC,
2701
                       dynamic=self._FIELDS_DYNAMIC,
2702
                       selected=self.op.output_fields)
2703

    
2704
    # Lock all nodes, in shared mode
2705
    # Temporary removal of locks, should be reverted later
2706
    # TODO: reintroduce locks when they are lighter-weight
2707
    self.needed_locks = {}
2708
    #self.share_locks[locking.LEVEL_NODE] = 1
2709
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2710

    
2711
  def CheckPrereq(self):
2712
    """Check prerequisites.
2713

2714
    """
2715

    
2716
  @staticmethod
2717
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary.
2719

2720
    @param rlist: a map with node names as keys and OS objects as values
2721

2722
    @rtype: dict
2723
    @return: a dictionary with osnames as keys and as value another map, with
2724
        nodes as keys and tuples of (path, status, diagnose, variants) as values, eg::
2725

2726
          {"debian-etch": {"node1": [(/usr/lib/..., True, ""),
2727
                                     (/srv/..., False, "invalid api")],
2728
                           "node2": [(/srv/..., True, "")]}
2729
          }
2730

2731
    """
2732
    all_os = {}
2733
    # we build here the list of nodes that didn't fail the RPC (at RPC
2734
    # level), so that nodes with a non-responding node daemon don't
2735
    # make all OSes invalid
2736
    good_nodes = [node_name for node_name in rlist
2737
                  if not rlist[node_name].fail_msg]
2738
    for node_name, nr in rlist.items():
2739
      if nr.fail_msg or not nr.payload:
2740
        continue
2741
      for name, path, status, diagnose, variants in nr.payload:
2742
        if name not in all_os:
2743
          # build a list of nodes for this os containing empty lists
2744
          # for each node in node_list
2745
          all_os[name] = {}
2746
          for nname in good_nodes:
2747
            all_os[name][nname] = []
2748
        all_os[name][node_name].append((path, status, diagnose, variants))
2749
    return all_os
2750

    
2751
  def Exec(self, feedback_fn):
2752
    """Compute the list of OSes.
2753

2754
    """
2755
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
2756
    node_data = self.rpc.call_os_diagnose(valid_nodes)
2757
    pol = self._DiagnoseByOS(node_data)
2758
    output = []
2759
    calc_valid = self._FIELDS_NEEDVALID.intersection(self.op.output_fields)
2760
    calc_variants = "variants" in self.op.output_fields
2761

    
2762
    for os_name, os_data in pol.items():
2763
      row = []
2764
      if calc_valid:
2765
        valid = True
2766
        variants = None
2767
        for osl in os_data.values():
2768
          valid = valid and osl and osl[0][1]
2769
          if not valid:
2770
            variants = None
2771
            break
2772
          if calc_variants:
2773
            node_variants = osl[0][3]
2774
            if variants is None:
2775
              variants = node_variants
2776
            else:
2777
              variants = [v for v in variants if v in node_variants]
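              # i.e. keep only the variants supported by every node seen so far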
2778

    
2779
      for field in self.op.output_fields:
2780
        if field == "name":
2781
          val = os_name
2782
        elif field == "valid":
2783
          val = valid
2784
        elif field == "node_status":
2785
          # this is just a copy of the dict
2786
          val = {}
2787
          for node_name, nos_list in os_data.items():
2788
            val[node_name] = nos_list
2789
        elif field == "variants":
2790
          val = variants
2791
        else:
2792
          raise errors.ParameterError(field)
2793
        row.append(val)
2794
      output.append(row)
2795

    
2796
    return output


class LURemoveNode(LogicalUnit):
2800
  """Logical unit for removing a node.
2801

2802
  """
2803
  HPATH = "node-remove"
2804
  HTYPE = constants.HTYPE_NODE
2805
  _OP_REQP = ["node_name"]
2806

    
2807
  def BuildHooksEnv(self):
2808
    """Build hooks env.
2809

2810
    This doesn't run on the target node in the pre phase as a failed
2811
    node would then be impossible to remove.
2812

2813
    """
2814
    env = {
2815
      "OP_TARGET": self.op.node_name,
2816
      "NODE_NAME": self.op.node_name,
2817
      }
2818
    all_nodes = self.cfg.GetNodeList()
2819
    try:
2820
      all_nodes.remove(self.op.node_name)
2821
    except ValueError:
2822
      logging.warning("Node %s which is about to be removed not found"
2823
                      " in the all nodes list", self.op.node_name)
2824
    return env, all_nodes, all_nodes
2825

    
2826
  def CheckPrereq(self):
2827
    """Check prerequisites.
2828

2829
    This checks:
2830
     - the node exists in the configuration
2831
     - it does not have primary or secondary instances
2832
     - it's not the master
2833

2834
    Any errors are signaled by raising errors.OpPrereqError.
2835

2836
    """
2837
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
2838
    node = self.cfg.GetNodeInfo(self.op.node_name)
2839
    assert node is not None
2840

    
2841
    instance_list = self.cfg.GetInstanceList()
2842

    
2843
    masternode = self.cfg.GetMasterNode()
2844
    if node.name == masternode:
2845
      raise errors.OpPrereqError("Node is the master node,"
2846
                                 " you need to failover first.",
2847
                                 errors.ECODE_INVAL)
2848

    
2849
    for instance_name in instance_list:
2850
      instance = self.cfg.GetInstanceInfo(instance_name)
2851
      if node.name in instance.all_nodes:
2852
        raise errors.OpPrereqError("Instance %s is still running on the node,"
2853
                                   " please remove first." % instance_name,
2854
                                   errors.ECODE_INVAL)
2855
    self.op.node_name = node.name
2856
    self.node = node
2857

    
2858
  def Exec(self, feedback_fn):
2859
    """Removes the node from the cluster.
2860

2861
    """
2862
    node = self.node
2863
    logging.info("Stopping the node daemon and removing configs from node %s",
2864
                 node.name)
2865

    
2866
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
2867

    
2868
    # Promote nodes to master candidate as needed
2869
    _AdjustCandidatePool(self, exceptions=[node.name])
2870
    self.context.RemoveNode(node.name)
2871

    
2872
    # Run post hooks on the node before it's removed
2873
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
2874
    try:
2875
      hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
2876
    except:
2877
      # pylint: disable-msg=W0702
2878
      self.LogWarning("Errors occurred running hooks on %s" % node.name)
2879

    
2880
    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
2881
    msg = result.fail_msg
2882
    if msg:
2883
      self.LogWarning("Errors encountered on the remote node while leaving"
2884
                      " the cluster: %s", msg)
2885

    
2886
    # Remove node from our /etc/hosts
2887
    if self.cfg.GetClusterInfo().modify_etc_hosts:
2888
      # FIXME: this should be done via an rpc call to node daemon
2889
      utils.RemoveHostFromEtcHosts(node.name)
2890
      _RedistributeAncillaryFiles(self)
2891

    
2892

    
2893
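# LUQueryNodes below distinguishes "static" fields (answered from the
# configuration alone) from "dynamic" ones (memory/disk/CPU data that require
# a node_info RPC).  Node locks are only taken when dynamic fields are
# requested and the caller asked for locking, so a purely static query never
# blocks on other jobs.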
class LUQueryNodes(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable-msg=W0142
  _OP_REQP = ["output_fields", "names", "use_locking"]
  REQ_BGL = False

  _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
                    "master_candidate", "offline", "drained"]

  _FIELDS_DYNAMIC = utils.FieldSet(
    "dtotal", "dfree",
    "mtotal", "mnode", "mfree",
    "bootid",
    "ctotal", "cnodes", "csockets",
    )

  _FIELDS_STATIC = utils.FieldSet(*[
    "pinst_cnt", "sinst_cnt",
    "pinst_list", "sinst_list",
    "pip", "sip", "tags",
    "master",
    "role"] + _SIMPLE_FIELDS
    )

  def ExpandNames(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.names:
      self.wanted = _GetWantedNodes(self, self.op.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      # if we don't request only static fields, we need to lock the nodes
      self.needed_locks[locking.LEVEL_NODE] = self.wanted

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # The validation of the node list is done in the _GetWantedNodes,
    # if non empty, and if empty, there's no validation to do
    pass

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    all_info = self.cfg.GetAllNodesInfo()
    if self.do_locking:
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
    elif self.wanted != locking.ALL_SET:
      nodenames = self.wanted
      missing = set(nodenames).difference(all_info.keys())
      if missing:
        raise errors.OpExecError(
          "Some nodes were removed before retrieving their data: %s" % missing)
    else:
      nodenames = all_info.keys()

    nodenames = utils.NiceSort(nodenames)
    nodelist = [all_info[name] for name in nodenames]

    # begin data gathering

    if self.do_node_query:
      live_data = {}
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                          self.cfg.GetHypervisorType())
      for name in nodenames:
        nodeinfo = node_data[name]
        if not nodeinfo.fail_msg and nodeinfo.payload:
          nodeinfo = nodeinfo.payload
          fn = utils.TryConvert
          live_data[name] = {
            "mtotal": fn(int, nodeinfo.get('memory_total', None)),
            "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
            "mfree": fn(int, nodeinfo.get('memory_free', None)),
            "dtotal": fn(int, nodeinfo.get('vg_size', None)),
            "dfree": fn(int, nodeinfo.get('vg_free', None)),
            "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
            "bootid": nodeinfo.get('bootid', None),
            "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
            "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
            }
        else:
          live_data[name] = {}
    else:
      live_data = dict.fromkeys(nodenames, {})

    node_to_primary = dict([(name, set()) for name in nodenames])
    node_to_secondary = dict([(name, set()) for name in nodenames])

    inst_fields = frozenset(("pinst_cnt", "pinst_list",
                             "sinst_cnt", "sinst_list"))
    if inst_fields & frozenset(self.op.output_fields):
      inst_data = self.cfg.GetAllInstancesInfo()

      for inst in inst_data.values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)

    master_node = self.cfg.GetMasterNode()

    # end data gathering

    output = []
    for node in nodelist:
      node_output = []
      for field in self.op.output_fields:
        if field in self._SIMPLE_FIELDS:
          val = getattr(node, field)
        elif field == "pinst_list":
          val = list(node_to_primary[node.name])
        elif field == "sinst_list":
          val = list(node_to_secondary[node.name])
        elif field == "pinst_cnt":
          val = len(node_to_primary[node.name])
        elif field == "sinst_cnt":
          val = len(node_to_secondary[node.name])
        elif field == "pip":
          val = node.primary_ip
        elif field == "sip":
          val = node.secondary_ip
        elif field == "tags":
          val = list(node.GetTags())
        elif field == "master":
          val = node.name == master_node
        elif self._FIELDS_DYNAMIC.Matches(field):
          val = live_data[node.name].get(field, None)
        elif field == "role":
          if node.name == master_node:
            val = "M"
          elif node.master_candidate:
            val = "C"
          elif node.drained:
            val = "D"
          elif node.offline:
            val = "O"
          else:
            val = "R"
        else:
          raise errors.ParameterError(field)
        node_output.append(val)
      output.append(node_output)

    return output


3055
  """Logical unit for getting volumes on node(s).
3056

3057
  """
3058
  _OP_REQP = ["nodes", "output_fields"]
3059
  REQ_BGL = False
3060
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3061
  _FIELDS_STATIC = utils.FieldSet("node")
3062

    
3063
  def ExpandNames(self):
3064
    _CheckOutputFields(static=self._FIELDS_STATIC,
3065
                       dynamic=self._FIELDS_DYNAMIC,
3066
                       selected=self.op.output_fields)
3067

    
3068
    self.needed_locks = {}
3069
    self.share_locks[locking.LEVEL_NODE] = 1
3070
    if not self.op.nodes:
3071
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3072
    else:
3073
      self.needed_locks[locking.LEVEL_NODE] = \
3074
        _GetWantedNodes(self, self.op.nodes)
3075

    
3076
  def CheckPrereq(self):
3077
    """Check prerequisites.
3078

3079
    This checks that the fields required are valid output fields.
3080

3081
    """
3082
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3083

    
3084
  def Exec(self, feedback_fn):
3085
    """Computes the list of nodes and their attributes.
3086

3087
    """
3088
    nodenames = self.nodes
3089
    volumes = self.rpc.call_node_volumes(nodenames)
3090

    
3091
    ilist = [self.cfg.GetInstanceInfo(iname) for iname
3092
             in self.cfg.GetInstanceList()]
3093

    
3094
    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3095

    
3096
    output = []
3097
    for node in nodenames:
3098
      nresult = volumes[node]
3099
      if nresult.offline:
3100
        continue
3101
      msg = nresult.fail_msg
3102
      if msg:
3103
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3104
        continue
3105

    
3106
      node_vols = nresult.payload[:]
3107
      node_vols.sort(key=lambda vol: vol['dev'])
3108

    
3109
      for vol in node_vols:
3110
        node_output = []
3111
        for field in self.op.output_fields:
3112
          if field == "node":
3113
            val = node
3114
          elif field == "phys":
3115
            val = vol['dev']
3116
          elif field == "vg":
3117
            val = vol['vg']
3118
          elif field == "name":
3119
            val = vol['name']
3120
          elif field == "size":
3121
            val = int(float(vol['size']))
3122
          elif field == "instance":
3123
            for inst in ilist:
3124
              if node not in lv_by_node[inst]:
3125
                continue
3126
              if vol['name'] in lv_by_node[inst][node]:
3127
                val = inst.name
3128
                break
3129
            else:
3130
              val = '-'
3131
          else:
3132
            raise errors.ParameterError(field)
3133
          node_output.append(str(val))
3134

    
3135
        output.append(node_output)
3136

    
3137
    return output
3138

    
3139

    
3140
class LUQueryNodeStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _OP_REQP = ["nodes", "storage_type", "output_fields"]
  REQ_BGL = False
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)

  def CheckArguments(self):
    _CheckStorageType(self.op.storage_type)

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the fields required are valid output fields.

    """
    self.op.name = getattr(self.op, "name", None)

    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result


class LUModifyNodeStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  _OP_REQP = ["node_name", "storage_type", "name", "changes"]
  REQ_BGL = False

  def CheckArguments(self):
    self.opnode_name = _ExpandNodeName(self.cfg, self.op.node_name)

    _CheckStorageType(self.op.storage_type)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def CheckPrereq(self):
    """Check prerequisites.

    """
    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


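# The node-add flow below is roughly: CheckArguments normalizes the name,
# CheckPrereq resolves it via DNS and validates the IP/homing setup against
# the master, and Exec pushes SSH keys, updates /etc/hosts and registers the
# node (or re-adds it when op.readd is set).  Illustrative CLI entry point
# (assumed; see the gnt-node man page for the authoritative syntax):
#   gnt-node add [-s SECONDARY_IP] [--readd] NODE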
class LUAddNode(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]

  def CheckArguments(self):
    # validate/normalize the node name
    self.op.node_name = utils.HostInfo.NormalizeName(self.op.node_name)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      }
    nodes_0 = self.cfg.GetNodeList()
    nodes_1 = nodes_0 + [self.op.node_name, ]
    return env, nodes_0, nodes_1

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) match the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    node_name = self.op.node_name
    cfg = self.cfg

    dns_data = utils.GetHostInfo(node_name)

    node = dns_data.name
    primary_ip = self.op.primary_ip = dns_data.ip
    secondary_ip = getattr(self.op, "secondary_ip", None)
    if secondary_ip is None:
      secondary_ip = primary_ip
    if not utils.IsValidIP(secondary_ip):
      raise errors.OpPrereqError("Invalid secondary IP given",
                                 errors.ECODE_INVAL)
    self.op.secondary_ip = secondary_ip

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    self.changed_primary_ip = False

    for existing_node_name in node_list:
      existing_node = cfg.GetNodeInfo(existing_node_name)

      if self.op.readd and node == existing_node_name:
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no private ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a private ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                           source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to noded port",
                                   errors.ECODE_ENVIRON)

    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)

    if self.op.readd:
      self.new_node = self.cfg.GetNodeInfo(node)
      assert self.new_node is not None, "Can't retrieve locked node %s" % node
    else:
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))

    # setup ssh on node
    if self.cfg.GetClusterInfo().modify_ssh_setup:
      logging.info("Copy ssh key to node %s", node)
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
      keyarray = []
      keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
                  constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
                  priv_key, pub_key]

      for i in keyfiles:
        keyarray.append(utils.ReadFile(i))

      result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
                                      keyarray[2], keyarray[3], keyarray[4],
                                      keyarray[5])
      result.Raise("Cannot transfer ssh keys to the new node")

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      # FIXME: this should be done via an rpc call to node daemon
      utils.AddHostToEtcHosts(new_node.name)

    if new_node.secondary_ip != new_node.primary_ip:
      result = self.rpc.call_node_has_ip_address(new_node.name,
                                                 new_node.secondary_ip)
      result.Raise("Failure checking secondary ip on node %s" % new_node.name,
                   prereq=True, ecode=errors.ECODE_ENVIRON)
      if not result.payload:
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
                                 " you gave (%s). Please fix and re-run this"
                                 " command." % new_node.secondary_ip)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: [node],
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed.")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
      self.context.AddNode(new_node, self.proc.GetECId())


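# LUSetNodeParams treats offline, drained and master_candidate as mutually
# exclusive "roles": at most one of them may be set to True in a single
# operation, and demoting one of the last master candidates is only allowed
# together with auto_promote so the candidate pool can be refilled.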
class LUSetNodeParams(LogicalUnit):
  """Modifies the parameters of a node.

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    _CheckBooleanOpField(self.op, 'master_candidate')
    _CheckBooleanOpField(self.op, 'offline')
    _CheckBooleanOpField(self.op, 'drained')
    _CheckBooleanOpField(self.op, 'auto_promote')
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
    if all_mods.count(None) == 3:
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we're offlining or draining the node
    self.offline_or_drain = (self.op.offline == True or
                             self.op.drained == True)
    self.deoffline_or_drain = (self.op.offline == False or
                               self.op.drained == False)
    self.might_demote = (self.op.master_candidate == False or
                         self.offline_or_drain)

    self.lock_all = self.op.auto_promote and self.might_demote


  def ExpandNames(self):
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      }
    nl = [self.cfg.GetMasterNode(),
          self.op.node_name]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via masterfailover",
                                   errors.ECODE_INVAL)


    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto-promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto_promote to allow promotion",
                                   errors.ECODE_INVAL)

    if (self.op.master_candidate == True and
        ((node.offline and not self.op.offline == False) or
         (node.drained and not self.op.drained == False))):
      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
                                 " to master_candidate" % node.name,
                                 errors.ECODE_INVAL)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.deoffline_or_drain and not self.offline_or_drain and not
        self.op.master_candidate == True and not node.master_candidate):
      self.op.master_candidate = _DecideSelfPromotion(self)
      if self.op.master_candidate:
        self.LogInfo("Autopromoting node to master candidate")

    return

  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node

    result = []
    changed_mc = False

    if self.op.offline is not None:
      node.offline = self.op.offline
      result.append(("offline", str(self.op.offline)))
      if self.op.offline == True:
        if node.master_candidate:
          node.master_candidate = False
          changed_mc = True
          result.append(("master_candidate", "auto-demotion due to offline"))
        if node.drained:
          node.drained = False
          result.append(("drained", "clear drained status due to offline"))

    if self.op.master_candidate is not None:
      node.master_candidate = self.op.master_candidate
      changed_mc = True
      result.append(("master_candidate", str(self.op.master_candidate)))
      if self.op.master_candidate == False:
        rrc = self.rpc.call_node_demote_from_mc(node.name)
        msg = rrc.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s" % msg)

    if self.op.drained is not None:
      node.drained = self.op.drained
      result.append(("drained", str(self.op.drained)))
      if self.op.drained == True:
        if node.master_candidate:
          node.master_candidate = False
          changed_mc = True
          result.append(("master_candidate", "auto-demotion due to drain"))
          rrc = self.rpc.call_node_demote_from_mc(node.name)
          msg = rrc.fail_msg
          if msg:
            self.LogWarning("Node failed to demote itself: %s" % msg)
        if node.offline:
          node.offline = False
          result.append(("offline", "clear offline status due to drain"))

    # we locked all nodes, we adjust the CP before updating this node
    if self.lock_all:
      _AdjustCandidatePool(self, [node.name])

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup
    if changed_mc:
      self.context.ReaddNode(node)

    return result


class LUPowercycleNode(NoHooksLU):
  """Powercycles a node.

  """
  _OP_REQP = ["node_name", "force"]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This LU has no prereqs.

    """
    pass

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload


class LUQueryClusterInfo(NoHooksLU):
  """Query cluster configuration.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """No prerequisites needed for this LU.

    """
    pass

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}

    # Filter just for enabled hypervisors
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = {}
      for hv_name, hv_params in hv_dict.items():
        if hv_name in cluster.enabled_hypervisors:
          os_hvp[os_name][hv_name] = hv_params

    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.enabled_hypervisors[0],
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "nicparams": cluster.nicparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "file_storage_dir": cluster.file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      }

    return result


class LUQueryConfigValues(NoHooksLU):
  """Return configuration values.

  """
  _OP_REQP = []
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause")

  def ExpandNames(self):
    self.needed_locks = {}

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def CheckPrereq(self):
    """No prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Dump a representation of the cluster config to the standard output.

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      else:
        raise errors.ParameterError(field)
      values.append(entry)
    return values


class LUActivateInstanceDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)
    if not hasattr(self.op, "ignore_size"):
      self.op.ignore_size = False

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
              _AssembleInstanceDisks(self, self.instance,
                                     ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info


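# _AssembleInstanceDisks below returns a (disks_ok, device_info) pair;
# device_info is a list of (primary_node, iv_name, device_path) tuples, and
# it is what LUActivateInstanceDisks above hands back to the caller.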
def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: False if the operation failed, otherwise a list of
      (host, instance_visible_name, node_visible_name)
      with the mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  # With the two-pass mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for inst_disk in instance.disks:
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for inst_disk in instance.disks:
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in instance.disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info


def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                       ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")


class LUDeactivateInstanceDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks.

    """
    instance = self.instance
    _SafeShutdownInstanceDisks(self, instance)


def _SafeShutdownInstanceDisks(lu, instance):
  """Shutdown block devices of an instance.

  This function checks if an instance is running, before calling
  _ShutdownInstanceDisks.

  """
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance)


def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If ignore_primary is false, errors on the primary node are not
  ignored.

  """
  all_result = True
  for disk in instance.disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if not ignore_primary or node != instance.primary_node:
          all_result = False
  return all_result


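# The two capacity helpers below rely on the node_info RPC payload:
# 'memory_free' and 'vg_free' are compared against values in MiB, and a
# non-integer payload value is treated as "cannot check" and turned into an
# OpPrereqError rather than silently accepting the node.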
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  free_mem = nodeinfo[node].payload.get('memory_free', None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)


def _CheckNodesFreeDisk(lu, nodenames, requested):
  """Checks if nodes have enough free disk space in the default VG.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
                                   lu.cfg.GetHypervisorType())
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    vg_free = info.payload.get("vg_free", None)
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node %s,"
                                 " result was '%s'" % (node, vg_free),
                                 errors.ECODE_ENVIRON)
    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s:"
                                 " required %d MiB, available %d MiB" %
                                 (node, requested, vg_free),
                                 errors.ECODE_NORES)


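# Example of how the helper above is used: LUStartupInstance.CheckPrereq
# below calls _CheckNodeFreeMemory(self, primary_node, ...,
# bep[constants.BE_MEMORY], instance.hypervisor) only when the instance is
# not already running on its primary node, so restarting a live instance
# never fails the memory check.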
class LUStartupInstance(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "force"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra beparams
    self.beparams = getattr(self.op, "beparams", {})
    if self.beparams:
      if not isinstance(self.beparams, dict):
        raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
                                   " dict" % (type(self.beparams), ),
                                   errors.ECODE_INVAL)
      # fill the beparams dict
      utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
      self.op.beparams = self.beparams

    # extra hvparams
    self.hvparams = getattr(self.op, "hvparams", {})
    if self.hvparams:
      if not isinstance(self.hvparams, dict):
        raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
                                   " dict" % (type(self.hvparams), ),
                                   errors.ECODE_INVAL)

      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = objects.FillDict(cluster.hvparams[instance.hypervisor],
                                    instance.hvparams)
      filled_hvp.update(self.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
      self.op.hvparams = self.hvparams

    _CheckNodeOnline(self, instance.primary_node)

    bep = self.cfg.GetClusterInfo().FillBE(instance)
    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node,
                      prereq=True, ecode=errors.ECODE_ENVIRON)
    if not remote_info.payload: # not running already
      _CheckNodeFreeMemory(self, instance.primary_node,
                           "starting instance %s" % instance.name,
                           bep[constants.BE_MEMORY], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    """
    instance = self.instance
    force = self.op.force

    self.cfg.MarkInstanceUp(instance.name)

    node_current = instance.primary_node

    _StartInstanceDisks(self, instance, force)

    result = self.rpc.call_instance_start(node_current, instance,
                                          self.hvparams, self.beparams)
    msg = result.fail_msg
    if msg:
      _ShutdownInstanceDisks(self, instance)
      raise errors.OpExecError("Could not start instance: %s" % msg)


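# LURebootInstance supports three reboot types: soft and hard reboots are
# delegated to the hypervisor via the instance_reboot RPC, while a "full"
# reboot is emulated as shutdown + disk deactivation/activation + start on
# the primary node.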
class LURebootInstance(LogicalUnit):
4207
  """Reboot an instance.
4208

4209
  """
4210
  HPATH = "instance-reboot"
4211
  HTYPE = constants.HTYPE_INSTANCE
4212
  _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
4213
  REQ_BGL = False
4214

    
4215
  def CheckArguments(self):
4216
    """Check the arguments.
4217

4218
    """
4219
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4220
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
4221

    
4222
  def ExpandNames(self):
4223
    if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
4224
                                   constants.INSTANCE_REBOOT_HARD,
4225
                                   constants.INSTANCE_REBOOT_FULL]:
4226
      raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
4227
                                  (constants.INSTANCE_REBOOT_SOFT,
4228
                                   constants.INSTANCE_REBOOT_HARD,
4229
                                   constants.INSTANCE_REBOOT_FULL))
4230
    self._ExpandAndLockInstance()
4231

    
4232
  def BuildHooksEnv(self):
4233
    """Build hooks env.
4234

4235
    This runs on master, primary and secondary nodes of the instance.
4236

4237
    """
4238
    env = {
4239
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
4240
      "REBOOT_TYPE": self.op.reboot_type,
4241
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
4242
      }
4243
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4244
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4245
    return env, nl, nl
4246

    
4247
  def CheckPrereq(self):
4248
    """Check prerequisites.
4249

4250
    This checks that the instance is in the cluster.
4251

4252
    """
4253
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4254
    assert self.instance is not None, \
4255
      "Cannot retrieve locked instance %s" % self.op.instance_name
4256

    
4257
    _CheckNodeOnline(self, instance.primary_node)
4258

    
4259
    # check bridges existence
4260
    _CheckInstanceBridgesExist(self, instance)
4261

    
4262
  def Exec(self, feedback_fn):
4263
    """Reboot the instance.
4264

4265
    """
4266
    instance = self.instance
4267
    ignore_secondaries = self.op.ignore_secondaries
4268
    reboot_type = self.op.reboot_type
4269

    
4270
    node_current = instance.primary_node
4271

    
4272
    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
4273
                       constants.INSTANCE_REBOOT_HARD]:
4274
      for disk in instance.disks:
4275
        self.cfg.SetDiskID(disk, node_current)
4276
      result = self.rpc.call_instance_reboot(node_current, instance,
4277
                                             reboot_type,
4278
                                             self.shutdown_timeout)
4279
      result.Raise("Could not reboot instance")
4280
    else:
4281
      result = self.rpc.call_instance_shutdown(node_current, instance,
4282
                                               self.shutdown_timeout)
4283
      result.Raise("Could not shutdown instance for full reboot")
4284
      _ShutdownInstanceDisks(self, instance)
4285
      _StartInstanceDisks(self, instance, ignore_secondaries)
4286
      result = self.rpc.call_instance_start(node_current, instance, None, None)
4287
      msg = result.fail_msg
4288
      if msg:
4289
        _ShutdownInstanceDisks(self, instance)
4290
        raise errors.OpExecError("Could not start instance for"
4291
                                 " full reboot: %s" % msg)
4292

    
4293
    self.cfg.MarkInstanceUp(instance.name)
4294

    
4295

    
4296
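# Editor's note: the sketch below is illustrative only and not part of the
# original cmdlib.py. It summarizes how LURebootInstance.Exec (above) maps
# the three reboot types onto node RPCs: soft and hard reboots are handed to
# a single call_instance_reboot RPC, while a full reboot is emulated as
# shutdown + disk cycle + start. The helper name is hypothetical.
def _ExampleRebootPlan(reboot_type):
  """Return the (illustrative) step names a reboot of this type would use."""
  if reboot_type in (constants.INSTANCE_REBOOT_SOFT,
                     constants.INSTANCE_REBOOT_HARD):
    return ["call_instance_reboot"]
  # constants.INSTANCE_REBOOT_FULL
  return ["call_instance_shutdown", "_ShutdownInstanceDisks",
          "_StartInstanceDisks", "call_instance_start"]

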
class LUShutdownInstance(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.timeout = getattr(self.op, "timeout",
                           constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.timeout
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    node_current = instance.primary_node
    timeout = self.timeout
    self.cfg.MarkInstanceDown(instance.name)
    result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
    msg = result.fail_msg
    if msg:
      self.proc.LogWarning("Could not shutdown instance: %s" % msg)

    _ShutdownInstanceDisks(self, instance)


class LUReinstallInstance(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot reinstall")

    self.op.os_type = getattr(self.op, "os_type", None)
    self.op.force_variant = getattr(self.op, "force_variant", False)
    if self.op.os_type is not None:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
                                             self.op.debug_level)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)


class LURecreateInstanceDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disks"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    if not isinstance(self.op.disks, list):
      raise errors.OpPrereqError("Invalid disks parameter", errors.ECODE_INVAL)
    for item in self.op.disks:
      if (not isinstance(item, int) or
          item < 0):
        raise errors.OpPrereqError("Invalid disk specification '%s'" %
                                   str(item), errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot recreate disks")

    if not self.op.disks:
      self.op.disks = range(len(instance.disks))
    else:
      for idx in self.op.disks:
        if idx >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
                                     errors.ECODE_INVAL)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Recreate the disks.

    """
    to_skip = []
    for idx, _ in enumerate(self.instance.disks):
      if idx not in self.op.disks: # disk idx has not been passed in
        to_skip.append(idx)
        continue

    _CreateDisks(self, self.instance, to_skip=to_skip)


class LURenameInstance(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "new_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None
    _CheckNodeOnline(self, instance.primary_node)
    _CheckInstanceDown(self, instance, "cannot rename")
    self.instance = instance

    # new name verification
    name_info = utils.GetHostInfo(self.op.new_name)

    self.op.new_name = new_name = name_info.name
    instance_list = self.cfg.GetInstanceList()
    if new_name in instance_list:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name, errors.ECODE_EXISTS)

    if not getattr(self.op, "ignore_ip", False):
      if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (name_info.ip, new_name),
                                   errors.ECODE_NOTUNIQUE)

  def Exec(self, feedback_fn):
    """Rename the instance.

    """
    inst = self.instance
    old_name = inst.name

    if inst.disk_template == constants.DT_FILE:
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if inst.disk_template == constants.DT_FILE:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (inst.primary_node, old_file_storage_dir,
                    new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    try:
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name, self.op.debug_level)
      msg = result.fail_msg
      if msg:
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.proc.LogWarning(msg)
    finally:
      _ShutdownInstanceDisks(self, inst)


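# Editor's note: illustrative sketch, not part of the original cmdlib.py.
# For DT_FILE instances, LURenameInstance (above) derives the storage
# directory from the first disk's logical_id, whose second element is the
# file path; that directory is then renamed via call_file_storage_dir_rename.
# The helper name and sample values below are hypothetical.
def _ExampleFileStorageDir(logical_id):
  """Return the directory holding a file-based disk, given its logical_id."""
  return os.path.dirname(logical_id[1])

# e.g. _ExampleFileStorageDir(("loop", "/srv/ganeti/file-storage/inst1/disk0"))
# would return "/srv/ganeti/file-storage/inst1" (paths are illustrative only).

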
class LURemoveInstance(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_failures"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["SHUTDOWN_TIMEOUT"] = self.shutdown_timeout
    nl = [self.cfg.GetMasterNode()]
    nl_post = list(self.instance.all_nodes) + nl
    return env, nl, nl_post

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    """
    instance = self.instance
    logging.info("Shutting down instance %s on node %s",
                 instance.name, instance.primary_node)

    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
                                             self.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_failures:
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, instance.primary_node, msg))

    logging.info("Removing block devices for instance %s", instance.name)

    if not _RemoveDisks(self, instance):
      if self.op.ignore_failures:
        feedback_fn("Warning: can't remove instance's disks")
      else:
        raise errors.OpExecError("Can't remove instance's disks")

    logging.info("Removing instance %s out of cluster config", instance.name)

    self.cfg.RemoveInstance(instance.name)
    self.remove_locks[locking.LEVEL_INSTANCE] = instance.name


class LUQueryInstances(NoHooksLU):
  """Logical unit for querying instances.

  """
  # pylint: disable-msg=W0142
  _OP_REQP = ["output_fields", "names", "use_locking"]
  REQ_BGL = False
  _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
                    "serial_no", "ctime", "mtime", "uuid"]
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
                                    "admin_state",
                                    "disk_template", "ip", "mac", "bridge",
                                    "nic_mode", "nic_link",
                                    "sda_size", "sdb_size", "vcpus", "tags",
                                    "network_port", "beparams",
                                    r"(disk)\.(size)/([0-9]+)",
                                    r"(disk)\.(sizes)", "disk_usage",
                                    r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
                                    r"(nic)\.(bridge)/([0-9]+)",
                                    r"(nic)\.(macs|ips|modes|links|bridges)",
                                    r"(disk|nic)\.(count)",
                                    "hvparams",
                                    ] + _SIMPLE_FIELDS +
                                  ["hv/%s" % name
                                   for name in constants.HVS_PARAMETERS
                                   if name not in constants.HVC_GLOBALS] +
                                  ["be/%s" % name
                                   for name in constants.BES_PARAMETERS])
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")


  def ExpandNames(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_INSTANCE] = 1
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.names:
      self.wanted = _GetWantedInstances(self, self.op.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.do_locking:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Computes the list of instances and their attributes.

    """
    # pylint: disable-msg=R0912
    # way too many branches here
    all_info = self.cfg.GetAllInstancesInfo()
    if self.wanted == locking.ALL_SET:
      # caller didn't specify instance names, so ordering is not important
      if self.do_locking:
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
      else:
        instance_names = all_info.keys()
      instance_names = utils.NiceSort(instance_names)
    else:
      # caller did specify names, so we must keep the ordering
      if self.do_locking:
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
      else:
        tgt_set = all_info.keys()
      missing = set(self.wanted).difference(tgt_set)
      if missing:
        raise errors.OpExecError("Some instances were removed before"
                                 " retrieving their data: %s" % missing)
      instance_names = self.wanted

    instance_list = [all_info[iname] for iname in instance_names]

    # begin data gathering

    nodes = frozenset([inst.primary_node for inst in instance_list])
    hv_list = list(set([inst.hypervisor for inst in instance_list]))

    bad_nodes = []
    off_nodes = []
    if self.do_node_query:
      live_data = {}
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          off_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        else:
          if result.payload:
            live_data.update(result.payload)
          # else no instance is alive
    else:
      live_data = dict([(name, {}) for name in instance_names])

    # end data gathering

    HVPREFIX = "hv/"
    BEPREFIX = "be/"
    output = []
    cluster = self.cfg.GetClusterInfo()
    for instance in instance_list:
      iout = []
      i_hv = cluster.FillHV(instance, skip_globals=True)
      i_be = cluster.FillBE(instance)
      i_nicp = [objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
                                 nic.nicparams) for nic in instance.nics]
      for field in self.op.output_fields:
        st_match = self._FIELDS_STATIC.Matches(field)
        if field in self._SIMPLE_FIELDS:
          val = getattr(instance, field)
        elif field == "pnode":
          val = instance.primary_node
        elif field == "snodes":
          val = list(instance.secondary_nodes)
        elif field == "admin_state":
          val = instance.admin_up
        elif field == "oper_state":
          if instance.primary_node in bad_nodes:
            val = None
          else:
            val = bool(live_data.get(instance.name))
        elif field == "status":
          if instance.primary_node in off_nodes:
            val = "ERROR_nodeoffline"
          elif instance.primary_node in bad_nodes:
            val = "ERROR_nodedown"
          else:
            running = bool(live_data.get(instance.name))
            if running:
              if instance.admin_up:
                val = "running"
              else:
                val = "ERROR_up"
            else:
              if instance.admin_up:
                val = "ERROR_down"
              else:
                val = "ADMIN_down"
        elif field == "oper_ram":
          if instance.primary_node in bad_nodes:
            val = None
          elif instance.name in live_data:
            val = live_data[instance.name].get("memory", "?")
          else:
            val = "-"
        elif field == "vcpus":
          val = i_be[constants.BE_VCPUS]
        elif field == "disk_template":
          val = instance.disk_template
        elif field == "ip":
          if instance.nics:
            val = instance.nics[0].ip
          else:
            val = None
        elif field == "nic_mode":
          if instance.nics:
            val = i_nicp[0][constants.NIC_MODE]
          else:
            val = None
        elif field == "nic_link":
          if instance.nics:
            val = i_nicp[0][constants.NIC_LINK]
          else:
            val = None
        elif field == "bridge":
          if (instance.nics and
              i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
            val = i_nicp[0][constants.NIC_LINK]
          else:
            val = None
        elif field == "mac":
          if instance.nics:
            val = instance.nics[0].mac
          else:
            val = None
        elif field == "sda_size" or field == "sdb_size":
          idx = ord(field[2]) - ord('a')
          try:
            val = instance.FindDisk(idx).size
          except errors.OpPrereqError:
            val = None
        elif field == "disk_usage": # total disk usage per node
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
        elif field == "tags":
          val = list(instance.GetTags())
        elif field == "hvparams":
          val = i_hv
        elif (field.startswith(HVPREFIX) and
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
              field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
          val = i_hv.get(field[len(HVPREFIX):], None)
        elif field == "beparams":
          val = i_be
        elif (field.startswith(BEPREFIX) and
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
          val = i_be.get(field[len(BEPREFIX):], None)
        elif st_match and st_match.groups():
          # matches a variable list
          st_groups = st_match.groups()
          if st_groups and st_groups[0] == "disk":
            if st_groups[1] == "count":
              val = len(instance.disks)
            elif st_groups[1] == "sizes":
              val = [disk.size for disk in instance.disks]
            elif st_groups[1] == "size":
              try:
                val = instance.FindDisk(st_groups[2]).size
              except errors.OpPrereqError:
                val = None
            else:
              assert False, "Unhandled disk parameter"
          elif st_groups[0] == "nic":
            if st_groups[1] == "count":
              val = len(instance.nics)
            elif st_groups[1] == "macs":
              val = [nic.mac for nic in instance.nics]
            elif st_groups[1] == "ips":
              val = [nic.ip for nic in instance.nics]
            elif st_groups[1] == "modes":
              val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
            elif st_groups[1] == "links":
              val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
            elif st_groups[1] == "bridges":
              val = []
              for nicp in i_nicp:
                if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
                  val.append(nicp[constants.NIC_LINK])
                else:
                  val.append(None)
            else:
              # index-based item
              nic_idx = int(st_groups[2])
              if nic_idx >= len(instance.nics):
                val = None
              else:
                if st_groups[1] == "mac":
                  val = instance.nics[nic_idx].mac
                elif st_groups[1] == "ip":
                  val = instance.nics[nic_idx].ip
                elif st_groups[1] == "mode":
                  val = i_nicp[nic_idx][constants.NIC_MODE]
                elif st_groups[1] == "link":
                  val = i_nicp[nic_idx][constants.NIC_LINK]
                elif st_groups[1] == "bridge":
                  nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
                  if nic_mode == constants.NIC_MODE_BRIDGED:
                    val = i_nicp[nic_idx][constants.NIC_LINK]
                  else:
                    val = None
                else:
                  assert False, "Unhandled NIC parameter"
          else:
            assert False, ("Declared but unhandled variable parameter '%s'" %
                           field)
        else:
          assert False, "Declared but unhandled parameter '%s'" % field
        iout.append(val)
      output.append(iout)

    return output


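# Editor's note: illustrative sketch, not part of the original cmdlib.py.
# LUQueryInstances._FIELDS_STATIC (above) mixes plain field names with
# regular expressions such as r"(disk)\.(size)/([0-9]+)"; a query field like
# "disk.size/0" therefore matches with groups ("disk", "size", "0"), which
# Exec uses to dispatch to the per-disk/per-NIC lookups. The helper name
# below is hypothetical.
def _ExampleMatchQueryField(field):
  """Return the match groups a parameterized query field would produce."""
  fields = utils.FieldSet(r"(disk)\.(size)/([0-9]+)",
                          r"(nic)\.(mac|ip|mode|link)/([0-9]+)")
  match = fields.Matches(field)
  if not match:
    return None
  return match.groups()

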
class LUFailoverInstance(LogicalUnit):
  """Failover an instance.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_consistency"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self.instance
    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]
    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      "OLD_PRIMARY": source_node,
      "OLD_SECONDARY": target_node,
      "NEW_PRIMARY": target_node,
      "NEW_SECONDARY": source_node,
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    nl_post = list(nl)
    nl_post.append(source_node)
    return env, nl, nl_post

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    bep = self.cfg.GetClusterInfo().FillBE(instance)
    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " network mirrored, cannot failover.",
                                 errors.ECODE_STATE)

    secondary_nodes = instance.secondary_nodes
    if not secondary_nodes:
      raise errors.ProgrammerError("no secondary node but using "
                                   "a mirrored disk template")

    target_node = secondary_nodes[0]
    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    if instance.admin_up:
      # check memory requirements on the secondary node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]

    if instance.admin_up:
      feedback_fn("* checking disk consistency between source and target")
      for dev in instance.disks:
        # for drbd, these are drbd over lvm
        if not _CheckDiskConsistency(self, dev, target_node, False):
          if not self.op.ignore_consistency:
            raise errors.OpExecError("Disk %s is degraded on target node,"
                                     " aborting failover." % dev.iv_name)
    else:
      feedback_fn("* not checking disk consistency as instance is not running")

    feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    feedback_fn("* deactivating the instance's disks on source node")
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
      raise errors.OpExecError("Can't shut down the instance's disks.")

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, feedback_fn)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      feedback_fn("* activating the instance's disks on target node")
      logging.info("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      feedback_fn("* starting the instance on the target node")
      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))


class LUMigrateInstance(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "live", "cleanup"]

  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       self.op.live, self.op.cleanup)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]
    env = _BuildInstanceHookEnvByObject(self, instance)
    env["MIGRATE_LIVE"] = self.op.live
    env["MIGRATE_CLEANUP"] = self.op.cleanup
    env.update({
        "OLD_PRIMARY": source_node,
        "OLD_SECONDARY": target_node,
        "NEW_PRIMARY": target_node,
        "NEW_SECONDARY": source_node,
        })
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    nl_post = list(nl)
    nl_post.append(source_node)
    return env, nl, nl_post


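# Editor's note: illustrative sketch, not part of the original cmdlib.py.
# Both LUFailoverInstance and LUMigrateInstance (above) export the same role
# swap to hooks: the old secondary becomes the new primary and vice versa.
# The helper name is hypothetical.
def _ExampleMigrationHookRoles(source_node, target_node):
  """Return the role-related hook variables for a failover/migration."""
  return {
    "OLD_PRIMARY": source_node,
    "OLD_SECONDARY": target_node,
    "NEW_PRIMARY": target_node,
    "NEW_SECONDARY": source_node,
    }

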
class LUMoveInstance(LogicalUnit):
  """Move an instance by data-copying.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "target_node"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
                                       self.op.target_node]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)

    if instance.admin_up:
      # check memory requirements on the secondary node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))


class LUMigrateNode(LogicalUnit):
  """Migrate all instances from a node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name", "live"]
  REQ_BGL = False

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    # Create tasklets for migrating all instances on this node
    names = []
    tasklets = []

    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
      logging.debug("Migrating instance %s", inst.name)
      names.append(inst.name)

      tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))

    self.tasklets = tasklets

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = names

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "NODE_NAME": self.op.node_name,
      }

    nl = [self.cfg.GetMasterNode()]

    return (env, nl, nl)


class TLMigrateInstance(Tasklet):
  def __init__(self, lu, instance_name, live, cleanup):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.live = live
    self.cleanup = cleanup

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
    instance = self.cfg.GetInstanceInfo(instance_name)
    assert instance is not None

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " drbd8, cannot migrate.", errors.ECODE_STATE)

    secondary_nodes = instance.secondary_nodes
    if not secondary_nodes:
      raise errors.ConfigurationError("No secondary node but using"
                                      " drbd8 disk template")

    i_be = self.cfg.GetClusterInfo().FillBE(instance)

    target_node = secondary_nodes[0]
    # check memory requirements on the secondary node
    _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
                         instance.name, i_be[constants.BE_MEMORY],
                         instance.hypervisor)

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

    if not self.cleanup:
      _CheckNodeNotDrained(self, target_node)
      result = self.rpc.call_instance_migratable(instance.primary_node,
                                                 instance)
      result.Raise("Can't migrate, please use failover",
                   prereq=True, ecode=errors.ECODE_STATE)

    self.instance = instance

  def _WaitUntilSync(self):
    """Poll with custom rpc for disk sync.

    This uses our own step-based rpc call.

    """
    self.feedback_fn("* wait until resync is done")
    all_done = False
    while not all_done:
      all_done = True
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
                                            self.nodes_ip,
                                            self.instance.disks)
      min_percent = 100
      for node, nres in result.items():
        nres.Raise("Cannot resync disks on node %s" % node)
        node_done, node_percent = nres.payload
        all_done = all_done and node_done
        if node_percent is not None:
          min_percent = min(min_percent, node_percent)
      if not all_done:
        if min_percent < 100:
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
        time.sleep(2)

  def _EnsureSecondary(self, node):
    """Demote a node to secondary.

    """
    self.feedback_fn("* switching node %s to secondary mode" % node)

    for dev in self.instance.disks:
      self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_close(node, self.instance.name,
                                          self.instance.disks)
    result.Raise("Cannot change disk to secondary on node %s" % node)

  def _GoStandalone(self):
    """Disconnect from the network.

    """
    self.feedback_fn("* changing into standalone mode")
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
                                               self.instance.disks)
    for node, nres in result.items():
      nres.Raise("Cannot disconnect disks node %s" % node)

  def _GoReconnect(self, multimaster):
    """Reconnect to the network.

    """
    if multimaster:
      msg = "dual-master"
    else:
      msg = "single-master"
    self.feedback_fn("* changing disks into %s mode" % msg)
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
                                           self.instance.disks,
                                           self.instance.name, multimaster)
    for node, nres in result.items():
      nres.Raise("Cannot change disks config on node %s" % node)

  def _ExecCleanup(self):
    """Try to cleanup after a failed migration.

    The cleanup is done by:
      - check that the instance is running only on one node
        (and update the config if needed)
      - change disks on its secondary node to secondary
      - wait until disks are fully synchronized
      - disconnect from the network
      - change disks into single-master mode
      - wait again until disks are fully synchronized

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # check running on only one node
    self.feedback_fn("* checking where the instance actually runs"
                     " (if this hangs, the hypervisor might be in"
                     " a bad state)")
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
    for node, result in ins_l.items():
      result.Raise("Can't contact node %s" % node)

    runningon_source = instance.name in ins_l[source_node].payload
    runningon_target = instance.name in ins_l[target_node].payload

    if runningon_source and runningon_target:
      raise errors.OpExecError("Instance seems to be running on two nodes,"
                               " or the hypervisor is confused. You will have"
                               " to ensure manually that it runs only on one"
                               " and restart this operation.")

    if not (runningon_source or runningon_target):
      raise errors.OpExecError("Instance does not seem to be running at all."
                               " In this case, it's safer to repair by"
                               " running 'gnt-instance stop' to ensure disk"
                               " shutdown, and then restarting it.")

    if runningon_target:
      # the migration has actually succeeded, we need to update the config
      self.feedback_fn("* instance running on secondary node (%s),"
                       " updating config" % target_node)
      instance.primary_node = target_node
      self.cfg.Update(instance, self.feedback_fn)
      demoted_node = source_node
    else:
      self.feedback_fn("* instance confirmed to be running on its"
                       " primary node (%s)" % source_node)
      demoted_node = target_node

    self._EnsureSecondary(demoted_node)
    try:
      self._WaitUntilSync()
    except errors.OpExecError:
      # we ignore errors here, since if the device is standalone, it
      # won't be able to sync
      pass
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def _RevertDiskStatus(self):
    """Try to revert the disk status after a failed migration.

    """
    target_node = self.target_node
    try:
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()
    except errors.OpExecError, err:
      self.lu.LogWarning("Migration failed and I can't reconnect the"
                         " drives: error '%s'\n"
                         "Please look and recover the instance status" %
                         str(err))

  def _AbortMigration(self):
    """Call the hypervisor code to abort a started migration.

    """
    instance = self.instance
    target_node = self.target_node
    migration_info = self.migration_info

    abort_result = self.rpc.call_finalize_migration(target_node,
                                                    instance,
                                                    migration_info,
                                                    False)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on target node %s: %s",
                    target_node, abort_msg)
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.

  def _ExecMigration(self):
    """Migrate an instance.

    The migration is done by:
      - change the disks into dual-master mode
      - wait until disks are fully synchronized again
      - migrate the instance
      - change disks on the new secondary node (the old primary) to secondary
      - wait until disks are fully synchronized
      - change disks into single-master mode

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    self.feedback_fn("* checking disk consistency between source and target")
    for dev in instance.disks:
      if not _CheckDiskConsistency(self, dev, target_node, False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migrate." % dev.iv_name)

    # First get the migration information from the remote node
    result = self.rpc.call_migration_info(source_node, instance)
    msg = result.fail_msg
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (source_node, msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    self.migration_info = migration_info = result.payload

    # Then switch the disks to master/master mode
    self._EnsureSecondary(target_node)
    self._GoStandalone()
    self._GoReconnect(True)
    self._WaitUntilSync()

    self.feedback_fn("* preparing %s to accept the instance" % target_node)
    result = self.rpc.call_accept_instance(target_node,
                                           instance,
                                           migration_info,
                                           self.nodes_ip[target_node])

    msg = result.fail_msg
    if msg:
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Pre-migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* migrating instance to %s" % target_node)
    time.sleep(10)
    result = self.rpc.call_instance_migrate(source_node, instance,
                                            self.nodes_ip[target_node],
                                            self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))
    time.sleep(10)

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    result = self.rpc.call_finalize_migration(target_node,
                                              instance,
                                              migration_info,
                                              True)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed:"
                    " %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    self._EnsureSecondary(source_node)
    self._WaitUntilSync()
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def Exec(self, feedback_fn):
    """Perform the migration.

    """
    feedback_fn("Migrating instance %s" % self.instance.name)

    self.feedback_fn = feedback_fn

    self.source_node = self.instance.primary_node
    self.target_node = self.instance.secondary_nodes[0]
    self.all_nodes = [self.source_node, self.target_node]
    self.nodes_ip = {
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
      }

    if self.cleanup:
      return self._ExecCleanup()
    else:
      return self._ExecMigration()


def _CreateBlockDev(lu, node, instance, device, force_create,
5722
                    info, force_open):
5723
  """Create a tree of block devices on a given node.
5724

5725
  If this device type has to be created on secondaries, create it and
5726
  all its children.
5727

5728
  If not, just recurse to children keeping the same 'force' value.
5729

5730
  @param lu: the lu on whose behalf we execute
5731
  @param node: the node on which to create the device
5732
  @type instance: L{objects.Instance}
5733
  @param instance: the instance which owns the device
5734
  @type device: L{objects.Disk}
5735
  @param device: the device to create
5736
  @type force_create: boolean
5737
  @param force_create: whether to force creation of this device; this
5738
      will be change to True whenever we find a device which has
5739
      CreateOnSecondary() attribute
5740
  @param info: the extra 'metadata' we should attach to the device
5741
      (this will be represented as a LVM tag)
5742
  @type force_open: boolean
5743
  @param force_open: this parameter will be passes to the
5744
      L{backend.BlockdevCreate} function where it specifies
5745
      whether we run on primary or not, and it affects both
5746
      the child assembly and the device own Open() execution
5747

5748
  """
5749
  if device.CreateOnSecondary():
5750
    force_create = True
5751

    
5752
  if device.children:
5753
    for child in device.children:
5754
      _CreateBlockDev(lu, node, instance, child, force_create,
5755
                      info, force_open)
5756

    
5757
  if not force_create:
5758
    return
5759

    
5760
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
5761

    
5762

    
5763
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
5764
  """Create a single block device on a given node.
5765

5766
  This will not recurse over children of the device, so they must be
5767
  created in advance.
5768

5769
  @param lu: the lu on whose behalf we execute
5770
  @param node: the node on which to create the device
5771
  @type instance: L{objects.Instance}
5772
  @param instance: the instance which owns the device
5773
  @type device: L{objects.Disk}
5774
  @param device: the device to create
5775
  @param info: the extra 'metadata' we should attach to the device
5776
      (this will be represented as a LVM tag)
5777
  @type force_open: boolean
5778
  @param force_open: this parameter will be passes to the
5779
      L{backend.BlockdevCreate} function where it specifies
5780
      whether we run on primary or not, and it affects both
5781
      the child assembly and the device own Open() execution
5782

5783
  """
5784
  lu.cfg.SetDiskID(device, node)
5785
  result = lu.rpc.call_blockdev_create(node, device, device.size,
5786
                                       instance.name, force_open, info)
5787
  result.Raise("Can't create block device %s on"
5788
               " node %s for instance %s" % (device, node, instance.name))
5789
  if device.physical_id is None:
5790
    device.physical_id = result.payload
5791

    
5792

    
5793
def _GenerateUniqueNames(lu, exts):
5794
  """Generate a suitable LV name.
5795

5796
  This will generate a logical volume name for the given instance.
5797

5798
  """
5799
  results = []
5800
  for val in exts:
5801
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
5802
    results.append("%s%s" % (new_id, val))
5803
  return results
5804

    
5805

    
5806
def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
5807
                         p_minor, s_minor):
5808
  """Generate a drbd8 device complete with its children.
5809

5810
  """
5811
  port = lu.cfg.AllocatePort()
5812
  vgname = lu.cfg.GetVGName()
5813
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
5814
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
5815
                          logical_id=(vgname, names[0]))
5816
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
5817
                          logical_id=(vgname, names[1]))
5818
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
5819
                          logical_id=(primary, secondary, port,
5820
                                      p_minor, s_minor,
5821
                                      shared_secret),
5822
                          children=[dev_data, dev_meta],
5823
                          iv_name=iv_name)
5824
  return drbd_dev
5825

    
5826

    
5827
def _GenerateDiskTemplate(lu, template_name,
5828
                          instance_name, primary_node,
5829
                          secondary_nodes, disk_info,
5830
                          file_storage_dir, file_driver,
5831
                          base_index):
5832
  """Generate the entire disk layout for a given template type.
5833

5834
  """
5835
  #TODO: compute space requirements
5836

    
5837
  vgname = lu.cfg.GetVGName()
5838
  disk_count = len(disk_info)
5839
  disks = []
5840
  if template_name == constants.DT_DISKLESS:
5841
    pass
5842
  elif template_name == constants.DT_PLAIN:
5843
    if len(secondary_nodes) != 0:
5844
      raise errors.ProgrammerError("Wrong template configuration")
5845

    
5846
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
5847
                                      for i in range(disk_count)])
5848
    for idx, disk in enumerate(disk_info):
5849
      disk_index = idx + base_index
5850
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
5851
                              logical_id=(vgname, names[idx]),
5852
                              iv_name="disk/%d" % disk_index,
5853
                              mode=disk["mode"])
5854
      disks.append(disk_dev)
5855
  elif template_name == constants.DT_DRBD8:
5856
    if len(secondary_nodes) != 1:
5857
      raise errors.ProgrammerError("Wrong template configuration")
5858
    remote_node = secondary_nodes[0]
5859
    minors = lu.cfg.AllocateDRBDMinor(
5860
      [primary_node, remote_node] * len(disk_info), instance_name)
5861

    
5862
    names = []
5863
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
5864
                                               for i in range(disk_count)]):
5865
      names.append(lv_prefix + "_data")
5866
      names.append(lv_prefix + "_meta")
5867
    for idx, disk in enumerate(disk_info):
5868
      disk_index = idx + base_index
5869
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
5870
                                      disk["size"], names[idx*2:idx*2+2],
5871
                                      "disk/%d" % disk_index,
5872
                                      minors[idx*2], minors[idx*2+1])
5873
      disk_dev.mode = disk["mode"]
5874
      disks.append(disk_dev)
5875
  elif template_name == constants.DT_FILE:
5876
    if len(secondary_nodes) != 0:
5877
      raise errors.ProgrammerError("Wrong template configuration")
5878

    
5879
    _RequireFileStorage()
5880

    
5881
    for idx, disk in enumerate(disk_info):
5882
      disk_index = idx + base_index
5883
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
5884
                              iv_name="disk/%d" % disk_index,
5885
                              logical_id=(file_driver,
5886
                                          "%s/disk%d" % (file_storage_dir,
5887
                                                         disk_index)),
5888
                              mode=disk["mode"])
5889
      disks.append(disk_dev)
5890
  else:
5891
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
5892
  return disks
5893

    
5894

    
5895
def _GetInstanceInfoText(instance):
5896
  """Compute that text that should be added to the disk's metadata.
5897

5898
  """
5899
  return "originstname+%s" % instance.name
5900

    
5901

    
5902
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
5903
  """Create all disks for an instance.
5904

5905
  This abstracts away some work from AddInstance.
5906

5907
  @type lu: L{LogicalUnit}
5908
  @param lu: the logical unit on whose behalf we execute
5909
  @type instance: L{objects.Instance}
5910
  @param instance: the instance whose disks we should create
5911
  @type to_skip: list
5912
  @param to_skip: list of indices to skip
5913
  @type target_node: string
5914
  @param target_node: if passed, overrides the target node for creation
5915
  @rtype: boolean
5916
  @return: the success of the creation
5917

5918
  """
5919
  info = _GetInstanceInfoText(instance)
5920
  if target_node is None:
5921
    pnode = instance.primary_node
5922
    all_nodes = instance.all_nodes
5923
  else:
5924
    pnode = target_node
5925
    all_nodes = [pnode]
5926

    
5927
  if instance.disk_template == constants.DT_FILE:
5928
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
5929
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
5930

    
5931
    result.Raise("Failed to create directory '%s' on"
5932
                 " node %s" % (file_storage_dir, pnode))
5933

    
5934
  # Note: this needs to be kept in sync with adding of disks in
5935
  # LUSetInstanceParams
5936
  for idx, device in enumerate(instance.disks):
5937
    if to_skip and idx in to_skip:
5938
      continue
5939
    logging.info("Creating volume %s for instance %s",
5940
                 device.iv_name, instance.name)
5941
    #HARDCODE
5942
    for node in all_nodes:
5943
      f_create = node == pnode
5944
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
5945

    
5946

    
5947
def _RemoveDisks(lu, instance, target_node=None):
5948
  """Remove all disks for an instance.
5949

5950
  This abstracts away some work from `AddInstance()` and
5951
  `RemoveInstance()`. Note that in case some of the devices couldn't
5952
  be removed, the removal will continue with the other ones (compare
5953
  with `_CreateDisks()`).
5954

5955
  @type lu: L{LogicalUnit}
5956
  @param lu: the logical unit on whose behalf we execute
5957
  @type instance: L{objects.Instance}
5958
  @param instance: the instance whose disks we should remove
5959
  @type target_node: string
5960
  @param target_node: used to override the node on which to remove the disks
5961
  @rtype: boolean
5962
  @return: the success of the removal
5963

5964
  """
5965
  logging.info("Removing block devices for instance %s", instance.name)
5966

    
5967
  all_result = True
5968
  for device in instance.disks:
5969
    if target_node:
5970
      edata = [(target_node, device)]
5971
    else:
5972
      edata = device.ComputeNodeTree(instance.primary_node)
5973
    for node, disk in edata:
5974
      lu.cfg.SetDiskID(disk, node)
5975
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
5976
      if msg:
5977
        lu.LogWarning("Could not remove block device %s on node %s,"
5978
                      " continuing anyway: %s", device.iv_name, node, msg)
5979
        all_result = False
5980

    
5981
  if instance.disk_template == constants.DT_FILE:
5982
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
5983
    if target_node:
5984
      tgt = target_node
5985
    else:
5986
      tgt = instance.primary_node
5987
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
5988
    if result.fail_msg:
5989
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
5990
                    file_storage_dir, instance.primary_node, result.fail_msg)
5991
      all_result = False
5992

    
5993
  return all_result
5994

    
5995

    
5996
def _ComputeDiskSize(disk_template, disks):
5997
  """Compute disk size requirements in the volume group
5998

5999
  """
6000
  # Required free disk space as a function of disk and swap space
6001
  req_size_dict = {
6002
    constants.DT_DISKLESS: None,
6003
    constants.DT_PLAIN: sum(d["size"] for d in disks),
6004
    # 128 MB are added for drbd metadata for each disk
6005
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
6006
    constants.DT_FILE: None,
6007
  }
6008

    
6009
  if disk_template not in req_size_dict:
6010
    raise errors.ProgrammerError("Disk template '%s' size requirement"
6011
                                 " is unknown" %  disk_template)
6012

    
6013
  return req_size_dict[disk_template]
6014

    
6015

    
6016
def _CheckHVParams(lu, nodenames, hvname, hvparams):
6017
  """Hypervisor parameter validation.
6018

6019
  This function abstract the hypervisor parameter validation to be
6020
  used in both instance create and instance modify.
6021

6022
  @type lu: L{LogicalUnit}
6023
  @param lu: the logical unit for which we check
6024
  @type nodenames: list
6025
  @param nodenames: the list of nodes on which we should check
6026
  @type hvname: string
6027
  @param hvname: the name of the hypervisor we should use
6028
  @type hvparams: dict
6029
  @param hvparams: the parameters which we need to check
6030
  @raise errors.OpPrereqError: if the parameters are not valid
6031

6032
  """
6033
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6034
                                                  hvname,
6035
                                                  hvparams)
6036
  for node in nodenames:
6037
    info = hvinfo[node]
6038
    if info.offline:
6039
      continue
6040
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
6041

    
6042

    
6043
class LUCreateInstance(LogicalUnit):
6044
  """Create an instance.
6045

6046
  """
6047
  HPATH = "instance-add"
6048
  HTYPE = constants.HTYPE_INSTANCE
6049
  _OP_REQP = ["instance_name", "disks",
6050
              "mode", "start",
6051
              "wait_for_sync", "ip_check", "nics",
6052
              "hvparams", "beparams"]
6053
  REQ_BGL = False
6054

    
6055
  def CheckArguments(self):
6056
    """Check arguments.
6057

6058
    """
6059
    # set optional parameters to none if they don't exist
6060
    for attr in ["pnode", "snode", "iallocator", "hypervisor",
6061
                 "disk_template", "identify_defaults"]:
6062
      if not hasattr(self.op, attr):
6063
        setattr(self.op, attr, None)
6064

    
6065
    # do not require name_check to ease forward/backward compatibility
6066
    # for tools
6067
    if not hasattr(self.op, "name_check"):
6068
      self.op.name_check = True
6069
    if not hasattr(self.op, "no_install"):
6070
      self.op.no_install = False
6071
    if self.op.no_install and self.op.start:
6072
      self.LogInfo("No-installation mode selected, disabling startup")
6073
      self.op.start = False
6074
    # validate/normalize the instance name
6075
    self.op.instance_name = utils.HostInfo.NormalizeName(self.op.instance_name)
6076
    if self.op.ip_check and not self.op.name_check:
6077
      # TODO: make the ip check more flexible and not depend on the name check
6078
      raise errors.OpPrereqError("Cannot do ip checks without a name check",
6079
                                 errors.ECODE_INVAL)
6080
    # check disk information: either all adopt, or no adopt
6081
    has_adopt = has_no_adopt = False
6082
    for disk in self.op.disks:
6083
      if "adopt" in disk:
6084
        has_adopt = True
6085
      else:
6086
        has_no_adopt = True
6087
    if has_adopt and has_no_adopt:
6088
      raise errors.OpPrereqError("Either all disks are adopted or none is",
6089
                                 errors.ECODE_INVAL)
6090
    if has_adopt:
6091
      if self.op.disk_template != constants.DT_PLAIN:
6092
        raise errors.OpPrereqError("Disk adoption is only supported for the"
6093
                                   " 'plain' disk template",
6094
                                   errors.ECODE_INVAL)
6095
      if self.op.iallocator is not None:
6096
        raise errors.OpPrereqError("Disk adoption not allowed with an"
6097
                                   " iallocator script", errors.ECODE_INVAL)
6098
      if self.op.mode == constants.INSTANCE_IMPORT:
6099
        raise errors.OpPrereqError("Disk adoption not allowed for"
6100
                                   " instance import", errors.ECODE_INVAL)
6101

    
6102
    self.adopt_disks = has_adopt
6103

    
6104
    # verify creation mode
6105
    if self.op.mode not in (constants.INSTANCE_CREATE,
6106
                            constants.INSTANCE_IMPORT):
6107
      raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
6108
                                 self.op.mode, errors.ECODE_INVAL)
6109

    
6110
    # instance name verification
6111
    if self.op.name_check:
6112
      self.hostname1 = utils.GetHostInfo(self.op.instance_name)
6113
      self.op.instance_name = self.hostname1.name
6114
      # used in CheckPrereq for ip ping check
6115
      self.check_ip = self.hostname1.ip
6116
    else:
6117
      self.check_ip = None
6118

    
6119
    # file storage checks
6120
    if (self.op.file_driver and
6121
        not self.op.file_driver in constants.FILE_DRIVER):
6122
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
6123
                                 self.op.file_driver, errors.ECODE_INVAL)
6124

    
6125
    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6126
      raise errors.OpPrereqError("File storage directory path not absolute",
6127
                                 errors.ECODE_INVAL)
6128

    
6129
    ### Node/iallocator related checks
6130
    if [self.op.iallocator, self.op.pnode].count(None) != 1:
6131
      raise errors.OpPrereqError("One and only one of iallocator and primary"
6132
                                 " node must be given",
6133
                                 errors.ECODE_INVAL)
6134

    
6135
    if self.op.mode == constants.INSTANCE_IMPORT:
6136
      # On import force_variant must be True, because if we forced it at
6137
      # initial install, our only chance when importing it back is that it
6138
      # works again!
6139
      self.op.force_variant = True
6140

    
6141
      if self.op.no_install:
6142
        self.LogInfo("No-installation mode has no effect during import")
6143

    
6144
    else: # INSTANCE_CREATE
6145
      if getattr(self.op, "os_type", None) is None:
6146
        raise errors.OpPrereqError("No guest OS specified",
6147
                                   errors.ECODE_INVAL)
6148
      self.op.force_variant = getattr(self.op, "force_variant", False)
6149
      if self.op.disk_template is None:
6150
        raise errors.OpPrereqError("No disk template specified",
6151
                                   errors.ECODE_INVAL)
6152

    
6153
  def ExpandNames(self):
6154
    """ExpandNames for CreateInstance.
6155

6156
    Figure out the right locks for instance creation.
6157

6158
    """
6159
    self.needed_locks = {}
6160

    
6161
    instance_name = self.op.instance_name
6162
    # this is just a preventive check, but someone might still add this
6163
    # instance in the meantime, and creation will fail at lock-add time
6164
    if instance_name in self.cfg.GetInstanceList():
6165
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6166
                                 instance_name, errors.ECODE_EXISTS)
6167

    
6168
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6169

    
6170
    if self.op.iallocator:
6171
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6172
    else:
6173
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6174
      nodelist = [self.op.pnode]
6175
      if self.op.snode is not None:
6176
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6177
        nodelist.append(self.op.snode)
6178
      self.needed_locks[locking.LEVEL_NODE] = nodelist
6179

    
6180
    # in case of import lock the source node too
6181
    if self.op.mode == constants.INSTANCE_IMPORT:
6182
      src_node = getattr(self.op, "src_node", None)
6183
      src_path = getattr(self.op, "src_path", None)
6184

    
6185
      if src_path is None:
6186
        self.op.src_path = src_path = self.op.instance_name
6187

    
6188
      if src_node is None:
6189
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6190
        self.op.src_node = None
6191
        if os.path.isabs(src_path):
6192
          raise errors.OpPrereqError("Importing an instance from an absolute"
6193
                                     " path requires a source node option.",
6194
                                     errors.ECODE_INVAL)
6195
      else:
6196
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6197
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6198
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
6199
        if not os.path.isabs(src_path):
6200
          self.op.src_path = src_path = \
6201
            utils.PathJoin(constants.EXPORT_DIR, src_path)
6202

    
6203
  def _RunAllocator(self):
6204
    """Run the allocator based on input opcode.
6205

6206
    """
6207
    nics = [n.ToDict() for n in self.nics]
6208
    ial = IAllocator(self.cfg, self.rpc,
6209
                     mode=constants.IALLOCATOR_MODE_ALLOC,
6210
                     name=self.op.instance_name,
6211
                     disk_template=self.op.disk_template,
6212
                     tags=[],
6213
                     os=self.op.os_type,
6214
                     vcpus=self.be_full[constants.BE_VCPUS],
6215
                     mem_size=self.be_full[constants.BE_MEMORY],
6216
                     disks=self.disks,
6217
                     nics=nics,
6218
                     hypervisor=self.op.hypervisor,
6219
                     )
6220

    
6221
    ial.Run(self.op.iallocator)
6222

    
6223
    if not ial.success:
6224
      raise errors.OpPrereqError("Can't compute nodes using"
6225
                                 " iallocator '%s': %s" %
6226
                                 (self.op.iallocator, ial.info),
6227
                                 errors.ECODE_NORES)
6228
    if len(ial.result) != ial.required_nodes:
6229
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6230
                                 " of nodes (%s), required %s" %
6231
                                 (self.op.iallocator, len(ial.result),
6232
                                  ial.required_nodes), errors.ECODE_FAULT)
6233
    self.op.pnode = ial.result[0]
6234
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6235
                 self.op.instance_name, self.op.iallocator,
6236
                 utils.CommaJoin(ial.result))
6237
    if ial.required_nodes == 2:
6238
      self.op.snode = ial.result[1]
6239

    
6240
  def BuildHooksEnv(self):
6241
    """Build hooks env.
6242

6243
    This runs on master, primary and secondary nodes of the instance.
6244

6245
    """
6246
    env = {
6247
      "ADD_MODE": self.op.mode,
6248
      }
6249
    if self.op.mode == constants.INSTANCE_IMPORT:
6250
      env["SRC_NODE"] = self.op.src_node
6251
      env["SRC_PATH"] = self.op.src_path
6252
      env["SRC_IMAGES"] = self.src_images
6253

    
6254
    env.update(_BuildInstanceHookEnv(
6255
      name=self.op.instance_name,
6256
      primary_node=self.op.pnode,
6257
      secondary_nodes=self.secondaries,
6258
      status=self.op.start,
6259
      os_type=self.op.os_type,
6260
      memory=self.be_full[constants.BE_MEMORY],
6261
      vcpus=self.be_full[constants.BE_VCPUS],
6262
      nics=_NICListToTuple(self, self.nics),
6263
      disk_template=self.op.disk_template,
6264
      disks=[(d["size"], d["mode"]) for d in self.disks],
6265
      bep=self.be_full,
6266
      hvp=self.hv_full,
6267
      hypervisor_name=self.op.hypervisor,
6268
    ))
6269

    
6270
    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6271
          self.secondaries)
6272
    return env, nl, nl
6273

    
6274
  def _ReadExportInfo(self):
6275
    """Reads the export information from disk.
6276

6277
    It will override the opcode source node and path with the actual
6278
    information, if these two were not specified before.
6279

6280
    @return: the export information
6281

6282
    """
6283
    assert self.op.mode == constants.INSTANCE_IMPORT
6284

    
6285
    src_node = self.op.src_node
6286
    src_path = self.op.src_path
6287

    
6288
    if src_node is None:
6289
      locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6290
      exp_list = self.rpc.call_export_list(locked_nodes)
6291
      found = False
6292
      for node in exp_list:
6293
        if exp_list[node].fail_msg:
6294
          continue
6295
        if src_path in exp_list[node].payload:
6296
          found = True
6297
          self.op.src_node = src_node = node
6298
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
6299
                                                       src_path)
6300
          break
6301
      if not found:
6302
        raise errors.OpPrereqError("No export found for relative path %s" %
6303
                                    src_path, errors.ECODE_INVAL)
6304

    
6305
    _CheckNodeOnline(self, src_node)
6306
    result = self.rpc.call_export_info(src_node, src_path)
6307
    result.Raise("No export or invalid export found in dir %s" % src_path)
6308

    
6309
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6310
    if not export_info.has_section(constants.INISECT_EXP):
6311
      raise errors.ProgrammerError("Corrupted export config",
6312
                                   errors.ECODE_ENVIRON)
6313

    
6314
    ei_version = export_info.get(constants.INISECT_EXP, "version")
6315
    if (int(ei_version) != constants.EXPORT_VERSION):
6316
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6317
                                 (ei_version, constants.EXPORT_VERSION),
6318
                                 errors.ECODE_ENVIRON)
6319
    return export_info
6320

    
6321
  def _ReadExportParams(self, einfo):
6322
    """Use export parameters as defaults.
6323

6324
    In case the opcode doesn't specify (as in override) some instance
6325
    parameters, then try to use them from the export information, if
6326
    that declares them.
6327

6328
    """
6329
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
6330

    
6331
    if self.op.disk_template is None:
6332
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
6333
        self.op.disk_template = einfo.get(constants.INISECT_INS,
6334
                                          "disk_template")
6335
      else:
6336
        raise errors.OpPrereqError("No disk template specified and the export"
6337
                                   " is missing the disk_template information",
6338
                                   errors.ECODE_INVAL)
6339

    
6340
    if not self.op.disks:
6341
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
6342
        disks = []
6343
        # TODO: import the disk iv_name too
6344
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
6345
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
6346
          disks.append({"size": disk_sz})
6347
        self.op.disks = disks
6348
      else:
6349
        raise errors.OpPrereqError("No disk info specified and the export"
6350
                                   " is missing the disk information",
6351
                                   errors.ECODE_INVAL)
6352

    
6353
    if (not self.op.nics and
6354
        einfo.has_option(constants.INISECT_INS, "nic_count")):
6355
      nics = []
6356
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
6357
        ndict = {}
6358
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
6359
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
6360
          ndict[name] = v
6361
        nics.append(ndict)
6362
      self.op.nics = nics
6363

    
6364
    if (self.op.hypervisor is None and
6365
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
6366
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
6367
    if einfo.has_section(constants.INISECT_HYP):
6368
      # use the export parameters but do not override the ones
6369
      # specified by the user
6370
      for name, value in einfo.items(constants.INISECT_HYP):
6371
        if name not in self.op.hvparams:
6372
          self.op.hvparams[name] = value
6373

    
6374
    if einfo.has_section(constants.INISECT_BEP):
6375
      # use the parameters, without overriding
6376
      for name, value in einfo.items(constants.INISECT_BEP):
6377
        if name not in self.op.beparams:
6378
          self.op.beparams[name] = value
6379
    else:
6380
      # try to read the parameters old style, from the main section
6381
      for name in constants.BES_PARAMETERS:
6382
        if (name not in self.op.beparams and
6383
            einfo.has_option(constants.INISECT_INS, name)):
6384
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
6385

    
6386
  def _RevertToDefaults(self, cluster):
6387
    """Revert the instance parameters to the default values.
6388

6389
    """
6390
    # hvparams
6391
    hv_defs = cluster.GetHVDefaults(self.op.hypervisor, self.op.os_type)
6392
    for name in self.op.hvparams.keys():
6393
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
6394
        del self.op.hvparams[name]
6395
    # beparams
6396
    be_defs = cluster.beparams.get(constants.PP_DEFAULT, {})
6397
    for name in self.op.beparams.keys():
6398
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
6399
        del self.op.beparams[name]
6400
    # nic params
6401
    nic_defs = cluster.nicparams.get(constants.PP_DEFAULT, {})
6402
    for nic in self.op.nics:
6403
      for name in constants.NICS_PARAMETERS:
6404
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
6405
          del nic[name]
6406

    
6407
  def CheckPrereq(self):
6408
    """Check prerequisites.
6409

6410
    """
6411
    if self.op.mode == constants.INSTANCE_IMPORT:
6412
      export_info = self._ReadExportInfo()
6413
      self._ReadExportParams(export_info)
6414

    
6415
    _CheckDiskTemplate(self.op.disk_template)
6416

    
6417
    if (not self.cfg.GetVGName() and
6418
        self.op.disk_template not in constants.DTS_NOT_LVM):
6419
      raise errors.OpPrereqError("Cluster does not support lvm-based"
6420
                                 " instances", errors.ECODE_STATE)
6421

    
6422
    if self.op.hypervisor is None:
6423
      self.op.hypervisor = self.cfg.GetHypervisorType()
6424

    
6425
    cluster = self.cfg.GetClusterInfo()
6426
    enabled_hvs = cluster.enabled_hypervisors
6427
    if self.op.hypervisor not in enabled_hvs:
6428
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
6429
                                 " cluster (%s)" % (self.op.hypervisor,
6430
                                  ",".join(enabled_hvs)),
6431
                                 errors.ECODE_STATE)
6432

    
6433
    # check hypervisor parameter syntax (locally)
6434
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6435
    filled_hvp = objects.FillDict(cluster.GetHVDefaults(self.op.hypervisor,
6436
                                                        self.op.os_type),
6437
                                  self.op.hvparams)
6438
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
6439
    hv_type.CheckParameterSyntax(filled_hvp)
6440
    self.hv_full = filled_hvp
6441
    # check that we don't specify global parameters on an instance
6442
    _CheckGlobalHvParams(self.op.hvparams)
6443

    
6444
    # fill and remember the beparams dict
6445
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6446
    self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
6447
                                    self.op.beparams)
6448

    
6449
    # now that hvp/bep are in final format, let's reset to defaults,
6450
    # if told to do so
6451
    if self.op.identify_defaults:
6452
      self._RevertToDefaults(cluster)
6453

    
6454
    # NIC buildup
6455
    self.nics = []
6456
    for idx, nic in enumerate(self.op.nics):
6457
      nic_mode_req = nic.get("mode", None)
6458
      nic_mode = nic_mode_req
6459
      if nic_mode is None:
6460
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
6461

    
6462
      # in routed mode, for the first nic, the default ip is 'auto'
6463
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
6464
        default_ip_mode = constants.VALUE_AUTO
6465
      else:
6466
        default_ip_mode = constants.VALUE_NONE
6467

    
6468
      # ip validity checks
6469
      ip = nic.get("ip", default_ip_mode)
6470
      if ip is None or ip.lower() == constants.VALUE_NONE:
6471
        nic_ip = None
6472
      elif ip.lower() == constants.VALUE_AUTO:
6473
        if not self.op.name_check:
6474
          raise errors.OpPrereqError("IP address set to auto but name checks"
6475
                                     " have been skipped. Aborting.",
6476
                                     errors.ECODE_INVAL)
6477
        nic_ip = self.hostname1.ip
6478
      else:
6479
        if not utils.IsValidIP(ip):
6480
          raise errors.OpPrereqError("Given IP address '%s' doesn't look"
6481
                                     " like a valid IP" % ip,
6482
                                     errors.ECODE_INVAL)
6483
        nic_ip = ip
6484

    
6485
      # TODO: check the ip address for uniqueness
6486
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
6487
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
6488
                                   errors.ECODE_INVAL)
6489

    
6490
      # MAC address verification
6491
      mac = nic.get("mac", constants.VALUE_AUTO)
6492
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6493
        mac = utils.NormalizeAndValidateMac(mac)
6494

    
6495
        try:
6496
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
6497
        except errors.ReservationError:
6498
          raise errors.OpPrereqError("MAC address %s already in use"
6499
                                     " in cluster" % mac,
6500
                                     errors.ECODE_NOTUNIQUE)
6501

    
6502
      # bridge verification
6503
      bridge = nic.get("bridge", None)
6504
      link = nic.get("link", None)
6505
      if bridge and link:
6506
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
6507
                                   " at the same time", errors.ECODE_INVAL)
6508
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
6509
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
6510
                                   errors.ECODE_INVAL)
6511
      elif bridge:
6512
        link = bridge
6513

    
6514
      nicparams = {}
6515
      if nic_mode_req:
6516
        nicparams[constants.NIC_MODE] = nic_mode_req
6517
      if link:
6518
        nicparams[constants.NIC_LINK] = link
6519

    
6520
      check_params = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
6521
                                      nicparams)
6522
      objects.NIC.CheckParameterSyntax(check_params)
6523
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
6524

    
6525
    # disk checks/pre-build
6526
    self.disks = []
6527
    for disk in self.op.disks:
6528
      mode = disk.get("mode", constants.DISK_RDWR)
6529
      if mode not in constants.DISK_ACCESS_SET:
6530
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
6531
                                   mode, errors.ECODE_INVAL)
6532
      size = disk.get("size", None)
6533
      if size is None:
6534
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
6535
      try:
6536
        size = int(size)
6537
      except (TypeError, ValueError):
6538
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
6539
                                   errors.ECODE_INVAL)
6540
      new_disk = {"size": size, "mode": mode}
6541
      if "adopt" in disk:
6542
        new_disk["adopt"] = disk["adopt"]
6543
      self.disks.append(new_disk)
6544

    
6545
    if self.op.mode == constants.INSTANCE_IMPORT:
6546

    
6547
      # Check that the new instance doesn't have less disks than the export
6548
      instance_disks = len(self.disks)
6549
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
6550
      if instance_disks < export_disks:
6551
        raise errors.OpPrereqError("Not enough disks to import."
6552
                                   " (instance: %d, export: %d)" %
6553
                                   (instance_disks, export_disks),
6554
                                   errors.ECODE_INVAL)
6555

    
6556
      disk_images = []
6557
      for idx in range(export_disks):
6558
        option = 'disk%d_dump' % idx
6559
        if export_info.has_option(constants.INISECT_INS, option):
6560
          # FIXME: are the old os-es, disk sizes, etc. useful?
6561
          export_name = export_info.get(constants.INISECT_INS, option)
6562
          image = utils.PathJoin(self.op.src_path, export_name)
6563
          disk_images.append(image)
6564
        else:
6565
          disk_images.append(False)
6566

    
6567
      self.src_images = disk_images
6568

    
6569
      old_name = export_info.get(constants.INISECT_INS, 'name')
6570
      try:
6571
        exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
6572
      except (TypeError, ValueError), err:
6573
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
6574
                                   " an integer: %s" % str(err),
6575
                                   errors.ECODE_STATE)
6576
      if self.op.instance_name == old_name:
6577
        for idx, nic in enumerate(self.nics):
6578
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
6579
            nic_mac_ini = 'nic%d_mac' % idx
6580
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
6581

    
6582
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
6583

    
6584
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
6585
    if self.op.ip_check:
6586
      if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
6587
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
6588
                                   (self.check_ip, self.op.instance_name),
6589
                                   errors.ECODE_NOTUNIQUE)
6590

    
6591
    #### mac address generation
6592
    # By generating here the mac address both the allocator and the hooks get
6593
    # the real final mac address rather than the 'auto' or 'generate' value.
6594
    # There is a race condition between the generation and the instance object
6595
    # creation, which means that we know the mac is valid now, but we're not
6596
    # sure it will be when we actually add the instance. If things go bad
6597
    # adding the instance will abort because of a duplicate mac, and the
6598
    # creation job will fail.
6599
    for nic in self.nics:
6600
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6601
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
6602

    
6603
    #### allocator run
6604

    
6605
    if self.op.iallocator is not None:
6606
      self._RunAllocator()
6607

    
6608
    #### node related checks
6609

    
6610
    # check primary node
6611
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
6612
    assert self.pnode is not None, \
6613
      "Cannot retrieve locked node %s" % self.op.pnode
6614
    if pnode.offline:
6615
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
6616
                                 pnode.name, errors.ECODE_STATE)
6617
    if pnode.drained:
6618
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
6619
                                 pnode.name, errors.ECODE_STATE)
6620

    
6621
    self.secondaries = []
6622

    
6623
    # mirror node verification
6624
    if self.op.disk_template in constants.DTS_NET_MIRROR:
6625
      if self.op.snode is None:
6626
        raise errors.OpPrereqError("The networked disk templates need"
6627
                                   " a mirror node", errors.ECODE_INVAL)
6628
      if self.op.snode == pnode.name:
6629
        raise errors.OpPrereqError("The secondary node cannot be the"
6630
                                   " primary node.", errors.ECODE_INVAL)
6631
      _CheckNodeOnline(self, self.op.snode)
6632
      _CheckNodeNotDrained(self, self.op.snode)
6633
      self.secondaries.append(self.op.snode)
6634

    
6635
    nodenames = [pnode.name] + self.secondaries
6636

    
6637
    req_size = _ComputeDiskSize(self.op.disk_template,
6638
                                self.disks)
6639

    
6640
    # Check lv size requirements, if not adopting
6641
    if req_size is not None and not self.adopt_disks:
6642
      _CheckNodesFreeDisk(self, nodenames, req_size)
6643

    
6644
    if self.adopt_disks: # instead, we must check the adoption data
6645
      all_lvs = set([i["adopt"] for i in self.disks])
6646
      if len(all_lvs) != len(self.disks):
6647
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
6648
                                   errors.ECODE_INVAL)
6649
      for lv_name in all_lvs:
6650
        try:
6651
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
6652
        except errors.ReservationError:
6653
          raise errors.OpPrereqError("LV named %s used by another instance" %
6654
                                     lv_name, errors.ECODE_NOTUNIQUE)
6655

    
6656
      node_lvs = self.rpc.call_lv_list([pnode.name],
6657
                                       self.cfg.GetVGName())[pnode.name]
6658
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
6659
      node_lvs = node_lvs.payload
6660
      delta = all_lvs.difference(node_lvs.keys())
6661
      if delta:
6662
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
6663
                                   utils.CommaJoin(delta),
6664
                                   errors.ECODE_INVAL)
6665
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
6666
      if online_lvs:
6667
        raise errors.OpPrereqError("Online logical volumes found, cannot"
6668
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
6669
                                   errors.ECODE_STATE)
6670
      # update the size of disk based on what is found
6671
      for dsk in self.disks:
6672
        dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
6673

    
6674
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
6675

    
6676
    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
6677

    
6678
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
6679

    
6680
    # memory check on primary node
6681
    if self.op.start:
6682
      _CheckNodeFreeMemory(self, self.pnode.name,
6683
                           "creating instance %s" % self.op.instance_name,
6684
                           self.be_full[constants.BE_MEMORY],
6685
                           self.op.hypervisor)
6686

    
6687
    self.dry_run_result = list(nodenames)
6688

    
6689
  def Exec(self, feedback_fn):
6690
    """Create and add the instance to the cluster.
6691

6692
    """
6693
    instance = self.op.instance_name
6694
    pnode_name = self.pnode.name
6695

    
6696
    ht_kind = self.op.hypervisor
6697
    if ht_kind in constants.HTS_REQ_PORT:
6698
      network_port = self.cfg.AllocatePort()
6699
    else:
6700
      network_port = None
6701

    
6702
    if constants.ENABLE_FILE_STORAGE:
6703
      # this is needed because os.path.join does not accept None arguments
6704
      if self.op.file_storage_dir is None:
6705
        string_file_storage_dir = ""
6706
      else:
6707
        string_file_storage_dir = self.op.file_storage_dir
6708

    
6709
      # build the full file storage dir path
6710
      file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
6711
                                        string_file_storage_dir, instance)
6712
    else:
6713
      file_storage_dir = ""
6714

    
6715

    
6716
    disks = _GenerateDiskTemplate(self,
6717
                                  self.op.disk_template,
6718
                                  instance, pnode_name,
6719
                                  self.secondaries,
6720
                                  self.disks,
6721
                                  file_storage_dir,
6722
                                  self.op.file_driver,
6723
                                  0)
6724

    
6725
    iobj = objects.Instance(name=instance, os=self.op.os_type,
6726
                            primary_node=pnode_name,
6727
                            nics=self.nics, disks=disks,
6728
                            disk_template=self.op.disk_template,
6729
                            admin_up=False,
6730
                            network_port=network_port,
6731
                            beparams=self.op.beparams,
6732
                            hvparams=self.op.hvparams,
6733
                            hypervisor=self.op.hypervisor,
6734
                            )
6735

    
6736
    if self.adopt_disks:
6737
      # rename LVs to the newly-generated names; we need to construct
6738
      # 'fake' LV disks with the old data, plus the new unique_id
6739
      tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
6740
      rename_to = []
6741
      for t_dsk, a_dsk in zip (tmp_disks, self.disks):
6742
        rename_to.append(t_dsk.logical_id)
6743
        t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
6744
        self.cfg.SetDiskID(t_dsk, pnode_name)
6745
      result = self.rpc.call_blockdev_rename(pnode_name,
6746
                                             zip(tmp_disks, rename_to))
6747
      result.Raise("Failed to rename adoped LVs")
6748
    else:
6749
      feedback_fn("* creating instance disks...")
6750
      try:
6751
        _CreateDisks(self, iobj)
6752
      except errors.OpExecError:
6753
        self.LogWarning("Device creation failed, reverting...")
6754
        try:
6755
          _RemoveDisks(self, iobj)
6756
        finally:
6757
          self.cfg.ReleaseDRBDMinors(instance)
6758
          raise
6759

    
6760
    feedback_fn("adding instance %s to cluster config" % instance)
6761

    
6762
    self.cfg.AddInstance(iobj, self.proc.GetECId())
6763

    
6764
    # Declare that we don't want to remove the instance lock anymore, as we've
6765
    # added the instance to the config
6766
    del self.remove_locks[locking.LEVEL_INSTANCE]
6767
    # Unlock all the nodes
6768
    if self.op.mode == constants.INSTANCE_IMPORT:
6769
      nodes_keep = [self.op.src_node]
6770
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
6771
                       if node != self.op.src_node]
6772
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
6773
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
6774
    else:
6775
      self.context.glm.release(locking.LEVEL_NODE)
6776
      del self.acquired_locks[locking.LEVEL_NODE]
6777

    
6778
    if self.op.wait_for_sync:
6779
      disk_abort = not _WaitForSync(self, iobj)
6780
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
6781
      # make sure the disks are not degraded (still sync-ing is ok)
6782
      time.sleep(15)
6783
      feedback_fn("* checking mirrors status")
6784
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
6785
    else:
6786
      disk_abort = False
6787

    
6788
    if disk_abort:
6789
      _RemoveDisks(self, iobj)
6790
      self.cfg.RemoveInstance(iobj.name)
6791
      # Make sure the instance lock gets removed
6792
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
6793
      raise errors.OpExecError("There are some degraded disks for"
6794
                               " this instance")
6795

    
6796
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
6797
      if self.op.mode == constants.INSTANCE_CREATE:
6798
        if not self.op.no_install:
6799
          feedback_fn("* running the instance OS create scripts...")
6800
          # FIXME: pass debug option from opcode to backend
6801
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
6802
                                                 self.op.debug_level)
6803
          result.Raise("Could not add os for instance %s"
6804
                       " on node %s" % (instance, pnode_name))
6805

    
6806
      elif self.op.mode == constants.INSTANCE_IMPORT:
6807
        feedback_fn("* running the instance OS import scripts...")
6808
        src_node = self.op.src_node
6809
        src_images = self.src_images
6810
        cluster_name = self.cfg.GetClusterName()
6811
        # FIXME: pass debug option from opcode to backend
6812
        import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
6813
                                                         src_node, src_images,
6814
                                                         cluster_name,
6815
                                                         self.op.debug_level)
6816
        msg = import_result.fail_msg
6817
        if msg:
6818
          self.LogWarning("Error while importing the disk images for instance"
6819
                          " %s on node %s: %s" % (instance, pnode_name, msg))
6820
      else:
6821
        # also checked in the prereq part
6822
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
6823
                                     % self.op.mode)
6824

    
6825
    if self.op.start:
6826
      iobj.admin_up = True
6827
      self.cfg.Update(iobj, feedback_fn)
6828
      logging.info("Starting instance %s on node %s", instance, pnode_name)
6829
      feedback_fn("* starting instance...")
6830
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
6831
      result.Raise("Could not start instance")
6832

    
6833
    return list(iobj.all_nodes)
6834

    
6835

    
6836
class LUConnectConsole(NoHooksLU):
6837
  """Connect to an instance's console.
6838

6839
  This is somewhat special in that it returns the command line that
6840
  you need to run on the master node in order to connect to the
6841
  console.
6842

6843
  """
6844
  _OP_REQP = ["instance_name"]
6845
  REQ_BGL = False
6846

    
6847
  def ExpandNames(self):
6848
    self._ExpandAndLockInstance()
6849

    
6850
  def CheckPrereq(self):
6851
    """Check prerequisites.
6852

6853
    This checks that the instance is in the cluster.
6854

6855
    """
6856
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6857
    assert self.instance is not None, \
6858
      "Cannot retrieve locked instance %s" % self.op.instance_name
6859
    _CheckNodeOnline(self, self.instance.primary_node)
6860

    
6861
  def Exec(self, feedback_fn):
6862
    """Connect to the console of an instance
6863

6864
    """
6865
    instance = self.instance
6866
    node = instance.primary_node
6867

    
6868
    node_insts = self.rpc.call_instance_list([node],
6869
                                             [instance.hypervisor])[node]
6870
    node_insts.Raise("Can't get node information from %s" % node)
6871

    
6872
    if instance.name not in node_insts.payload:
6873
      raise errors.OpExecError("Instance %s is not running." % instance.name)
6874

    
6875
    logging.debug("Connecting to console of %s on %s", instance.name, node)
6876

    
6877
    hyper = hypervisor.GetHypervisor(instance.hypervisor)
6878
    cluster = self.cfg.GetClusterInfo()
6879
    # beparams and hvparams are passed separately, to avoid editing the
6880
    # instance and then saving the defaults in the instance itself.
6881
    hvparams = cluster.FillHV(instance)
6882
    beparams = cluster.FillBE(instance)
6883
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
6884

    
6885
    # build ssh cmdline
6886
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
6887

    
6888

    
6889
class LUReplaceDisks(LogicalUnit):
6890
  """Replace the disks of an instance.
6891

6892
  """
6893
  HPATH = "mirrors-replace"
6894
  HTYPE = constants.HTYPE_INSTANCE
6895
  _OP_REQP = ["instance_name", "mode", "disks"]
6896
  REQ_BGL = False
6897

    
6898
  def CheckArguments(self):
6899
    if not hasattr(self.op, "remote_node"):
6900
      self.op.remote_node = None
6901
    if not hasattr(self.op, "iallocator"):
6902
      self.op.iallocator = None
6903
    if not hasattr(self.op, "early_release"):
6904
      self.op.early_release = False
6905

    
6906
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
6907
                                  self.op.iallocator)
6908

    
6909
  def ExpandNames(self):
6910
    self._ExpandAndLockInstance()
6911

    
6912
    if self.op.iallocator is not None:
6913
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6914

    
6915
    elif self.op.remote_node is not None:
6916
      remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
6917
      self.op.remote_node = remote_node
6918

    
6919
      # Warning: do not remove the locking of the new secondary here
6920
      # unless DRBD8.AddChildren is changed to work in parallel;
6921
      # currently it doesn't since parallel invocations of
6922
      # FindUnusedMinor will conflict
6923
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
6924
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6925

    
6926
    else:
6927
      self.needed_locks[locking.LEVEL_NODE] = []
6928
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6929

    
6930
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
6931
                                   self.op.iallocator, self.op.remote_node,
6932
                                   self.op.disks, False, self.op.early_release)
6933

    
6934
    self.tasklets = [self.replacer]
6935

    
6936
  def DeclareLocks(self, level):
6937
    # If we're not already locking all nodes in the set we have to declare the
6938
    # instance's primary/secondary nodes.
6939
    if (level == locking.LEVEL_NODE and
6940
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
6941
      self._LockInstancesNodes()
6942

    
6943
  def BuildHooksEnv(self):
6944
    """Build hooks env.
6945

6946
    This runs on the master, the primary and all the secondaries.
6947

6948
    """
6949
    instance = self.replacer.instance
6950
    env = {
6951
      "MODE": self.op.mode,
6952
      "NEW_SECONDARY": self.op.remote_node,
6953
      "OLD_SECONDARY": instance.secondary_nodes[0],
6954
      }
6955
    env.update(_BuildInstanceHookEnvByObject(self, instance))
6956
    nl = [
6957
      self.cfg.GetMasterNode(),
6958
      instance.primary_node,
6959
      ]
6960
    if self.op.remote_node is not None:
6961
      nl.append(self.op.remote_node)
6962
    return env, nl, nl
6963

    
6964

    
6965
class LUEvacuateNode(LogicalUnit):
6966
  """Relocate the secondary instances from a node.
6967

6968
  """
6969
  HPATH = "node-evacuate"
6970
  HTYPE = constants.HTYPE_NODE
6971
  _OP_REQP = ["node_name"]
6972
  REQ_BGL = False
6973

    
6974
  def CheckArguments(self):
6975
    if not hasattr(self.op, "remote_node"):
6976
      self.op.remote_node = None
6977
    if not hasattr(self.op, "iallocator"):
6978
      self.op.iallocator = None
6979
    if not hasattr(self.op, "early_release"):
6980
      self.op.early_release = False
6981

    
6982
    TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
6983
                                  self.op.remote_node,
6984
                                  self.op.iallocator)
6985

    
6986
  def ExpandNames(self):
6987
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6988

    
6989
    self.needed_locks = {}
6990

    
6991
    # Declare node locks
6992
    if self.op.iallocator is not None:
6993
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6994

    
6995
    elif self.op.remote_node is not None:
6996
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
6997

    
6998
      # Warning: do not remove the locking of the new secondary here
6999
      # unless DRBD8.AddChildren is changed to work in parallel;
7000
      # currently it doesn't since parallel invocations of
7001
      # FindUnusedMinor will conflict
7002
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
7003
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7004

    
7005
    else:
7006
      raise errors.OpPrereqError("Invalid parameters", errors.ECODE_INVAL)
7007

    
7008
    # Create tasklets for replacing disks for all secondary instances on this
7009
    # node
7010
    names = []
7011
    tasklets = []
7012

    
7013
    for inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
7014
      logging.debug("Replacing disks for instance %s", inst.name)
7015
      names.append(inst.name)
7016

    
7017
      replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
7018
                                self.op.iallocator, self.op.remote_node, [],
7019
                                True, self.op.early_release)
7020
      tasklets.append(replacer)
7021

    
7022
    self.tasklets = tasklets
7023
    self.instance_names = names
7024

    
7025
    # Declare instance locks
7026
    self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names
7027

    
7028
  def DeclareLocks(self, level):
7029
    # If we're not already locking all nodes in the set we have to declare the
7030
    # instance's primary/secondary nodes.
7031
    if (level == locking.LEVEL_NODE and
7032
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7033
      self._LockInstancesNodes()
7034

    
7035
  def BuildHooksEnv(self):
7036
    """Build hooks env.
7037

7038
    This runs on the master, the primary and all the secondaries.
7039

7040
    """
7041
    env = {
7042
      "NODE_NAME": self.op.node_name,
7043
      }
7044

    
7045
    nl = [self.cfg.GetMasterNode()]
7046

    
7047
    if self.op.remote_node is not None:
7048
      env["NEW_SECONDARY"] = self.op.remote_node
7049
      nl.append(self.op.remote_node)
7050

    
7051
    return (env, nl, nl)
7052

    
7053

    
7054
class TLReplaceDisks(Tasklet):
7055
  """Replaces disks for an instance.
7056

7057
  Note: Locking is not within the scope of this class.
7058

7059
  """
7060
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7061
               disks, delay_iallocator, early_release):
7062
    """Initializes this class.
7063

7064
    """
7065
    Tasklet.__init__(self, lu)
7066

    
7067
    # Parameters
7068
    self.instance_name = instance_name
7069
    self.mode = mode
7070
    self.iallocator_name = iallocator_name
7071
    self.remote_node = remote_node
7072
    self.disks = disks
7073
    self.delay_iallocator = delay_iallocator
7074
    self.early_release = early_release
7075

    
7076
    # Runtime data
7077
    self.instance = None
7078
    self.new_node = None
7079
    self.target_node = None
7080
    self.other_node = None
7081
    self.remote_node_info = None
7082
    self.node_secondary_ip = None
7083

    
7084
  @staticmethod
7085
  def CheckArguments(mode, remote_node, iallocator):
7086
    """Helper function for users of this class.
7087

7088
    """
7089
    # check for valid parameter combination
7090
    if mode == constants.REPLACE_DISK_CHG:
7091
      if remote_node is None and iallocator is None:
7092
        raise errors.OpPrereqError("When changing the secondary either an"
7093
                                   " iallocator script must be used or the"
7094
                                   " new node given", errors.ECODE_INVAL)
7095

    
7096
      if remote_node is not None and iallocator is not None:
7097
        raise errors.OpPrereqError("Give either the iallocator or the new"
7098
                                   " secondary, not both", errors.ECODE_INVAL)
7099

    
7100
    elif remote_node is not None or iallocator is not None:
7101
      # Not replacing the secondary
7102
      raise errors.OpPrereqError("The iallocator and new node options can"
7103
                                 " only be used when changing the"
7104
                                 " secondary node", errors.ECODE_INVAL)
7105

    
7106
  @staticmethod
7107
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7108
    """Compute a new secondary node using an IAllocator.
7109

7110
    """
7111
    ial = IAllocator(lu.cfg, lu.rpc,
7112
                     mode=constants.IALLOCATOR_MODE_RELOC,
7113
                     name=instance_name,
7114
                     relocate_from=relocate_from)
7115

    
7116
    ial.Run(iallocator_name)
7117

    
7118
    if not ial.success:
7119
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7120
                                 " %s" % (iallocator_name, ial.info),
7121
                                 errors.ECODE_NORES)
7122

    
7123
    if len(ial.result) != ial.required_nodes:
7124
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7125
                                 " of nodes (%s), required %s" %
7126
                                 (iallocator_name,
7127
                                  len(ial.result), ial.required_nodes),
7128
                                 errors.ECODE_FAULT)
7129

    
7130
    remote_node_name = ial.result[0]
7131

    
7132
    lu.LogInfo("Selected new secondary for instance '%s': %s",
7133
               instance_name, remote_node_name)
7134

    
7135
    return remote_node_name
7136

    
7137
  def _FindFaultyDisks(self, node_name):
7138
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7139
                                    node_name, True)
7140

    
7141
  def CheckPrereq(self):
7142
    """Check prerequisites.
7143

7144
    This checks that the instance is in the cluster.
7145

7146
    """
7147
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7148
    assert instance is not None, \
7149
      "Cannot retrieve locked instance %s" % self.instance_name
7150

    
7151
    if instance.disk_template != constants.DT_DRBD8:
7152
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7153
                                 " instances", errors.ECODE_INVAL)
7154

    
7155
    if len(instance.secondary_nodes) != 1:
7156
      raise errors.OpPrereqError("The instance has a strange layout,"
7157
                                 " expected one secondary but found %d" %
7158
                                 len(instance.secondary_nodes),
7159
                                 errors.ECODE_FAULT)
7160

    
7161
    if not self.delay_iallocator:
7162
      self._CheckPrereq2()
7163

    
7164
  def _CheckPrereq2(self):
7165
    """Check prerequisites, second part.
7166

7167
    This function should always be part of CheckPrereq. It was separated out and
7168
    is now called from Exec because, during node evacuation, the iallocator used
7169
    to be called with an unmodified cluster model that did not take the planned
7170
    changes into account.
7171

7172
    """
7173
    instance = self.instance
7174
    secondary_node = instance.secondary_nodes[0]
7175

    
7176
    if self.iallocator_name is None:
7177
      remote_node = self.remote_node
7178
    else:
7179
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7180
                                       instance.name, instance.secondary_nodes)
7181

    
7182
    if remote_node is not None:
7183
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7184
      assert self.remote_node_info is not None, \
7185
        "Cannot retrieve locked node %s" % remote_node
7186
    else:
7187
      self.remote_node_info = None
7188

    
7189
    if remote_node == self.instance.primary_node:
7190
      raise errors.OpPrereqError("The specified node is the primary node of"
7191
                                 " the instance.", errors.ECODE_INVAL)
7192

    
7193
    if remote_node == secondary_node:
7194
      raise errors.OpPrereqError("The specified node is already the"
7195
                                 " secondary node of the instance.",
7196
                                 errors.ECODE_INVAL)
7197

    
7198
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7199
                                    constants.REPLACE_DISK_CHG):
7200
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
7201
                                 errors.ECODE_INVAL)
7202

    
7203
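    # In automatic mode the set of disks to replace is computed from the
    # faulty disks found; repair is only possible if at most one of the two
    # nodes has faulty disks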
    if self.mode == constants.REPLACE_DISK_AUTO:
7204
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
7205
      faulty_secondary = self._FindFaultyDisks(secondary_node)
7206

    
7207
      if faulty_primary and faulty_secondary:
7208
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7209
                                   " one node and can not be repaired"
7210
                                   " automatically" % self.instance_name,
7211
                                   errors.ECODE_STATE)
7212

    
7213
      if faulty_primary:
7214
        self.disks = faulty_primary
7215
        self.target_node = instance.primary_node
7216
        self.other_node = secondary_node
7217
        check_nodes = [self.target_node, self.other_node]
7218
      elif faulty_secondary:
7219
        self.disks = faulty_secondary
7220
        self.target_node = secondary_node
7221
        self.other_node = instance.primary_node
7222
        check_nodes = [self.target_node, self.other_node]
7223
      else:
7224
        self.disks = []
7225
        check_nodes = []
7226

    
7227
    else:
7228
      # Non-automatic modes
7229
      if self.mode == constants.REPLACE_DISK_PRI:
7230
        self.target_node = instance.primary_node
7231
        self.other_node = secondary_node
7232
        check_nodes = [self.target_node, self.other_node]
7233

    
7234
      elif self.mode == constants.REPLACE_DISK_SEC:
7235
        self.target_node = secondary_node
7236
        self.other_node = instance.primary_node
7237
        check_nodes = [self.target_node, self.other_node]
7238

    
7239
      elif self.mode == constants.REPLACE_DISK_CHG:
7240
        self.new_node = remote_node
7241
        self.other_node = instance.primary_node
7242
        self.target_node = secondary_node
7243
        check_nodes = [self.new_node, self.other_node]
7244

    
7245
        _CheckNodeNotDrained(self.lu, remote_node)
7246

    
7247
        old_node_info = self.cfg.GetNodeInfo(secondary_node)
7248
        assert old_node_info is not None
7249
        if old_node_info.offline and not self.early_release:
7250
          # doesn't make sense to delay the release
7251
          self.early_release = True
7252
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
7253
                          " early-release mode", secondary_node)
7254

    
7255
      else:
7256
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
7257
                                     self.mode)
7258

    
7259
      # If not specified all disks should be replaced
7260
      if not self.disks:
7261
        self.disks = range(len(self.instance.disks))
7262

    
7263
    for node in check_nodes:
7264
      _CheckNodeOnline(self.lu, node)
7265

    
7266
    # Check whether disks are valid
7267
    for disk_idx in self.disks:
7268
      instance.FindDisk(disk_idx)
7269

    
7270
    # Get secondary node IP addresses
7271
    node_2nd_ip = {}
7272

    
7273
    for node_name in [self.target_node, self.other_node, self.new_node]:
7274
      if node_name is not None:
7275
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
7276

    
7277
    self.node_secondary_ip = node_2nd_ip
7278

    
7279
  def Exec(self, feedback_fn):
7280
    """Execute disk replacement.
7281

7282
    This dispatches the disk replacement to the appropriate handler.
7283

7284
    """
7285
    if self.delay_iallocator:
7286
      self._CheckPrereq2()
7287

    
7288
    if not self.disks:
7289
      feedback_fn("No disks need replacement")
7290
      return
7291

    
7292
    feedback_fn("Replacing disk(s) %s for %s" %
7293
                (utils.CommaJoin(self.disks), self.instance.name))
7294

    
7295
    activate_disks = (not self.instance.admin_up)
7296

    
7297
    # Activate the instance disks if we're replacing them on a down instance
7298
    if activate_disks:
7299
      _StartInstanceDisks(self.lu, self.instance, True)
7300

    
7301
    try:
7302
      # Should we replace the secondary node?
7303
      if self.new_node is not None:
7304
        fn = self._ExecDrbd8Secondary
7305
      else:
7306
        fn = self._ExecDrbd8DiskOnly
7307

    
7308
      return fn(feedback_fn)
7309

    
7310
    finally:
7311
      # Deactivate the instance disks if we're replacing them on a
7312
      # down instance
7313
      if activate_disks:
7314
        _SafeShutdownInstanceDisks(self.lu, self.instance)
7315

    
7316
  def _CheckVolumeGroup(self, nodes):
7317
    self.lu.LogInfo("Checking volume groups")
7318

    
7319
    vgname = self.cfg.GetVGName()
7320

    
7321
    # Make sure volume group exists on all involved nodes
7322
    results = self.rpc.call_vg_list(nodes)
7323
    if not results:
7324
      raise errors.OpExecError("Can't list volume groups on the nodes")
7325

    
7326
    for node in nodes:
7327
      res = results[node]
7328
      res.Raise("Error checking node %s" % node)
7329
      if vgname not in res.payload:
7330
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
7331
                                 (vgname, node))
7332

    
7333
  def _CheckDisksExistence(self, nodes):
7334
    # Check disk existence
7335
    for idx, dev in enumerate(self.instance.disks):
7336
      if idx not in self.disks:
7337
        continue
7338

    
7339
      for node in nodes:
7340
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
7341
        self.cfg.SetDiskID(dev, node)
7342

    
7343
        result = self.rpc.call_blockdev_find(node, dev)
7344

    
7345
        msg = result.fail_msg
7346
        if msg or not result.payload:
7347
          if not msg:
7348
            msg = "disk not found"
7349
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7350
                                   (idx, node, msg))
7351

    
7352
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7353
    for idx, dev in enumerate(self.instance.disks):
7354
      if idx not in self.disks:
7355
        continue
7356

    
7357
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7358
                      (idx, node_name))
7359

    
7360
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
7361
                                   ldisk=ldisk):
7362
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7363
                                 " replace disks for instance %s" %
7364
                                 (node_name, self.instance.name))
7365

    
7366
  def _CreateNewStorage(self, node_name):
7367
    vgname = self.cfg.GetVGName()
7368
    iv_names = {}
7369

    
7370
    for idx, dev in enumerate(self.instance.disks):
7371
      if idx not in self.disks:
7372
        continue
7373

    
7374
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7375

    
7376
      self.cfg.SetDiskID(dev, node_name)
7377

    
7378
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7379
      names = _GenerateUniqueNames(self.lu, lv_names)
7380

    
7381
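      # the data LV matches the disk size; the second, small LV holds the
      # DRBD8 metadata (sizes are in MiB)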
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7382
                             logical_id=(vgname, names[0]))
7383
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7384
                             logical_id=(vgname, names[1]))
7385

    
7386
      new_lvs = [lv_data, lv_meta]
7387
      old_lvs = dev.children
7388
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
7389

    
7390
      # we pass force_create=True to force the LVM creation
7391
      for new_lv in new_lvs:
7392
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7393
                        _GetInstanceInfoText(self.instance), False)
7394

    
7395
    return iv_names
7396

    
7397
  def _CheckDevices(self, node_name, iv_names):
7398
    for name, (dev, _, _) in iv_names.iteritems():
7399
      self.cfg.SetDiskID(dev, node_name)
7400

    
7401
      result = self.rpc.call_blockdev_find(node_name, dev)
7402

    
7403
      msg = result.fail_msg
7404
      if msg or not result.payload:
7405
        if not msg:
7406
          msg = "disk not found"
7407
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
7408
                                 (name, msg))
7409

    
7410
      if result.payload.is_degraded:
7411
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
7412

    
7413
  def _RemoveOldStorage(self, node_name, iv_names):
7414
    for name, (_, old_lvs, _) in iv_names.iteritems():
7415
      self.lu.LogInfo("Remove logical volumes for %s" % name)
7416

    
7417
      for lv in old_lvs:
7418
        self.cfg.SetDiskID(lv, node_name)
7419

    
7420
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7421
        if msg:
7422
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
7423
                             hint="remove unused LVs manually")
7424

    
7425
  def _ReleaseNodeLock(self, node_name):
7426
    """Releases the lock for a given node."""
7427
    self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7428

    
7429
  def _ExecDrbd8DiskOnly(self, feedback_fn):
7430
    """Replace a disk on the primary or secondary for DRBD 8.
7431

7432
    The algorithm for replace is quite complicated:
7433

7434
      1. for each disk to be replaced:
7435

7436
        1. create new LVs on the target node with unique names
7437
        1. detach old LVs from the drbd device
7438
        1. rename old LVs to name_replaced.<time_t>
7439
        1. rename new LVs to old LVs
7440
        1. attach the new LVs (with the old names now) to the drbd device
7441

7442
      1. wait for sync across all devices
7443

7444
      1. for each modified disk:
7445

7446
        1. remove old LVs (which have the name name_replaced.<time_t>)
7447

7448
    Failures are not very well handled.
7449

7450
    """
7451
    steps_total = 6
7452

    
7453
    # Step: check device activation
7454
    self.lu.LogStep(1, steps_total, "Check device existence")
7455
    self._CheckDisksExistence([self.other_node, self.target_node])
7456
    self._CheckVolumeGroup([self.target_node, self.other_node])
7457

    
7458
    # Step: check other node consistency
7459
    self.lu.LogStep(2, steps_total, "Check peer consistency")
7460
    self._CheckDisksConsistency(self.other_node,
7461
                                self.other_node == self.instance.primary_node,
7462
                                False)
7463

    
7464
    # Step: create new storage
7465
    self.lu.LogStep(3, steps_total, "Allocate new storage")
7466
    iv_names = self._CreateNewStorage(self.target_node)
7467

    
7468
    # Step: for each lv, detach+rename*2+attach
7469
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7470
    for dev, old_lvs, new_lvs in iv_names.itervalues():
7471
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
7472

    
7473
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
7474
                                                     old_lvs)
7475
      result.Raise("Can't detach drbd from local storage on node"
7476
                   " %s for device %s" % (self.target_node, dev.iv_name))
7477
      #dev.children = []
7478
      #cfg.Update(instance)
7479

    
7480
      # ok, we created the new LVs, so now we know we have the needed
7481
      # storage; as such, we proceed on the target node to rename
7482
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
7483
      # using the assumption that logical_id == physical_id (which in
7484
      # turn is the unique_id on that node)
7485

    
7486
      # FIXME(iustin): use a better name for the replaced LVs
7487
      temp_suffix = int(time.time())
7488
      ren_fn = lambda d, suff: (d.physical_id[0],
7489
                                d.physical_id[1] + "_replaced-%s" % suff)
7490

    
7491
      # Build the rename list based on what LVs exist on the node
7492
      rename_old_to_new = []
7493
      for to_ren in old_lvs:
7494
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
7495
        if not result.fail_msg and result.payload:
7496
          # device exists
7497
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
7498

    
7499
      self.lu.LogInfo("Renaming the old LVs on the target node")
7500
      result = self.rpc.call_blockdev_rename(self.target_node,
7501
                                             rename_old_to_new)
7502
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
7503

    
7504
      # Now we rename the new LVs to the old LVs
7505
      self.lu.LogInfo("Renaming the new LVs on the target node")
7506
      rename_new_to_old = [(new, old.physical_id)
7507
                           for old, new in zip(old_lvs, new_lvs)]
7508
      result = self.rpc.call_blockdev_rename(self.target_node,
7509
                                             rename_new_to_old)
7510
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
7511

    
7512
      for old, new in zip(old_lvs, new_lvs):
7513
        new.logical_id = old.logical_id
7514
        self.cfg.SetDiskID(new, self.target_node)
7515

    
7516
      for disk in old_lvs:
7517
        disk.logical_id = ren_fn(disk, temp_suffix)
7518
        self.cfg.SetDiskID(disk, self.target_node)
7519

    
7520
      # Now that the new lvs have the old name, we can add them to the device
7521
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
7522
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
7523
                                                  new_lvs)
7524
      msg = result.fail_msg
7525
      if msg:
7526
        for new_lv in new_lvs:
7527
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
7528
                                               new_lv).fail_msg
7529
          if msg2:
7530
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
7531
                               hint=("cleanup manually the unused logical"
7532
                                     "volumes"))
7533
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
7534

    
7535
      dev.children = new_lvs
7536

    
7537
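      # persist the new children of this disk in the configuration before
      # moving on to the next one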
      self.cfg.Update(self.instance, feedback_fn)
7538

    
7539
    cstep = 5
7540
    if self.early_release:
7541
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
7542
      cstep += 1
7543
      self._RemoveOldStorage(self.target_node, iv_names)
7544
      # WARNING: we release both node locks here, do not do other RPCs
7545
      # than WaitForSync to the primary node
7546
      self._ReleaseNodeLock([self.target_node, self.other_node])
7547

    
7548
    # Wait for sync
7549
    # This can fail as the old devices are degraded and _WaitForSync
7550
    # returns a combined result over all disks, so its value is not checked
7551
    self.lu.LogStep(cstep, steps_total, "Sync devices")
7552
    cstep += 1
7553
    _WaitForSync(self.lu, self.instance)
7554

    
7555
    # Check all devices manually
7556
    self._CheckDevices(self.instance.primary_node, iv_names)
7557

    
7558
    # Step: remove old storage
7559
    if not self.early_release:
7560
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
7561
      cstep += 1
7562
      self._RemoveOldStorage(self.target_node, iv_names)
7563

    
7564
  def _ExecDrbd8Secondary(self, feedback_fn):
7565
    """Replace the secondary node for DRBD 8.
7566

7567
    The algorithm for replace is quite complicated:
7568
      - for all disks of the instance:
7569
        - create new LVs on the new node with same names
7570
        - shutdown the drbd device on the old secondary
7571
        - disconnect the drbd network on the primary
7572
        - create the drbd device on the new secondary
7573
        - network attach the drbd on the primary, using an artifice:
7574
          the drbd code for Attach() will connect to the network if it
7575
          finds a device which is connected to the good local disks but
7576
          not network enabled
7577
      - wait for sync across all devices
7578
      - remove all disks from the old secondary
7579

7580
    Failures are not very well handled.
7581

7582
    """
7583
    steps_total = 6
7584

    
7585
    # Step: check device activation
7586
    self.lu.LogStep(1, steps_total, "Check device existence")
7587
    self._CheckDisksExistence([self.instance.primary_node])
7588
    self._CheckVolumeGroup([self.instance.primary_node])
7589

    
7590
    # Step: check other node consistency
7591
    self.lu.LogStep(2, steps_total, "Check peer consistency")
7592
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
7593

    
7594
    # Step: create new storage
7595
    self.lu.LogStep(3, steps_total, "Allocate new storage")
7596
    for idx, dev in enumerate(self.instance.disks):
7597
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
7598
                      (self.new_node, idx))
7599
      # we pass force_create=True to force LVM creation
7600
      for new_lv in dev.children:
7601
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
7602
                        _GetInstanceInfoText(self.instance), False)
7603

    
7604
    # Step 4: drbd minors and drbd setup changes
7605
    # after this, we must manually remove the drbd minors on both the
7606
    # error and the success paths
7607
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7608
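    # allocate one DRBD minor per instance disk, all of them on the new
    # secondary node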
    minors = self.cfg.AllocateDRBDMinor([self.new_node
7609
                                         for dev in self.instance.disks],
7610
                                        self.instance.name)
7611
    logging.debug("Allocated minors %r", minors)
7612

    
7613
    iv_names = {}
7614
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
7615
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
7616
                      (self.new_node, idx))
7617
      # create new devices on new_node; note that we create two IDs:
7618
      # one without port, so the drbd will be activated without
7619
      # networking information on the new node at this stage, and one
7620
      # with network, for the latter activation in step 4
7621
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
7622
      if self.instance.primary_node == o_node1:
7623
        p_minor = o_minor1
7624
      else:
7625
        assert self.instance.primary_node == o_node2, "Three-node instance?"
7626
        p_minor = o_minor2
7627

    
7628
      new_alone_id = (self.instance.primary_node, self.new_node, None,
7629
                      p_minor, new_minor, o_secret)
7630
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
7631
                    p_minor, new_minor, o_secret)
7632

    
7633
      iv_names[idx] = (dev, dev.children, new_net_id)
7634
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
7635
                    new_net_id)
7636
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
7637
                              logical_id=new_alone_id,
7638
                              children=dev.children,
7639
                              size=dev.size)
7640
      try:
7641
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
7642
                              _GetInstanceInfoText(self.instance), False)
7643
      except errors.GenericError:
7644
        self.cfg.ReleaseDRBDMinors(self.instance.name)
7645
        raise
7646

    
7647
    # We have new devices, shutdown the drbd on the old secondary
7648
    for idx, dev in enumerate(self.instance.disks):
7649
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
7650
      self.cfg.SetDiskID(dev, self.target_node)
7651
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
7652
      if msg:
7653
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
7654
                           "node: %s" % (idx, msg),
7655
                           hint=("Please cleanup this device manually as"
7656
                                 " soon as possible"))
7657

    
7658
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
7659
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
7660
                                               self.node_secondary_ip,
7661
                                               self.instance.disks)\
7662
                                              [self.instance.primary_node]
7663

    
7664
    msg = result.fail_msg
7665
    if msg:
7666
      # detaches didn't succeed (unlikely)
7667
      self.cfg.ReleaseDRBDMinors(self.instance.name)
7668
      raise errors.OpExecError("Can't detach the disks from the network on"
7669
                               " old node: %s" % (msg,))
7670

    
7671
    # if we managed to detach at least one, we update all the disks of
7672
    # the instance to point to the new secondary
7673
    self.lu.LogInfo("Updating instance configuration")
7674
    for dev, _, new_logical_id in iv_names.itervalues():
7675
      dev.logical_id = new_logical_id
7676
      self.cfg.SetDiskID(dev, self.instance.primary_node)
7677

    
7678
    self.cfg.Update(self.instance, feedback_fn)
7679

    
7680
    # and now perform the drbd attach
7681
    self.lu.LogInfo("Attaching primary drbds to new secondary"
7682
                    " (standalone => connected)")
7683
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
7684
                                            self.new_node],
7685
                                           self.node_secondary_ip,
7686
                                           self.instance.disks,
7687
                                           self.instance.name,
7688
                                           False)
7689
    for to_node, to_result in result.items():
7690
      msg = to_result.fail_msg
7691
      if msg:
7692
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
7693
                           to_node, msg,
7694
                           hint=("please do a gnt-instance info to see the"
7695
                                 " status of disks"))
7696
    cstep = 5
7697
    if self.early_release:
7698
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
7699
      cstep += 1
7700
      self._RemoveOldStorage(self.target_node, iv_names)
7701
      # WARNING: we release all node locks here, do not do other RPCs
7702
      # than WaitForSync to the primary node
7703
      self._ReleaseNodeLock([self.instance.primary_node,
7704
                             self.target_node,
7705
                             self.new_node])
7706

    
7707
    # Wait for sync
7708
    # This can fail as the old devices are degraded and _WaitForSync
7709
    # returns a combined result over all disks, so its value is not checked
7710
    self.lu.LogStep(cstep, steps_total, "Sync devices")
7711
    cstep += 1
7712
    _WaitForSync(self.lu, self.instance)
7713

    
7714
    # Check all devices manually
7715
    self._CheckDevices(self.instance.primary_node, iv_names)
7716

    
7717
    # Step: remove old storage
7718
    if not self.early_release:
7719
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
7720
      self._RemoveOldStorage(self.target_node, iv_names)
7721

    
7722

    
7723
class LURepairNodeStorage(NoHooksLU):
7724
  """Repairs the volume group on a node.
7725

7726
  """
7727
  _OP_REQP = ["node_name"]
7728
  REQ_BGL = False
7729

    
7730
  def CheckArguments(self):
7731
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7732

    
7733
    _CheckStorageType(self.op.storage_type)
7734

    
7735
  def ExpandNames(self):
7736
    self.needed_locks = {
7737
      locking.LEVEL_NODE: [self.op.node_name],
7738
      }
7739

    
7740
  def _CheckFaultyDisks(self, instance, node_name):
7741
    """Ensure faulty disks abort the opcode or at least warn."""
7742
    try:
7743
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
7744
                                  node_name, True):
7745
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
7746
                                   " node '%s'" % (instance.name, node_name),
7747
                                   errors.ECODE_STATE)
7748
    except errors.OpPrereqError, err:
7749
      if self.op.ignore_consistency:
7750
        self.proc.LogWarning(str(err.args[0]))
7751
      else:
7752
        raise
7753

    
7754
  def CheckPrereq(self):
7755
    """Check prerequisites.
7756

7757
    """
7758
    storage_type = self.op.storage_type
7759

    
7760
    if (constants.SO_FIX_CONSISTENCY not in
7761
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
7762
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
7763
                                 " repaired" % storage_type,
7764
                                 errors.ECODE_INVAL)
7765

    
7766
    # Check whether any instance on this node has faulty disks
7767
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
7768
      if not inst.admin_up:
7769
        continue
7770
      check_nodes = set(inst.all_nodes)
7771
      check_nodes.discard(self.op.node_name)
7772
      for inst_node_name in check_nodes:
7773
        self._CheckFaultyDisks(inst, inst_node_name)
7774

    
7775
  def Exec(self, feedback_fn):
7776
    feedback_fn("Repairing storage unit '%s' on %s ..." %
7777
                (self.op.name, self.op.node_name))
7778

    
7779
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
7780
    result = self.rpc.call_storage_execute(self.op.node_name,
7781
                                           self.op.storage_type, st_args,
7782
                                           self.op.name,
7783
                                           constants.SO_FIX_CONSISTENCY)
7784
    result.Raise("Failed to repair storage unit '%s' on %s" %
7785
                 (self.op.name, self.op.node_name))
7786

    
7787

    
7788
class LUNodeEvacuationStrategy(NoHooksLU):
7789
  """Computes the node evacuation strategy.
7790

7791
  """
7792
  _OP_REQP = ["nodes"]
7793
  REQ_BGL = False
7794

    
7795
  def CheckArguments(self):
7796
    if not hasattr(self.op, "remote_node"):
7797
      self.op.remote_node = None
7798
    if not hasattr(self.op, "iallocator"):
7799
      self.op.iallocator = None
7800
    if self.op.remote_node is not None and self.op.iallocator is not None:
7801
      raise errors.OpPrereqError("Give either the iallocator or the new"
7802
                                 " secondary, not both", errors.ECODE_INVAL)
7803

    
7804
  def ExpandNames(self):
7805
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
7806
    self.needed_locks = locks = {}
7807
    if self.op.remote_node is None:
7808
      locks[locking.LEVEL_NODE] = locking.ALL_SET
7809
    else:
7810
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7811
      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
7812

    
7813
  def CheckPrereq(self):
7814
    pass
7815

    
7816
  def Exec(self, feedback_fn):
7817
    if self.op.remote_node is not None:
7818
      instances = []
7819
      for node in self.op.nodes:
7820
        instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
7821
      result = []
7822
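      # the requested target node must not already be the primary node of
      # any instance that would be moved onto it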
      for i in instances:
7823
        if i.primary_node == self.op.remote_node:
7824
          raise errors.OpPrereqError("Node %s is the primary node of"
7825
                                     " instance %s, cannot use it as"
7826
                                     " secondary" %
7827
                                     (self.op.remote_node, i.name),
7828
                                     errors.ECODE_INVAL)
7829
        result.append([i.name, self.op.remote_node])
7830
    else:
7831
      ial = IAllocator(self.cfg, self.rpc,
7832
                       mode=constants.IALLOCATOR_MODE_MEVAC,
7833
                       evac_nodes=self.op.nodes)
7834
      ial.Run(self.op.iallocator, validate=True)
7835
      if not ial.success:
7836
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
7837
                                 errors.ECODE_NORES)
7838
      result = ial.result
7839
    return result
7840

    
7841

    
7842
class LUGrowDisk(LogicalUnit):
7843
  """Grow a disk of an instance.
7844

7845
  """
7846
  HPATH = "disk-grow"
7847
  HTYPE = constants.HTYPE_INSTANCE
7848
  _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
7849
  REQ_BGL = False
7850

    
7851
  def ExpandNames(self):
7852
    self._ExpandAndLockInstance()
7853
    self.needed_locks[locking.LEVEL_NODE] = []
7854
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7855

    
7856
  def DeclareLocks(self, level):
7857
    if level == locking.LEVEL_NODE:
7858
      self._LockInstancesNodes()
7859

    
7860
  def BuildHooksEnv(self):
7861
    """Build hooks env.
7862

7863
    This runs on the master, the primary and all the secondaries.
7864

7865
    """
7866
    env = {
7867
      "DISK": self.op.disk,
7868
      "AMOUNT": self.op.amount,
7869
      }
7870
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7871
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7872
    return env, nl, nl
7873

    
7874
  def CheckPrereq(self):
7875
    """Check prerequisites.
7876

7877
    This checks that the instance is in the cluster.
7878

7879
    """
7880
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7881
    assert instance is not None, \
7882
      "Cannot retrieve locked instance %s" % self.op.instance_name
7883
    nodenames = list(instance.all_nodes)
7884
    for node in nodenames:
7885
      _CheckNodeOnline(self, node)
7886

    
7887

    
7888
    self.instance = instance
7889

    
7890
    if instance.disk_template not in constants.DTS_GROWABLE:
7891
      raise errors.OpPrereqError("Instance's disk layout does not support"
7892
                                 " growing.", errors.ECODE_INVAL)
7893

    
7894
    self.disk = instance.FindDisk(self.op.disk)
7895

    
7896
    if instance.disk_template != constants.DT_FILE:
7897
      # TODO: check the free disk space for file, when that feature will be
7898
      # supported
7899
      _CheckNodesFreeDisk(self, nodenames, self.op.amount)
7900

    
7901
  def Exec(self, feedback_fn):
7902
    """Execute disk grow.
7903

7904
    """
7905
    instance = self.instance
7906
    disk = self.disk
7907
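    # the grow request is sent to every node used by the instance, so that
    # e.g. both halves of a DRBD mirror get resized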
    for node in instance.all_nodes:
7908
      self.cfg.SetDiskID(disk, node)
7909
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
7910
      result.Raise("Grow request failed to node %s" % node)
7911

    
7912
      # TODO: Rewrite code to work properly
7913
      # DRBD goes into sync mode for a short amount of time after executing the
7914
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
7915
      # calling "resize" in sync mode fails. Sleeping for a short amount of
7916
      # time is a work-around.
7917
      time.sleep(5)
7918

    
7919
    disk.RecordGrow(self.op.amount)
7920
    self.cfg.Update(instance, feedback_fn)
7921
    if self.op.wait_for_sync:
7922
      disk_abort = not _WaitForSync(self, instance)
7923
      if disk_abort:
7924
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
7925
                             " status.\nPlease check the instance.")
7926

    
7927

    
7928
class LUQueryInstanceData(NoHooksLU):
7929
  """Query runtime instance data.
7930

7931
  """
7932
  _OP_REQP = ["instances", "static"]
7933
  REQ_BGL = False
7934

    
7935
  def ExpandNames(self):
7936
    self.needed_locks = {}
7937
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
7938

    
7939
    if not isinstance(self.op.instances, list):
7940
      raise errors.OpPrereqError("Invalid argument type 'instances'",
7941
                                 errors.ECODE_INVAL)
7942

    
7943
    if self.op.instances:
7944
      self.wanted_names = []
7945
      for name in self.op.instances:
7946
        full_name = _ExpandInstanceName(self.cfg, name)
7947
        self.wanted_names.append(full_name)
7948
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
7949
    else:
7950
      self.wanted_names = None
7951
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
7952

    
7953
    self.needed_locks[locking.LEVEL_NODE] = []
7954
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7955

    
7956
  def DeclareLocks(self, level):
7957
    if level == locking.LEVEL_NODE:
7958
      self._LockInstancesNodes()
7959

    
7960
  def CheckPrereq(self):
7961
    """Check prerequisites.
7962

7963
    This only checks the optional instance list against the existing names.
7964

7965
    """
7966
    if self.wanted_names is None:
7967
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
7968

    
7969
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
7970
                             in self.wanted_names]
7971
    return
7972

    
7973
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
7974
    """Returns the status of a block device
7975

7976
    """
7977
    if self.op.static or not node:
7978
      return None
7979

    
7980
    self.cfg.SetDiskID(dev, node)
7981

    
7982
    result = self.rpc.call_blockdev_find(node, dev)
7983
    if result.offline:
7984
      return None
7985

    
7986
    result.Raise("Can't compute disk status for %s" % instance_name)
7987

    
7988
    status = result.payload
7989
    if status is None:
7990
      return None
7991

    
7992
    return (status.dev_path, status.major, status.minor,
7993
            status.sync_percent, status.estimated_time,
7994
            status.is_degraded, status.ldisk_status)
7995

    
7996
  def _ComputeDiskStatus(self, instance, snode, dev):
7997
    """Compute block device status.
7998

7999
    """
8000
    if dev.dev_type in constants.LDS_DRBD:
8001
      # we change the snode then (otherwise we use the one passed in)
8002
      if dev.logical_id[0] == instance.primary_node:
8003
        snode = dev.logical_id[1]
8004
      else:
8005
        snode = dev.logical_id[0]
8006

    
8007
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8008
                                              instance.name, dev)
8009
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8010

    
8011
    if dev.children:
8012
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
8013
                      for child in dev.children]
8014
    else:
8015
      dev_children = []
8016

    
8017
    data = {
8018
      "iv_name": dev.iv_name,
8019
      "dev_type": dev.dev_type,
8020
      "logical_id": dev.logical_id,
8021
      "physical_id": dev.physical_id,
8022
      "pstatus": dev_pstatus,
8023
      "sstatus": dev_sstatus,
8024
      "children": dev_children,
8025
      "mode": dev.mode,
8026
      "size": dev.size,
8027
      }
8028

    
8029
    return data
8030

    
8031
  def Exec(self, feedback_fn):
8032
    """Gather and return data"""
8033
    result = {}
8034

    
8035
    cluster = self.cfg.GetClusterInfo()
8036

    
8037
    for instance in self.wanted_instances:
8038
      if not self.op.static:
8039
        remote_info = self.rpc.call_instance_info(instance.primary_node,
8040
                                                  instance.name,
8041
                                                  instance.hypervisor)
8042
        remote_info.Raise("Error checking node %s" % instance.primary_node)
8043
        remote_info = remote_info.payload
8044
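        # the instance is considered running if the hypervisor returned
        # info containing a "state" field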
        if remote_info and "state" in remote_info:
8045
          remote_state = "up"
8046
        else:
8047
          remote_state = "down"
8048
      else:
8049
        remote_state = None
8050
      if instance.admin_up:
8051
        config_state = "up"
8052
      else:
8053
        config_state = "down"
8054

    
8055
      disks = [self._ComputeDiskStatus(instance, None, device)
8056
               for device in instance.disks]
8057

    
8058
      idict = {
8059
        "name": instance.name,
8060
        "config_state": config_state,
8061
        "run_state": remote_state,
8062
        "pnode": instance.primary_node,
8063
        "snodes": instance.secondary_nodes,
8064
        "os": instance.os,
8065
        # this happens to be the same format used for hooks
8066
        "nics": _NICListToTuple(self, instance.nics),
8067
        "disks": disks,
8068
        "hypervisor": instance.hypervisor,
8069
        "network_port": instance.network_port,
8070
        "hv_instance": instance.hvparams,
8071
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
8072
        "be_instance": instance.beparams,
8073
        "be_actual": cluster.FillBE(instance),
8074
        "serial_no": instance.serial_no,
8075
        "mtime": instance.mtime,
8076
        "ctime": instance.ctime,
8077
        "uuid": instance.uuid,
8078
        }
8079

    
8080
      result[instance.name] = idict
8081

    
8082
    return result
8083

    
8084

    
8085
class LUSetInstanceParams(LogicalUnit):
8086
  """Modifies an instances's parameters.
8087

8088
  """
8089
  HPATH = "instance-modify"
8090
  HTYPE = constants.HTYPE_INSTANCE
8091
  _OP_REQP = ["instance_name"]
8092
  REQ_BGL = False
8093

    
8094
  def CheckArguments(self):
8095
    if not hasattr(self.op, 'nics'):
8096
      self.op.nics = []
8097
    if not hasattr(self.op, 'disks'):
8098
      self.op.disks = []
8099
    if not hasattr(self.op, 'beparams'):
8100
      self.op.beparams = {}
8101
    if not hasattr(self.op, 'hvparams'):
8102
      self.op.hvparams = {}
8103
    if not hasattr(self.op, "disk_template"):
8104
      self.op.disk_template = None
8105
    if not hasattr(self.op, "remote_node"):
8106
      self.op.remote_node = None
8107
    if not hasattr(self.op, "os_name"):
8108
      self.op.os_name = None
8109
    if not hasattr(self.op, "force_variant"):
8110
      self.op.force_variant = False
8111
    self.op.force = getattr(self.op, "force", False)
8112
    if not (self.op.nics or self.op.disks or self.op.disk_template or
8113
            self.op.hvparams or self.op.beparams or self.op.os_name):
8114
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8115

    
8116
    if self.op.hvparams:
8117
      _CheckGlobalHvParams(self.op.hvparams)
8118

    
8119
    # Disk validation
8120
    disk_addremove = 0
8121
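    # each entry is (disk_op, disk_dict), where disk_op is either DDM_ADD,
    # DDM_REMOVE or the index of an existing disk to modify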
    for disk_op, disk_dict in self.op.disks:
8122
      if disk_op == constants.DDM_REMOVE:
8123
        disk_addremove += 1
8124
        continue
8125
      elif disk_op == constants.DDM_ADD:
8126
        disk_addremove += 1
8127
      else:
8128
        if not isinstance(disk_op, int):
8129
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8130
        if not isinstance(disk_dict, dict):
8131
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8132
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8133

    
8134
      if disk_op == constants.DDM_ADD:
8135
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8136
        if mode not in constants.DISK_ACCESS_SET:
8137
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8138
                                     errors.ECODE_INVAL)
8139
        size = disk_dict.get('size', None)
8140
        if size is None:
8141
          raise errors.OpPrereqError("Required disk parameter size missing",
8142
                                     errors.ECODE_INVAL)
8143
        try:
8144
          size = int(size)
8145
        except (TypeError, ValueError), err:
8146
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8147
                                     str(err), errors.ECODE_INVAL)
8148
        disk_dict['size'] = size
8149
      else:
8150
        # modification of disk
8151
        if 'size' in disk_dict:
8152
          raise errors.OpPrereqError("Disk size change not possible, use"
8153
                                     " grow-disk", errors.ECODE_INVAL)
8154

    
8155
    if disk_addremove > 1:
8156
      raise errors.OpPrereqError("Only one disk add or remove operation"
8157
                                 " supported at a time", errors.ECODE_INVAL)
8158

    
8159
    if self.op.disks and self.op.disk_template is not None:
8160
      raise errors.OpPrereqError("Disk template conversion and other disk"
8161
                                 " changes not supported at the same time",
8162
                                 errors.ECODE_INVAL)
8163

    
8164
    if self.op.disk_template:
8165
      _CheckDiskTemplate(self.op.disk_template)
8166
      if (self.op.disk_template in constants.DTS_NET_MIRROR and
8167
          self.op.remote_node is None):
8168
        raise errors.OpPrereqError("Changing the disk template to a mirrored"
8169
                                   " one requires specifying a secondary node",
8170
                                   errors.ECODE_INVAL)
8171

    
8172
    # NIC validation
8173
    nic_addremove = 0
8174
    for nic_op, nic_dict in self.op.nics:
8175
      if nic_op == constants.DDM_REMOVE:
8176
        nic_addremove += 1
8177
        continue
8178
      elif nic_op == constants.DDM_ADD:
8179
        nic_addremove += 1
8180
      else:
8181
        if not isinstance(nic_op, int):
8182
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8183
        if not isinstance(nic_dict, dict):
8184
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8185
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8186

    
8187
      # nic_dict should be a dict
8188
      nic_ip = nic_dict.get('ip', None)
8189
      if nic_ip is not None:
8190
        if nic_ip.lower() == constants.VALUE_NONE:
8191
          nic_dict['ip'] = None
8192
        else:
8193
          if not utils.IsValidIP(nic_ip):
8194
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8195
                                       errors.ECODE_INVAL)
8196

    
8197
      nic_bridge = nic_dict.get('bridge', None)
8198
      nic_link = nic_dict.get('link', None)
8199
      if nic_bridge and nic_link:
8200
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8201
                                   " at the same time", errors.ECODE_INVAL)
8202
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8203
        nic_dict['bridge'] = None
8204
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8205
        nic_dict['link'] = None
8206

    
8207
      if nic_op == constants.DDM_ADD:
8208
        nic_mac = nic_dict.get('mac', None)
8209
        if nic_mac is None:
8210
          nic_dict['mac'] = constants.VALUE_AUTO
8211

    
8212
      if 'mac' in nic_dict:
8213
        nic_mac = nic_dict['mac']
8214
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8215
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8216

    
8217
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8218
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8219
                                     " modifying an existing nic",
8220
                                     errors.ECODE_INVAL)
8221

    
8222
    if nic_addremove > 1:
8223
      raise errors.OpPrereqError("Only one NIC add or remove operation"
8224
                                 " supported at a time", errors.ECODE_INVAL)
8225

    
8226
  def ExpandNames(self):
8227
    self._ExpandAndLockInstance()
8228
    self.needed_locks[locking.LEVEL_NODE] = []
8229
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8230

    
8231
  def DeclareLocks(self, level):
8232
    if level == locking.LEVEL_NODE:
8233
      self._LockInstancesNodes()
8234
      if self.op.disk_template and self.op.remote_node:
8235
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8236
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
8237

    
8238
  def BuildHooksEnv(self):
8239
    """Build hooks env.
8240

8241
    This runs on the master, primary and secondaries.
8242

8243
    """
8244
    args = dict()
8245
    if constants.BE_MEMORY in self.be_new:
8246
      args['memory'] = self.be_new[constants.BE_MEMORY]
8247
    if constants.BE_VCPUS in self.be_new:
8248
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
8249
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
8250
    # information at all.
8251
    if self.op.nics:
8252
      args['nics'] = []
8253
      nic_override = dict(self.op.nics)
8254
      c_nicparams = self.cluster.nicparams[constants.PP_DEFAULT]
8255
      for idx, nic in enumerate(self.instance.nics):
8256
        if idx in nic_override:
8257
          this_nic_override = nic_override[idx]
8258
        else:
8259
          this_nic_override = {}
8260
        if 'ip' in this_nic_override:
8261
          ip = this_nic_override['ip']
8262
        else:
8263
          ip = nic.ip
8264
        if 'mac' in this_nic_override:
8265
          mac = this_nic_override['mac']
8266
        else:
8267
          mac = nic.mac
8268
        if idx in self.nic_pnew:
8269
          nicparams = self.nic_pnew[idx]
8270
        else:
8271
          nicparams = objects.FillDict(c_nicparams, nic.nicparams)
8272
        mode = nicparams[constants.NIC_MODE]
8273
        link = nicparams[constants.NIC_LINK]
8274
        args['nics'].append((ip, mac, mode, link))
8275
      if constants.DDM_ADD in nic_override:
8276
        ip = nic_override[constants.DDM_ADD].get('ip', None)
8277
        mac = nic_override[constants.DDM_ADD]['mac']
8278
        nicparams = self.nic_pnew[constants.DDM_ADD]
8279
        mode = nicparams[constants.NIC_MODE]
8280
        link = nicparams[constants.NIC_LINK]
8281
        args['nics'].append((ip, mac, mode, link))
8282
      elif constants.DDM_REMOVE in nic_override:
8283
        del args['nics'][-1]
8284

    
8285
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
8286
    if self.op.disk_template:
8287
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
8288
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8289
    return env, nl, nl
8290

    
8291
  @staticmethod
8292
  def _GetUpdatedParams(old_params, update_dict,
8293
                        default_values, parameter_types):
8294
    """Return the new params dict for the given params.
8295

8296
    @type old_params: dict
8297
    @param old_params: old parameters
8298
    @type update_dict: dict
8299
    @param update_dict: dict containing new parameter values,
8300
                        or constants.VALUE_DEFAULT to reset the
8301
                        parameter to its default value
8302
    @type default_values: dict
8303
    @param default_values: default values for the filled parameters
8304
    @type parameter_types: dict
8305
    @param parameter_types: dict mapping target dict keys to types
8306
                            in constants.ENFORCEABLE_TYPES
8307
    @rtype: (dict, dict)
8308
    @return: (new_parameters, filled_parameters)
8309

8310
    """
8311
    params_copy = copy.deepcopy(old_params)
8312
    for key, val in update_dict.iteritems():
8313
      if val == constants.VALUE_DEFAULT:
8314
        try:
8315
          del params_copy[key]
8316
        except KeyError:
8317
          pass
8318
      else:
8319
        params_copy[key] = val
8320
    utils.ForceDictType(params_copy, parameter_types)
8321
    params_filled = objects.FillDict(default_values, params_copy)
8322
    return (params_copy, params_filled)
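    # Illustrative sketch, not part of the original code (key names below
    # are hypothetical):
    #   old_params     = {"memory": 512}
    #   update_dict    = {"memory": constants.VALUE_DEFAULT, "vcpus": 2}
    #   default_values = {"memory": 128, "vcpus": 1}
    # would yield roughly
    #   new_parameters    = {"vcpus": 2}
    #   filled_parameters = {"memory": 128, "vcpus": 2}
    # since VALUE_DEFAULT drops the key from the private dict and FillDict
    # re-adds it from the defaults; ForceDictType also coerces value types.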
8323

    
8324
  def CheckPrereq(self):
8325
    """Check prerequisites.
8326

8327
    This only checks the instance list against the existing names.
8328

8329
    """
8330
    self.force = self.op.force
8331

    
8332
    # checking the new params on the primary/secondary nodes
8333

    
8334
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8335
    cluster = self.cluster = self.cfg.GetClusterInfo()
8336
    assert self.instance is not None, \
8337
      "Cannot retrieve locked instance %s" % self.op.instance_name
8338
    pnode = instance.primary_node
8339
    nodelist = list(instance.all_nodes)
8340

    
8341
    if self.op.disk_template:
8342
      if instance.disk_template == self.op.disk_template:
8343
        raise errors.OpPrereqError("Instance already has disk template %s" %
8344
                                   instance.disk_template, errors.ECODE_INVAL)
8345

    
8346
      if (instance.disk_template,
8347
          self.op.disk_template) not in self._DISK_CONVERSIONS:
8348
        raise errors.OpPrereqError("Unsupported disk template conversion from"
8349
                                   " %s to %s" % (instance.disk_template,
8350
                                                  self.op.disk_template),
8351
                                   errors.ECODE_INVAL)
8352
      if self.op.disk_template in constants.DTS_NET_MIRROR:
8353
        _CheckNodeOnline(self, self.op.remote_node)
8354
        _CheckNodeNotDrained(self, self.op.remote_node)
8355
        disks = [{"size": d.size} for d in instance.disks]
8356
        required = _ComputeDiskSize(self.op.disk_template, disks)
8357
        _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8358
        _CheckInstanceDown(self, instance, "cannot change disk template")
8359

    
8360
    # hvparams processing
8361
    if self.op.hvparams:
8362
      i_hvdict, hv_new = self._GetUpdatedParams(
8363
                             instance.hvparams, self.op.hvparams,
8364
                             cluster.hvparams[instance.hypervisor],
8365
                             constants.HVS_PARAMETER_TYPES)
8366
      # local check
8367
      hypervisor.GetHypervisor(
8368
        instance.hypervisor).CheckParameterSyntax(hv_new)
8369
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8370
      self.hv_new = hv_new # the new actual values
8371
      self.hv_inst = i_hvdict # the new dict (without defaults)
8372
    else:
8373
      self.hv_new = self.hv_inst = {}
8374

    
8375
    # beparams processing
8376
    if self.op.beparams:
8377
      i_bedict, be_new = self._GetUpdatedParams(
8378
                             instance.beparams, self.op.beparams,
8379
                             cluster.beparams[constants.PP_DEFAULT],
8380
                             constants.BES_PARAMETER_TYPES)
8381
      self.be_new = be_new # the new actual values
8382
      self.be_inst = i_bedict # the new dict (without defaults)
8383
    else:
8384
      self.be_new = self.be_inst = {}
8385

    
8386
    self.warn = []
8387

    
8388
    if constants.BE_MEMORY in self.op.beparams and not self.force:
8389
      mem_check_list = [pnode]
8390
      if be_new[constants.BE_AUTO_BALANCE]:
8391
        # either we changed auto_balance to yes or it was from before
8392
        mem_check_list.extend(instance.secondary_nodes)
8393
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
8394
                                                  instance.hypervisor)
8395
      nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8396
                                         instance.hypervisor)
8397
      pninfo = nodeinfo[pnode]
8398
      msg = pninfo.fail_msg
8399
      if msg:
8400
        # Assume the primary node is unreachable and go ahead
8401
        self.warn.append("Can't get info from primary node %s: %s" %
8402
                         (pnode,  msg))
8403
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
8404
        self.warn.append("Node data from primary node %s doesn't contain"
8405
                         " free memory information" % pnode)
8406
      elif instance_info.fail_msg:
8407
        self.warn.append("Can't get instance runtime information: %s" %
8408
                        instance_info.fail_msg)
8409
      else:
8410
        if instance_info.payload:
8411
          current_mem = int(instance_info.payload['memory'])
8412
        else:
8413
          # Assume instance not running
8414
          # (there is a slight race condition here, but it's not very probable,
8415
          # and we have no other way to check)
8416
          current_mem = 0
8417
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8418
                    pninfo.payload['memory_free'])
8419
        if miss_mem > 0:
8420
          raise errors.OpPrereqError("This change will prevent the instance"
8421
                                     " from starting, due to %d MB of memory"
8422
                                     " missing on its primary node" % miss_mem,
8423
                                     errors.ECODE_NORES)
8424

    
8425
      if be_new[constants.BE_AUTO_BALANCE]:
8426
        for node, nres in nodeinfo.items():
8427
          if node not in instance.secondary_nodes:
8428
            continue
8429
          msg = nres.fail_msg
8430
          if msg:
8431
            self.warn.append("Can't get info from secondary node %s: %s" %
8432
                             (node, msg))
8433
          elif not isinstance(nres.payload.get('memory_free', None), int):
8434
            self.warn.append("Secondary node %s didn't return free"
8435
                             " memory information" % node)
8436
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8437
            self.warn.append("Not enough memory to failover instance to"
8438
                             " secondary node %s" % node)
8439

    
8440
    # NIC processing
8441
    self.nic_pnew = {}
8442
    self.nic_pinst = {}
8443
    for nic_op, nic_dict in self.op.nics:
8444
      if nic_op == constants.DDM_REMOVE:
8445
        if not instance.nics:
8446
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
8447
                                     errors.ECODE_INVAL)
8448
        continue
8449
      if nic_op != constants.DDM_ADD:
8450
        # an existing nic
8451
        if not instance.nics:
8452
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
8453
                                     " no NICs" % nic_op,
8454
                                     errors.ECODE_INVAL)
8455
        if nic_op < 0 or nic_op >= len(instance.nics):
8456
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
8457
                                     " are 0 to %d" %
8458
                                     (nic_op, len(instance.nics) - 1),
8459
                                     errors.ECODE_INVAL)
8460
        old_nic_params = instance.nics[nic_op].nicparams
8461
        old_nic_ip = instance.nics[nic_op].ip
8462
      else:
8463
        old_nic_params = {}
8464
        old_nic_ip = None
8465

    
8466
      update_params_dict = dict([(key, nic_dict[key])
8467
                                 for key in constants.NICS_PARAMETERS
8468
                                 if key in nic_dict])
8469

    
8470
      if 'bridge' in nic_dict:
8471
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
8472

    
8473
      new_nic_params, new_filled_nic_params = \
8474
          self._GetUpdatedParams(old_nic_params, update_params_dict,
8475
                                 cluster.nicparams[constants.PP_DEFAULT],
8476
                                 constants.NICS_PARAMETER_TYPES)
8477
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
8478
      self.nic_pinst[nic_op] = new_nic_params
8479
      self.nic_pnew[nic_op] = new_filled_nic_params
8480
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
8481

    
8482
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
8483
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
8484
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
8485
        if msg:
8486
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
8487
          if self.force:
8488
            self.warn.append(msg)
8489
          else:
8490
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
8491
      if new_nic_mode == constants.NIC_MODE_ROUTED:
8492
        if 'ip' in nic_dict:
8493
          nic_ip = nic_dict['ip']
8494
        else:
8495
          nic_ip = old_nic_ip
8496
        if nic_ip is None:
8497
          raise errors.OpPrereqError('Cannot set the nic ip to None'
8498
                                     ' on a routed nic', errors.ECODE_INVAL)
8499
      if 'mac' in nic_dict:
8500
        nic_mac = nic_dict['mac']
8501
        if nic_mac is None:
8502
          raise errors.OpPrereqError('Cannot set the nic mac to None',
8503
                                     errors.ECODE_INVAL)
8504
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8505
          # otherwise generate the mac
8506
          nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
8507
        else:
8508
          # or validate/reserve the current one
8509
          try:
8510
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
8511
          except errors.ReservationError:
8512
            raise errors.OpPrereqError("MAC address %s already in use"
8513
                                       " in cluster" % nic_mac,
8514
                                       errors.ECODE_NOTUNIQUE)
8515

    
8516
    # DISK processing
8517
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
8518
      raise errors.OpPrereqError("Disk operations not supported for"
8519
                                 " diskless instances",
8520
                                 errors.ECODE_INVAL)
8521
    for disk_op, _ in self.op.disks:
8522
      if disk_op == constants.DDM_REMOVE:
8523
        if len(instance.disks) == 1:
8524
          raise errors.OpPrereqError("Cannot remove the last disk of"
8525
                                     " an instance", errors.ECODE_INVAL)
8526
        _CheckInstanceDown(self, instance, "cannot remove disks")
8527

    
8528
      if (disk_op == constants.DDM_ADD and
8529
          len(instance.disks) >= constants.MAX_DISKS):
8530
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
8531
                                   " add more" % constants.MAX_DISKS,
8532
                                   errors.ECODE_STATE)
8533
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
8534
        # an existing disk
8535
        if disk_op < 0 or disk_op >= len(instance.disks):
8536
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
8537
                                     " are 0 to %d" %
8538
                                     (disk_op, len(instance.disks) - 1),
8539
                                     errors.ECODE_INVAL)
8540

    
8541
    # OS change
8542
    if self.op.os_name and not self.op.force:
8543
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8544
                      self.op.force_variant)
8545

    
8546
    return
8547

    
8548
  def _ConvertPlainToDrbd(self, feedback_fn):
8549
    """Converts an instance from plain to drbd.
8550

8551
    """
8552
    feedback_fn("Converting template to drbd")
8553
    instance = self.instance
8554
    pnode = instance.primary_node
8555
    snode = self.op.remote_node
8556

    
8557
    # create a fake disk info for _GenerateDiskTemplate
8558
    disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
8559
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
8560
                                      instance.name, pnode, [snode],
8561
                                      disk_info, None, None, 0)
8562
    info = _GetInstanceInfoText(instance)
8563
    feedback_fn("Creating additional volumes...")
8564
    # first, create the missing data and meta devices
8565
    for disk in new_disks:
8566
      # unfortunately this is... not too nice
8567
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
8568
                            info, True)
8569
      for child in disk.children:
8570
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
8571
    # at this stage, all new LVs have been created, we can rename the
8572
    # old ones
8573
    feedback_fn("Renaming original volumes...")
8574
    rename_list = [(o, n.children[0].logical_id)
8575
                   for (o, n) in zip(instance.disks, new_disks)]
8576
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
8577
    result.Raise("Failed to rename original LVs")
8578

    
8579
    feedback_fn("Initializing DRBD devices...")
8580
    # all child devices are in place, we can now create the DRBD devices
8581
    for disk in new_disks:
8582
      for node in [pnode, snode]:
8583
        f_create = node == pnode
8584
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
8585

    
8586
    # at this point, the instance has been modified
8587
    instance.disk_template = constants.DT_DRBD8
8588
    instance.disks = new_disks
8589
    self.cfg.Update(instance, feedback_fn)
8590

    
8591
    # disks are created, waiting for sync
8592
    disk_abort = not _WaitForSync(self, instance)
8593
    if disk_abort:
8594
      raise errors.OpExecError("There are some degraded disks for"
8595
                               " this instance, please cleanup manually")
8596

    
8597
  def _ConvertDrbdToPlain(self, feedback_fn):
8598
    """Converts an instance from drbd to plain.
8599

8600
    """
8601
    instance = self.instance
8602
    assert len(instance.secondary_nodes) == 1
8603
    pnode = instance.primary_node
8604
    snode = instance.secondary_nodes[0]
8605
    feedback_fn("Converting template to plain")
8606

    
8607
    old_disks = instance.disks
8608
    new_disks = [d.children[0] for d in old_disks]
8609

    
8610
    # copy over size and mode
8611
    for parent, child in zip(old_disks, new_disks):
8612
      child.size = parent.size
8613
      child.mode = parent.mode
8614

    
8615
    # update instance structure
8616
    instance.disks = new_disks
8617
    instance.disk_template = constants.DT_PLAIN
8618
    self.cfg.Update(instance, feedback_fn)
8619

    
8620
    feedback_fn("Removing volumes on the secondary node...")
8621
    for disk in old_disks:
8622
      self.cfg.SetDiskID(disk, snode)
8623
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
8624
      if msg:
8625
        self.LogWarning("Could not remove block device %s on node %s,"
8626
                        " continuing anyway: %s", disk.iv_name, snode, msg)
8627

    
8628
    feedback_fn("Removing unneeded volumes on the primary node...")
8629
    for idx, disk in enumerate(old_disks):
8630
      meta = disk.children[1]
8631
      self.cfg.SetDiskID(meta, pnode)
8632
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
8633
      if msg:
8634
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
8635
                        " continuing anyway: %s", idx, pnode, msg)
8636

    
8637

    
8638
  def Exec(self, feedback_fn):
8639
    """Modifies an instance.
8640

8641
    All parameters take effect only at the next restart of the instance.
8642

8643
    """
8644
    # Process here the warnings from CheckPrereq, as we don't have a
8645
    # feedback_fn there.
8646
    for warn in self.warn:
8647
      feedback_fn("WARNING: %s" % warn)
8648

    
8649
    result = []
8650
    instance = self.instance
8651
    # disk changes
8652
    for disk_op, disk_dict in self.op.disks:
8653
      if disk_op == constants.DDM_REMOVE:
8654
        # remove the last disk
8655
        device = instance.disks.pop()
8656
        device_idx = len(instance.disks)
8657
        for node, disk in device.ComputeNodeTree(instance.primary_node):
8658
          self.cfg.SetDiskID(disk, node)
8659
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
8660
          if msg:
8661
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
8662
                            " continuing anyway", device_idx, node, msg)
8663
        result.append(("disk/%d" % device_idx, "remove"))
8664
      elif disk_op == constants.DDM_ADD:
8665
        # add a new disk
8666
        if instance.disk_template == constants.DT_FILE:
8667
          file_driver, file_path = instance.disks[0].logical_id
8668
          file_path = os.path.dirname(file_path)
8669
        else:
8670
          file_driver = file_path = None
8671
        disk_idx_base = len(instance.disks)
8672
        new_disk = _GenerateDiskTemplate(self,
8673
                                         instance.disk_template,
8674
                                         instance.name, instance.primary_node,
8675
                                         instance.secondary_nodes,
8676
                                         [disk_dict],
8677
                                         file_path,
8678
                                         file_driver,
8679
                                         disk_idx_base)[0]
8680
        instance.disks.append(new_disk)
8681
        info = _GetInstanceInfoText(instance)
8682

    
8683
        logging.info("Creating volume %s for instance %s",
8684
                     new_disk.iv_name, instance.name)
8685
        # Note: this needs to be kept in sync with _CreateDisks
8686
        #HARDCODE
8687
        for node in instance.all_nodes:
8688
          f_create = node == instance.primary_node
8689
          try:
8690
            _CreateBlockDev(self, node, instance, new_disk,
8691
                            f_create, info, f_create)
8692
          except errors.OpExecError, err:
8693
            self.LogWarning("Failed to create volume %s (%s) on"
8694
                            " node %s: %s",
8695
                            new_disk.iv_name, new_disk, node, err)
8696
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
8697
                       (new_disk.size, new_disk.mode)))
8698
      else:
8699
        # change a given disk
8700
        instance.disks[disk_op].mode = disk_dict['mode']
8701
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
8702

    
8703
    if self.op.disk_template:
8704
      r_shut = _ShutdownInstanceDisks(self, instance)
8705
      if not r_shut:
8706
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
8707
                                 " proceed with disk template conversion")
8708
      mode = (instance.disk_template, self.op.disk_template)
8709
      try:
8710
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
8711
      except:
8712
        self.cfg.ReleaseDRBDMinors(instance.name)
8713
        raise
8714
      result.append(("disk_template", self.op.disk_template))
8715

    
8716
    # NIC changes
8717
    for nic_op, nic_dict in self.op.nics:
8718
      if nic_op == constants.DDM_REMOVE:
8719
        # remove the last nic
8720
        del instance.nics[-1]
8721
        result.append(("nic.%d" % len(instance.nics), "remove"))
8722
      elif nic_op == constants.DDM_ADD:
8723
        # mac and bridge should be set by now
8724
        mac = nic_dict['mac']
8725
        ip = nic_dict.get('ip', None)
8726
        nicparams = self.nic_pinst[constants.DDM_ADD]
8727
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
8728
        instance.nics.append(new_nic)
8729
        result.append(("nic.%d" % (len(instance.nics) - 1),
8730
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
8731
                       (new_nic.mac, new_nic.ip,
8732
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
8733
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
8734
                       )))
8735
      else:
8736
        for key in 'mac', 'ip':
8737
          if key in nic_dict:
8738
            setattr(instance.nics[nic_op], key, nic_dict[key])
8739
        if nic_op in self.nic_pinst:
8740
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
8741
        for key, val in nic_dict.iteritems():
8742
          result.append(("nic.%s/%d" % (key, nic_op), val))
8743

    
8744
    # hvparams changes
8745
    if self.op.hvparams:
8746
      instance.hvparams = self.hv_inst
8747
      for key, val in self.op.hvparams.iteritems():
8748
        result.append(("hv/%s" % key, val))
8749

    
8750
    # beparams changes
8751
    if self.op.beparams:
8752
      instance.beparams = self.be_inst
8753
      for key, val in self.op.beparams.iteritems():
8754
        result.append(("be/%s" % key, val))
8755

    
8756
    # OS change
8757
    if self.op.os_name:
8758
      instance.os = self.op.os_name
8759

    
8760
    self.cfg.Update(instance, feedback_fn)
8761

    
8762
    return result
8763

    
8764
  _DISK_CONVERSIONS = {
8765
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
8766
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
8767
    }
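  # Note: the values above are the plain functions defined in this class
  # body (not bound methods), which is why Exec calls them explicitly as
  # self._DISK_CONVERSIONS[mode](self, feedback_fn).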
8768

    
8769
class LUQueryExports(NoHooksLU):
8770
  """Query the exports list
8771

8772
  """
8773
  _OP_REQP = ['nodes']
8774
  REQ_BGL = False
8775

    
8776
  def ExpandNames(self):
8777
    self.needed_locks = {}
8778
    self.share_locks[locking.LEVEL_NODE] = 1
8779
    if not self.op.nodes:
8780
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8781
    else:
8782
      self.needed_locks[locking.LEVEL_NODE] = \
8783
        _GetWantedNodes(self, self.op.nodes)
8784

    
8785
  def CheckPrereq(self):
8786
    """Check prerequisites.
8787

8788
    """
8789
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
8790

    
8791
  def Exec(self, feedback_fn):
8792
    """Compute the list of all the exported system images.
8793

8794
    @rtype: dict
8795
    @return: a dictionary with the structure node->(export-list)
8796
        where export-list is a list of the instances exported on
8797
        that node.
8798

8799
    """
8800
    rpcresult = self.rpc.call_export_list(self.nodes)
8801
    result = {}
8802
    for node in rpcresult:
8803
      if rpcresult[node].fail_msg:
8804
        result[node] = False
8805
      else:
8806
        result[node] = rpcresult[node].payload
8807

    
8808
    return result
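    # Illustrative example only (hypothetical node/instance names):
    #   {"node1.example.com": ["instance1.example.com"],
    #    "node2.example.com": False}
    # where False marks a node whose export list could not be retrieved.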
8809

    
8810

    
8811
class LUExportInstance(LogicalUnit):
8812
  """Export an instance to an image in the cluster.
8813

8814
  """
8815
  HPATH = "instance-export"
8816
  HTYPE = constants.HTYPE_INSTANCE
8817
  _OP_REQP = ["instance_name", "target_node", "shutdown"]
8818
  REQ_BGL = False
8819

    
8820
  def CheckArguments(self):
8821
    """Check the arguments.
8822

8823
    """
8824
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
8825
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
8826

    
8827
  def ExpandNames(self):
8828
    self._ExpandAndLockInstance()
8829
    # FIXME: lock only instance primary and destination node
8830
    #
8831
    # Sad but true, for now we have to lock all nodes, as we don't know where
8832
    # the previous export might be, and in this LU we search for it and
8833
    # remove it from its current node. In the future we could fix this by:
8834
    #  - making a tasklet to search (share-lock all), then create the new one,
8835
    #    then one to remove, after
8836
    #  - removing the removal operation altogether
8837
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8838

    
8839
  def DeclareLocks(self, level):
8840
    """Last minute lock declaration."""
8841
    # All nodes are locked anyway, so nothing to do here.
8842

    
8843
  def BuildHooksEnv(self):
8844
    """Build hooks env.
8845

8846
    This will run on the master, primary node and target node.
8847

8848
    """
8849
    env = {
8850
      "EXPORT_NODE": self.op.target_node,
8851
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
8852
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
8853
      }
8854
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8855
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node,
8856
          self.op.target_node]
8857
    return env, nl, nl
8858

    
8859
  def CheckPrereq(self):
8860
    """Check prerequisites.
8861

8862
    This checks that the instance and node names are valid.
8863

8864
    """
8865
    instance_name = self.op.instance_name
8866
    self.instance = self.cfg.GetInstanceInfo(instance_name)
8867
    assert self.instance is not None, \
8868
          "Cannot retrieve locked instance %s" % self.op.instance_name
8869
    _CheckNodeOnline(self, self.instance.primary_node)
8870

    
8871
    self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
8872
    self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
8873
    assert self.dst_node is not None
8874

    
8875
    _CheckNodeOnline(self, self.dst_node.name)
8876
    _CheckNodeNotDrained(self, self.dst_node.name)
8877

    
8878
    # instance disk type verification
8879
    for disk in self.instance.disks:
8880
      if disk.dev_type == constants.LD_FILE:
8881
        raise errors.OpPrereqError("Export not supported for instances with"
8882
                                   " file-based disks", errors.ECODE_INVAL)
8883

    
8884
  def _CreateSnapshots(self, feedback_fn):
8885
    """Creates an LVM snapshot for every disk of the instance.
8886

8887
    @return: List of snapshots as L{objects.Disk} instances
8888

8889
    """
8890
    instance = self.instance
8891
    src_node = instance.primary_node
8892

    
8893
    vgname = self.cfg.GetVGName()
8894

    
8895
    snap_disks = []
8896

    
8897
    for idx, disk in enumerate(instance.disks):
8898
      feedback_fn("Creating a snapshot of disk/%s on node %s" %
8899
                  (idx, src_node))
8900

    
8901
      # result.payload will be a snapshot of an lvm leaf of the one we
8902
      # passed
8903
      result = self.rpc.call_blockdev_snapshot(src_node, disk)
8904
      msg = result.fail_msg
8905
      if msg:
8906
        self.LogWarning("Could not snapshot disk/%s on node %s: %s",
8907
                        idx, src_node, msg)
8908
        snap_disks.append(False)
8909
      else:
8910
        disk_id = (vgname, result.payload)
8911
        new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
8912
                               logical_id=disk_id, physical_id=disk_id,
8913
                               iv_name=disk.iv_name)
8914
        snap_disks.append(new_dev)
8915

    
8916
    return snap_disks
8917

    
8918
  def _RemoveSnapshot(self, feedback_fn, snap_disks, disk_index):
8919
    """Removes an LVM snapshot.
8920

8921
    @type snap_disks: list
8922
    @param snap_disks: The list of all snapshots as returned by
8923
                       L{_CreateSnapshots}
8924
    @type disk_index: number
8925
    @param disk_index: Index of the snapshot to be removed
8926
    @rtype: bool
8927
    @return: Whether removal was successful or not
8928

8929
    """
8930
    disk = snap_disks[disk_index]
8931
    if disk:
8932
      src_node = self.instance.primary_node
8933

    
8934
      feedback_fn("Removing snapshot of disk/%s on node %s" %
8935
                  (disk_index, src_node))
8936

    
8937
      result = self.rpc.call_blockdev_remove(src_node, disk)
8938
      if not result.fail_msg:
8939
        return True
8940

    
8941
      self.LogWarning("Could not remove snapshot for disk/%d from node"
8942
                      " %s: %s", disk_index, src_node, result.fail_msg)
8943

    
8944
    return False
8945

    
8946
  def _CleanupExports(self, feedback_fn):
8947
    """Removes exports of current instance from all other nodes.
8948

8949
    If an instance in a cluster with nodes A..D was exported to node C, its
8950
    exports will be removed from the nodes A, B and D.
8951

8952
    """
8953
    nodelist = self.cfg.GetNodeList()
8954
    nodelist.remove(self.dst_node.name)
8955

    
8956
    # on one-node clusters nodelist will be empty after the removal;
8957
    # if we proceed, the backup would be removed because OpQueryExports
8958
    # substitutes an empty list with the full cluster node list.
8959
    iname = self.instance.name
8960
    if nodelist:
8961
      feedback_fn("Removing old exports for instance %s" % iname)
8962
      exportlist = self.rpc.call_export_list(nodelist)
8963
      for node in exportlist:
8964
        if exportlist[node].fail_msg:
8965
          continue
8966
        if iname in exportlist[node].payload:
8967
          msg = self.rpc.call_export_remove(node, iname).fail_msg
8968
          if msg:
8969
            self.LogWarning("Could not remove older export for instance %s"
8970
                            " on node %s: %s", iname, node, msg)
8971

    
8972
  def Exec(self, feedback_fn):
8973
    """Export an instance to an image in the cluster.
8974

8975
    """
8976
    instance = self.instance
8977
    dst_node = self.dst_node
8978
    src_node = instance.primary_node
8979

    
8980
    if self.op.shutdown:
8981
      # shutdown the instance, but not the disks
8982
      feedback_fn("Shutting down instance %s" % instance.name)
8983
      result = self.rpc.call_instance_shutdown(src_node, instance,
8984
                                               self.shutdown_timeout)
8985
      result.Raise("Could not shutdown instance %s on"
8986
                   " node %s" % (instance.name, src_node))
8987

    
8988
    # set the disks ID correctly since call_instance_start needs the
8989
    # correct drbd minor to create the symlinks
8990
    for disk in instance.disks:
8991
      self.cfg.SetDiskID(disk, src_node)
8992

    
8993
    activate_disks = (not instance.admin_up)
8994

    
8995
    if activate_disks:
8996
      # Activate the instance disks if we're exporting a stopped instance
8997
      feedback_fn("Activating disks for %s" % instance.name)
8998
      _StartInstanceDisks(self, instance, None)
8999

    
9000
    try:
9001
      # per-disk results
9002
      dresults = []
9003
      removed_snaps = [False] * len(instance.disks)
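      # removed_snaps records, per disk index, whether the corresponding
      # snapshot has already been deleted, so that the cleanup in the
      # finally clause below only removes the leftovers.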
9004

    
9005
      snap_disks = None
9006
      try:
9007
        try:
9008
          snap_disks = self._CreateSnapshots(feedback_fn)
9009
        finally:
9010
          if self.op.shutdown and instance.admin_up:
9011
            feedback_fn("Starting instance %s" % instance.name)
9012
            result = self.rpc.call_instance_start(src_node, instance,
9013
                                                  None, None)
9014
            msg = result.fail_msg
9015
            if msg:
9016
              _ShutdownInstanceDisks(self, instance)
9017
              raise errors.OpExecError("Could not start instance: %s" % msg)
9018

    
9019
        assert len(snap_disks) == len(instance.disks)
9020
        assert len(removed_snaps) == len(instance.disks)
9021

    
9022
        # TODO: check for size
9023

    
9024
        cluster_name = self.cfg.GetClusterName()
9025
        for idx, dev in enumerate(snap_disks):
9026
          feedback_fn("Exporting snapshot %s from %s to %s" %
9027
                      (idx, src_node, dst_node.name))
9028
          if dev:
9029
            # FIXME: pass debug from opcode to backend
9030
            result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
9031
                                                   instance, cluster_name,
9032
                                                   idx, self.op.debug_level)
9033
            msg = result.fail_msg
9034
            if msg:
9035
              self.LogWarning("Could not export disk/%s from node %s to"
9036
                              " node %s: %s", idx, src_node, dst_node.name, msg)
9037
              dresults.append(False)
9038
            else:
9039
              dresults.append(True)
9040

    
9041
            # Remove snapshot
9042
            if self._RemoveSnapshot(feedback_fn, snap_disks, idx):
9043
              removed_snaps[idx] = True
9044
          else:
9045
            dresults.append(False)
9046

    
9047
        assert len(dresults) == len(instance.disks)
9048

    
9049
        # Check for backwards compatibility
9050
        assert compat.all(isinstance(i, bool) for i in dresults), \
9051
               "Not all results are boolean: %r" % dresults
9052

    
9053
        feedback_fn("Finalizing export on %s" % dst_node.name)
9054
        result = self.rpc.call_finalize_export(dst_node.name, instance,
9055
                                               snap_disks)
9056
        msg = result.fail_msg
9057
        fin_resu = not msg
9058
        if msg:
9059
          self.LogWarning("Could not finalize export for instance %s"
9060
                          " on node %s: %s", instance.name, dst_node.name, msg)
9061

    
9062
      finally:
9063
        # Remove all snapshots
9064
        assert len(removed_snaps) == len(instance.disks)
9065
        for idx, removed in enumerate(removed_snaps):
9066
          if not removed:
9067
            self._RemoveSnapshot(feedback_fn, snap_disks, idx)
9068

    
9069
    finally:
9070
      if activate_disks:
9071
        feedback_fn("Deactivating disks for %s" % instance.name)
9072
        _ShutdownInstanceDisks(self, instance)
9073

    
9074
    self._CleanupExports(feedback_fn)
9075

    
9076
    return fin_resu, dresults
9077

    
9078

    
9079
class LURemoveExport(NoHooksLU):
9080
  """Remove exports related to the named instance.
9081

9082
  """
9083
  _OP_REQP = ["instance_name"]
9084
  REQ_BGL = False
9085

    
9086
  def ExpandNames(self):
9087
    self.needed_locks = {}
9088
    # We need all nodes to be locked in order for RemoveExport to work, but we
9089
    # don't need to lock the instance itself, as nothing will happen to it (and
9090
    # we can remove exports also for a removed instance)
9091
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9092

    
9093
  def CheckPrereq(self):
9094
    """Check prerequisites.
9095
    """
9096
    pass
9097

    
9098
  def Exec(self, feedback_fn):
9099
    """Remove any export.
9100

9101
    """
9102
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
9103
    # If the instance was not found we'll try with the name that was passed in.
9104
    # This will only work if it was an FQDN, though.
9105
    fqdn_warn = False
9106
    if not instance_name:
9107
      fqdn_warn = True
9108
      instance_name = self.op.instance_name
9109

    
9110
    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
9111
    exportlist = self.rpc.call_export_list(locked_nodes)
9112
    found = False
9113
    for node in exportlist:
9114
      msg = exportlist[node].fail_msg
9115
      if msg:
9116
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
9117
        continue
9118
      if instance_name in exportlist[node].payload:
9119
        found = True
9120
        result = self.rpc.call_export_remove(node, instance_name)
9121
        msg = result.fail_msg
9122
        if msg:
9123
          logging.error("Could not remove export for instance %s"
9124
                        " on node %s: %s", instance_name, node, msg)
9125

    
9126
    if fqdn_warn and not found:
9127
      feedback_fn("Export not found. If trying to remove an export belonging"
9128
                  " to a deleted instance please use its Fully Qualified"
9129
                  " Domain Name.")
9130

    
9131

    
9132
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
9133
  """Generic tags LU.
9134

9135
  This is an abstract class which is the parent of all the other tags LUs.
9136

9137
  """
9138

    
9139
  def ExpandNames(self):
9140
    self.needed_locks = {}
9141
    if self.op.kind == constants.TAG_NODE:
9142
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
9143
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
9144
    elif self.op.kind == constants.TAG_INSTANCE:
9145
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
9146
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
9147

    
9148
  def CheckPrereq(self):
9149
    """Check prerequisites.
9150

9151
    """
9152
    if self.op.kind == constants.TAG_CLUSTER:
9153
      self.target = self.cfg.GetClusterInfo()
9154
    elif self.op.kind == constants.TAG_NODE:
9155
      self.target = self.cfg.GetNodeInfo(self.op.name)
9156
    elif self.op.kind == constants.TAG_INSTANCE:
9157
      self.target = self.cfg.GetInstanceInfo(self.op.name)
9158
    else:
9159
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
9160
                                 str(self.op.kind), errors.ECODE_INVAL)
9161

    
9162

    
9163
class LUGetTags(TagsLU):
9164
  """Returns the tags of a given object.
9165

9166
  """
9167
  _OP_REQP = ["kind", "name"]
9168
  REQ_BGL = False
9169

    
9170
  def Exec(self, feedback_fn):
9171
    """Returns the tag list.
9172

9173
    """
9174
    return list(self.target.GetTags())
9175

    
9176

    
9177
class LUSearchTags(NoHooksLU):
9178
  """Searches the tags for a given pattern.
9179

9180
  """
9181
  _OP_REQP = ["pattern"]
9182
  REQ_BGL = False
9183

    
9184
  def ExpandNames(self):
9185
    self.needed_locks = {}
9186

    
9187
  def CheckPrereq(self):
9188
    """Check prerequisites.
9189

9190
    This checks the pattern passed for validity by compiling it.
9191

9192
    """
9193
    try:
9194
      self.re = re.compile(self.op.pattern)
9195
    except re.error, err:
9196
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
9197
                                 (self.op.pattern, err), errors.ECODE_INVAL)
9198

    
9199
  def Exec(self, feedback_fn):
9200
    """Returns the tag list.
9201

9202
    """
9203
    cfg = self.cfg
9204
    tgts = [("/cluster", cfg.GetClusterInfo())]
9205
    ilist = cfg.GetAllInstancesInfo().values()
9206
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
9207
    nlist = cfg.GetAllNodesInfo().values()
9208
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
9209
    results = []
9210
    for path, target in tgts:
9211
      for tag in target.GetTags():
9212
        if self.re.search(tag):
9213
          results.append((path, tag))
9214
    return results
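    # Illustrative example only (hypothetical names): a search for "web"
    # might return
    #   [("/cluster", "web-cluster"),
    #    ("/instances/inst1.example.com", "webserver")]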
9215

    
9216

    
9217
class LUAddTags(TagsLU):
9218
  """Sets a tag on a given object.
9219

9220
  """
9221
  _OP_REQP = ["kind", "name", "tags"]
9222
  REQ_BGL = False
9223

    
9224
  def CheckPrereq(self):
9225
    """Check prerequisites.
9226

9227
    This checks the type and length of the tag name and value.
9228

9229
    """
9230
    TagsLU.CheckPrereq(self)
9231
    for tag in self.op.tags:
9232
      objects.TaggableObject.ValidateTag(tag)
9233

    
9234
  def Exec(self, feedback_fn):
9235
    """Sets the tag.
9236

9237
    """
9238
    try:
9239
      for tag in self.op.tags:
9240
        self.target.AddTag(tag)
9241
    except errors.TagError, err:
9242
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
9243
    self.cfg.Update(self.target, feedback_fn)
9244

    
9245

    
9246
class LUDelTags(TagsLU):
9247
  """Delete a list of tags from a given object.
9248

9249
  """
9250
  _OP_REQP = ["kind", "name", "tags"]
9251
  REQ_BGL = False
9252

    
9253
  def CheckPrereq(self):
9254
    """Check prerequisites.
9255

9256
    This checks that we have the given tag.
9257

9258
    """
9259
    TagsLU.CheckPrereq(self)
9260
    for tag in self.op.tags:
9261
      objects.TaggableObject.ValidateTag(tag)
9262
    del_tags = frozenset(self.op.tags)
9263
    cur_tags = self.target.GetTags()
9264
    if not del_tags <= cur_tags:
9265
      diff_tags = del_tags - cur_tags
9266
      diff_names = ["'%s'" % tag for tag in diff_tags]
9267
      diff_names.sort()
9268
      raise errors.OpPrereqError("Tag(s) %s not found" %
9269
                                 (",".join(diff_names)), errors.ECODE_NOENT)
9270

    
9271
  def Exec(self, feedback_fn):
9272
    """Remove the tag from the object.
9273

9274
    """
9275
    for tag in self.op.tags:
9276
      self.target.RemoveTag(tag)
9277
    self.cfg.Update(self.target, feedback_fn)
9278

    
9279

    
9280
class LUTestDelay(NoHooksLU):
9281
  """Sleep for a specified amount of time.
9282

9283
  This LU sleeps on the master and/or nodes for a specified amount of
9284
  time.
9285

9286
  """
9287
  _OP_REQP = ["duration", "on_master", "on_nodes"]
9288
  REQ_BGL = False
9289

    
9290
  def ExpandNames(self):
9291
    """Expand names and set required locks.
9292

9293
    This expands the node list, if any.
9294

9295
    """
9296
    self.needed_locks = {}
9297
    if self.op.on_nodes:
9298
      # _GetWantedNodes can be used here, but is not always appropriate to use
9299
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
9300
      # more information.
9301
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
9302
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
9303

    
9304
  def CheckPrereq(self):
9305
    """Check prerequisites.
9306

9307
    """
9308

    
9309
  def Exec(self, feedback_fn):
9310
    """Do the actual sleep.
9311

9312
    """
9313
    if self.op.on_master:
9314
      if not utils.TestDelay(self.op.duration):
9315
        raise errors.OpExecError("Error during master delay test")
9316
    if self.op.on_nodes:
9317
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
9318
      for node, node_result in result.items():
9319
        node_result.Raise("Failure during rpc call to node %s" % node)
9320

    
9321

    
9322
class IAllocator(object):
9323
  """IAllocator framework.
9324

9325
  An IAllocator instance has four sets of attributes:
9326
    - cfg that is needed to query the cluster
9327
    - input data (all members of the per-mode _*_KEYS attribute are required)
9328
    - four buffer attributes (in|out_data|text), that represent the
9329
      input (to the external script) in text and data structure format,
9330
      and the output from it, again in two formats
9331
    - the result variables from the script (success, info, result) for
9332
      easy usage
9333

9334
  """
9335
  # pylint: disable-msg=R0902
9336
  # lots of instance attributes
9337
  _ALLO_KEYS = [
9338
    "name", "mem_size", "disks", "disk_template",
9339
    "os", "tags", "nics", "vcpus", "hypervisor",
9340
    ]
9341
  _RELO_KEYS = [
9342
    "name", "relocate_from",
9343
    ]
9344
  _EVAC_KEYS = [
9345
    "evac_nodes",
9346
    ]
9347

    
9348
  def __init__(self, cfg, rpc, mode, **kwargs):
9349
    self.cfg = cfg
9350
    self.rpc = rpc
9351
    # init buffer variables
9352
    self.in_text = self.out_text = self.in_data = self.out_data = None
9353
    # init all input fields so that pylint is happy
9354
    self.mode = mode
9355
    self.mem_size = self.disks = self.disk_template = None
9356
    self.os = self.tags = self.nics = self.vcpus = None
9357
    self.hypervisor = None
9358
    self.relocate_from = None
9359
    self.name = None
9360
    self.evac_nodes = None
9361
    # computed fields
9362
    self.required_nodes = None
9363
    # init result fields
9364
    self.success = self.info = self.result = None
9365
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
9366
      keyset = self._ALLO_KEYS
9367
      fn = self._AddNewInstance
9368
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
9369
      keyset = self._RELO_KEYS
9370
      fn = self._AddRelocateInstance
9371
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
9372
      keyset = self._EVAC_KEYS
9373
      fn = self._AddEvacuateNodes
9374
    else:
9375
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
9376
                                   " IAllocator" % self.mode)
9377
    for key in kwargs:
9378
      if key not in keyset:
9379
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
9380
                                     " IAllocator" % key)
9381
      setattr(self, key, kwargs[key])
9382

    
9383
    for key in keyset:
9384
      if key not in kwargs:
9385
        raise errors.ProgrammerError("Missing input parameter '%s' to"
9386
                                     " IAllocator" % key)
9387
    self._BuildInputData(fn)
9388

    
9389
  def _ComputeClusterData(self):
9390
    """Compute the generic allocator input data.
9391

9392
    This is the data that is independent of the actual operation.
9393

9394
    """
9395
    cfg = self.cfg
9396
    cluster_info = cfg.GetClusterInfo()
9397
    # cluster data
9398
    data = {
9399
      "version": constants.IALLOCATOR_VERSION,
9400
      "cluster_name": cfg.GetClusterName(),
9401
      "cluster_tags": list(cluster_info.GetTags()),
9402
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
9403
      # we don't have job IDs
9404
      }
9405
    iinfo = cfg.GetAllInstancesInfo().values()
9406
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
9407

    
9408
    # node data
9409
    node_results = {}
9410
    node_list = cfg.GetNodeList()
9411

    
9412
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
9413
      hypervisor_name = self.hypervisor
9414
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
9415
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
9416
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
9417
      hypervisor_name = cluster_info.enabled_hypervisors[0]
9418

    
9419
    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
9420
                                        hypervisor_name)
9421
    node_iinfo = \
9422
      self.rpc.call_all_instances_info(node_list,
9423
                                       cluster_info.enabled_hypervisors)
9424
    for nname, nresult in node_data.items():
9425
      # first fill in static (config-based) values
9426
      ninfo = cfg.GetNodeInfo(nname)
9427
      pnr = {
9428
        "tags": list(ninfo.GetTags()),
9429
        "primary_ip": ninfo.primary_ip,
9430
        "secondary_ip": ninfo.secondary_ip,
9431
        "offline": ninfo.offline,
9432
        "drained": ninfo.drained,
9433
        "master_candidate": ninfo.master_candidate,
9434
        }
9435

    
9436
      if not (ninfo.offline or ninfo.drained):
9437
        nresult.Raise("Can't get data for node %s" % nname)
9438
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
9439
                                nname)
9440
        remote_info = nresult.payload
9441

    
9442
        for attr in ['memory_total', 'memory_free', 'memory_dom0',
9443
                     'vg_size', 'vg_free', 'cpu_total']:
9444
          if attr not in remote_info:
9445
            raise errors.OpExecError("Node '%s' didn't return attribute"
9446
                                     " '%s'" % (nname, attr))
9447
          if not isinstance(remote_info[attr], int):
9448
            raise errors.OpExecError("Node '%s' returned invalid value"
9449
                                     " for '%s': %s" %
9450
                                     (nname, attr, remote_info[attr]))
9451
        # compute memory used by primary instances
9452
        i_p_mem = i_p_up_mem = 0
9453
        for iinfo, beinfo in i_list:
9454
          if iinfo.primary_node == nname:
9455
            i_p_mem += beinfo[constants.BE_MEMORY]
9456
            if iinfo.name not in node_iinfo[nname].payload:
9457
              i_used_mem = 0
9458
            else:
9459
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
9460
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
9461
            remote_info['memory_free'] -= max(0, i_mem_diff)
9462

    
9463
            if iinfo.admin_up:
9464
              i_p_up_mem += beinfo[constants.BE_MEMORY]
9465

    
9466
        # compute memory used by instances
9467
        pnr_dyn = {
9468
          "total_memory": remote_info['memory_total'],
9469
          "reserved_memory": remote_info['memory_dom0'],
9470
          "free_memory": remote_info['memory_free'],
9471
          "total_disk": remote_info['vg_size'],
9472
          "free_disk": remote_info['vg_free'],
9473
          "total_cpus": remote_info['cpu_total'],
9474
          "i_pri_memory": i_p_mem,
9475
          "i_pri_up_memory": i_p_up_mem,
9476
          }
9477
        pnr.update(pnr_dyn)
9478

    
9479
      node_results[nname] = pnr
9480
    data["nodes"] = node_results
9481

    
9482
    # instance data
9483
    instance_data = {}
9484
    for iinfo, beinfo in i_list:
9485
      nic_data = []
9486
      for nic in iinfo.nics:
9487
        filled_params = objects.FillDict(
9488
            cluster_info.nicparams[constants.PP_DEFAULT],
9489
            nic.nicparams)
9490
        nic_dict = {"mac": nic.mac,
9491
                    "ip": nic.ip,
9492
                    "mode": filled_params[constants.NIC_MODE],
9493
                    "link": filled_params[constants.NIC_LINK],
9494
                   }
9495
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
9496
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
9497
        nic_data.append(nic_dict)
9498
      pir = {
9499
        "tags": list(iinfo.GetTags()),
9500
        "admin_up": iinfo.admin_up,
9501
        "vcpus": beinfo[constants.BE_VCPUS],
9502
        "memory": beinfo[constants.BE_MEMORY],
9503
        "os": iinfo.os,
9504
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
9505
        "nics": nic_data,
9506
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
9507
        "disk_template": iinfo.disk_template,
9508
        "hypervisor": iinfo.hypervisor,
9509
        }
9510
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
9511
                                                 pir["disks"])
9512
      instance_data[iinfo.name] = pir
9513

    
9514
    data["instances"] = instance_data
9515

    
9516
    self.in_data = data
9517

    
9518
  def _AddNewInstance(self):
9519
    """Add new instance data to allocator structure.
9520

9521
    This in combination with _ComputeClusterData will create the
9522
    correct structure needed as input for the allocator.
9523

9524
    The checks for the completeness of the opcode must have already been
9525
    done.
9526

9527
    """
9528
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)
9529

    
9530
    if self.disk_template in constants.DTS_NET_MIRROR:
9531
      self.required_nodes = 2
9532
    else:
9533
      self.required_nodes = 1
9534
    request = {
9535
      "name": self.name,
9536
      "disk_template": self.disk_template,
9537
      "tags": self.tags,
9538
      "os": self.os,
9539
      "vcpus": self.vcpus,
9540
      "memory": self.mem_size,
9541
      "disks": self.disks,
9542
      "disk_space_total": disk_space,
9543
      "nics": self.nics,
9544
      "required_nodes": self.required_nodes,
9545
      }
9546
    return request
9547

    
9548
  def _AddRelocateInstance(self):
9549
    """Add relocate instance data to allocator structure.
9550

9551
    This in combination with _ComputeClusterData will create the
9552
    correct structure needed as input for the allocator.
9553

9554
    The checks for the completeness of the opcode must have already been
9555
    done.
9556

9557
    """
9558
    instance = self.cfg.GetInstanceInfo(self.name)
9559
    if instance is None:
9560
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
9561
                                   " IAllocator" % self.name)
9562

    
9563
    if instance.disk_template not in constants.DTS_NET_MIRROR:
9564
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
9565
                                 errors.ECODE_INVAL)
9566

    
9567
    if len(instance.secondary_nodes) != 1:
9568
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
9569
                                 errors.ECODE_STATE)
9570

    
9571
    self.required_nodes = 1
9572
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
9573
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
9574

    
9575
    request = {
9576
      "name": self.name,
9577
      "disk_space_total": disk_space,
9578
      "required_nodes": self.required_nodes,
9579
      "relocate_from": self.relocate_from,
9580
      }
9581
    return request
9582

    
9583
  def _AddEvacuateNodes(self):
9584
    """Add evacuate nodes data to allocator structure.
9585

9586
    """
9587
    request = {
9588
      "evac_nodes": self.evac_nodes
9589
      }
9590
    return request
9591

    
9592
  def _BuildInputData(self, fn):
9593
    """Build input data structures.
9594

9595
    """
9596
    self._ComputeClusterData()
9597

    
9598
    request = fn()
9599
    request["type"] = self.mode
9600
    self.in_data["request"] = request
9601

    
9602
    self.in_text = serializer.Dump(self.in_data)
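    # For illustration only: after this point self.in_data has roughly the
    # following top-level shape (see _ComputeClusterData above):
    #   {"version": ..., "cluster_name": ..., "cluster_tags": [...],
    #    "enabled_hypervisors": [...], "nodes": {...}, "instances": {...},
    #    "request": {"type": <mode>, ...mode-specific keys...}}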
9603

    
9604
  def Run(self, name, validate=True, call_fn=None):
9605
    """Run an instance allocator and return the results.
9606

9607
    """
9608
    if call_fn is None:
9609
      call_fn = self.rpc.call_iallocator_runner
9610

    
9611
    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
9612
    result.Raise("Failure while running the iallocator script")
9613

    
9614
    self.out_text = result.payload
9615
    if validate:
9616
      self._ValidateResult()
9617

    
9618
  def _ValidateResult(self):
9619
    """Process the allocator results.
9620

9621
    This will process and if successful save the result in
9622
    self.out_data and the other parameters.
9623

9624
    """
9625
    try:
9626
      rdict = serializer.Load(self.out_text)
9627
    except Exception, err:
9628
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
9629

    
9630
    if not isinstance(rdict, dict):
9631
      raise errors.OpExecError("Can't parse iallocator results: not a dict")
9632

    
9633
    # TODO: remove backwards compatibility in later versions
9634
    if "nodes" in rdict and "result" not in rdict:
9635
      rdict["result"] = rdict["nodes"]
9636
      del rdict["nodes"]
9637

    
9638
    for key in "success", "info", "result":
9639
      if key not in rdict:
9640
        raise errors.OpExecError("Can't parse iallocator results:"
9641
                                 " missing key '%s'" % key)
9642
      setattr(self, key, rdict[key])
9643

    
9644
    if not isinstance(rdict["result"], list):
9645
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
9646
                               " is not a list")
9647
    self.out_data = rdict
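    # Illustrative example of an accepted result (hypothetical values):
    #   {"success": True, "info": "allocation successful",
    #    "result": ["node1.example.com", "node2.example.com"]}
    # i.e. all three keys must be present and "result" must be a list.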
9648

    
9649

    
9650
class LUTestAllocator(NoHooksLU):
9651
  """Run allocator tests.
9652

9653
  This LU runs the allocator tests
9654

9655
  """
9656
  _OP_REQP = ["direction", "mode", "name"]
9657

    
9658
  def CheckPrereq(self):
9659
    """Check prerequisites.
9660

9661
    This checks the opcode parameters depending on the director and mode test.
9662

9663
    """
9664
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
9665
      for attr in ["name", "mem_size", "disks", "disk_template",
9666
                   "os", "tags", "nics", "vcpus"]:
9667
        if not hasattr(self.op, attr):
9668
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
9669
                                     attr, errors.ECODE_INVAL)
9670
      iname = self.cfg.ExpandInstanceName(self.op.name)
9671
      if iname is not None:
9672
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
9673
                                   iname, errors.ECODE_EXISTS)
9674
      if not isinstance(self.op.nics, list):
9675
        raise errors.OpPrereqError("Invalid parameter 'nics'",
9676
                                   errors.ECODE_INVAL)
9677
      for row in self.op.nics:
9678
        if (not isinstance(row, dict) or
9679
            "mac" not in row or
9680
            "ip" not in row or
9681
            "bridge" not in row):
9682
          raise errors.OpPrereqError("Invalid contents of the 'nics'"
9683
                                     " parameter", errors.ECODE_INVAL)
9684
      if not isinstance(self.op.disks, list):
9685
        raise errors.OpPrereqError("Invalid parameter 'disks'",
9686
                                   errors.ECODE_INVAL)
9687
      for row in self.op.disks:
9688
        if (not isinstance(row, dict) or
9689
            "size" not in row or
9690
            not isinstance(row["size"], int) or
9691
            "mode" not in row or
9692
            row["mode"] not in ['r', 'w']):
9693
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
9694
                                     " parameter", errors.ECODE_INVAL)
9695
      if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
9696
        self.op.hypervisor = self.cfg.GetHypervisorType()
9697
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
9698
      if not hasattr(self.op, "name"):
9699
        raise errors.OpPrereqError("Missing attribute 'name' on opcode input",
9700
                                   errors.ECODE_INVAL)
9701
      fname = _ExpandInstanceName(self.cfg, self.op.name)
9702
      self.op.name = fname
9703
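      # the allocator is asked to move the instance away from its current
      # secondary nodes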
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if not hasattr(self.op, "allocator") or self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
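    # build the IAllocator request with the keyword arguments that match the
    # requested mode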
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec" % self.op.mode)

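    # "in" tests only return the generated allocator input; "out" tests run
    # the named allocator and return its raw (unvalidated) output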
    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result