1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable-msg=W0201
25

    
26
# W0201 since most LU attributes are defined in CheckPrereq or similar
27
# functions
28

    
29
import os
30
import os.path
31
import time
32
import re
33
import platform
34
import logging
35
import copy
36
import OpenSSL
37

    
38
from ganeti import ssh
39
from ganeti import utils
40
from ganeti import errors
41
from ganeti import hypervisor
42
from ganeti import locking
43
from ganeti import constants
44
from ganeti import objects
45
from ganeti import serializer
46
from ganeti import ssconf
47
from ganeti import uidpool
48
from ganeti import compat
49

    
50

    
51
class LogicalUnit(object):
52
  """Logical Unit base class.
53

54
  Subclasses must follow these rules:
55
    - implement ExpandNames
56
    - implement CheckPrereq (except when tasklets are used)
57
    - implement Exec (except when tasklets are used)
58
    - implement BuildHooksEnv
59
    - redefine HPATH and HTYPE
60
    - optionally redefine their run requirements:
61
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
62

63
  Note that all commands require root permissions.
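
  Example of a minimal skeleton following these rules (illustrative only;
  LUHypotheticalNoop is not an actual LU defined in this module)::

    class LUHypotheticalNoop(NoHooksLU):
      _OP_REQP = []
      REQ_BGL = False

      def ExpandNames(self):
        self.needed_locks = {}

      def CheckPrereq(self):
        pass

      def Exec(self, feedback_fn):
        return True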
64

65
  @ivar dry_run_result: the value (if any) that will be returned to the caller
66
      in dry-run mode (signalled by opcode dry_run parameter)
67

68
  """
69
  HPATH = None
70
  HTYPE = None
71
  _OP_REQP = []
72
  REQ_BGL = True
73

    
74
  def __init__(self, processor, op, context, rpc):
75
    """Constructor for LogicalUnit.
76

77
    This needs to be overridden in derived classes in order to check op
78
    validity.
79

80
    """
81
    self.proc = processor
82
    self.op = op
83
    self.cfg = context.cfg
84
    self.context = context
85
    self.rpc = rpc
86
    # Dicts used to declare locking needs to mcpu
87
    self.needed_locks = None
88
    self.acquired_locks = {}
89
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
90
    self.add_locks = {}
91
    self.remove_locks = {}
92
    # Used to force good behavior when calling helper functions
93
    self.recalculate_locks = {}
94
    self.__ssh = None
95
    # logging
96
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
97
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
98
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
99
    # support for dry-run
100
    self.dry_run_result = None
101
    # support for generic debug attribute
102
    if (not hasattr(self.op, "debug_level") or
103
        not isinstance(self.op.debug_level, int)):
104
      self.op.debug_level = 0
105

    
106
    # Tasklets
107
    self.tasklets = None
108

    
109
    for attr_name in self._OP_REQP:
110
      attr_val = getattr(op, attr_name, None)
111
      if attr_val is None:
112
        raise errors.OpPrereqError("Required parameter '%s' missing" %
113
                                   attr_name, errors.ECODE_INVAL)
114

    
115
    self.CheckArguments()
116

    
117
  def __GetSSH(self):
118
    """Returns the SshRunner object
119

120
    """
121
    if not self.__ssh:
122
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
123
    return self.__ssh
124

    
125
  ssh = property(fget=__GetSSH)
126

    
127
  def CheckArguments(self):
128
    """Check syntactic validity for the opcode arguments.
129

130
    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing it separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)
138

139
    The function is allowed to change the self.op attribute so that
    later methods no longer need to worry about missing parameters.
141

142
    """
143
    pass
144

    
145
  def ExpandNames(self):
146
    """Expand names for this LU.
147

148
    This method is called before starting to execute the opcode, and it should
149
    update all the parameters of the opcode to their canonical form (e.g. a
150
    short node name must be fully expanded after this method has successfully
151
    completed). This way locking, hooks, logging, etc. can work correctly.
152

153
    LUs which implement this method must also populate the self.needed_locks
154
    member, as a dict with lock levels as keys, and a list of needed lock names
155
    as values. Rules:
156

157
      - use an empty dict if you don't need any lock
158
      - if you don't need any lock at a particular level omit that level
159
      - don't put anything for the BGL level
160
      - if you want all locks at a level use locking.ALL_SET as a value
161

162
    If you need to share locks (rather than acquire them exclusively) at one
163
    level you can modify self.share_locks, setting a true value (usually 1) for
164
    that level. By default locks are not shared.
165

166
    This function can also define a list of tasklets, which then will be
167
    executed in order instead of the usual LU-level CheckPrereq and Exec
168
    functions, if those are not defined by the LU.
169

170
    Examples::
171

172
      # Acquire all nodes and one instance
173
      self.needed_locks = {
174
        locking.LEVEL_NODE: locking.ALL_SET,
175
        locking.LEVEL_INSTANCE: ['instance1.example.tld'],
176
      }
177
      # Acquire just two nodes
178
      self.needed_locks = {
179
        locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
180
      }
181
      # Acquire no locks
182
      self.needed_locks = {} # No, you can't leave it to the default value None
183

184
    """
185
    # The implementation of this method is mandatory only if the new LU is
186
    # concurrent, so that old LUs don't need to be changed all at the same
187
    # time.
188
    if self.REQ_BGL:
189
      self.needed_locks = {} # Exclusive LUs don't need locks.
190
    else:
191
      raise NotImplementedError
192

    
193
  def DeclareLocks(self, level):
194
    """Declare LU locking needs for a level
195

196
    While most LUs can just declare their locking needs at ExpandNames time,
197
    sometimes some locks can only be calculated after others have already
    been acquired. This function is called just before acquiring locks at a
199
    particular level, but after acquiring the ones at lower levels, and permits
200
    such calculations. It can be used to modify self.needed_locks, and by
201
    default it does nothing.
202

203
    This function is only called if you have something already set in
204
    self.needed_locks for the level.
205

206
    @param level: Locking level which is going to be locked
207
    @type level: member of ganeti.locking.LEVELS
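
    Example of an override using the L{_LockInstancesNodes} helper
    (illustrative sketch; it assumes the LU has already set
    self.recalculate_locks[locking.LEVEL_NODE] in ExpandNames)::

      def DeclareLocks(self, level):
        if level == locking.LEVEL_NODE:
          self._LockInstancesNodes(primary_only=True)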
208

209
    """
210

    
211
  def CheckPrereq(self):
212
    """Check prerequisites for this LU.
213

214
    This method should check that the prerequisites for the execution
215
    of this LU are fulfilled. It can do internode communication, but
216
    it should be idempotent - no cluster or system changes are
217
    allowed.
218

219
    The method should raise errors.OpPrereqError in case something is
220
    not fulfilled. Its return value is ignored.
221

222
    This method should also update all the parameters of the opcode to
223
    their canonical form if it hasn't been done by ExpandNames before.
224

225
    """
226
    if self.tasklets is not None:
227
      for (idx, tl) in enumerate(self.tasklets):
228
        logging.debug("Checking prerequisites for tasklet %s/%s",
229
                      idx + 1, len(self.tasklets))
230
        tl.CheckPrereq()
231
    else:
232
      raise NotImplementedError
233

    
234
  def Exec(self, feedback_fn):
235
    """Execute the LU.
236

237
    This method should implement the actual work. It should raise
238
    errors.OpExecError for failures that are somewhat dealt with in
239
    code, or expected.
240

241
    """
242
    if self.tasklets is not None:
243
      for (idx, tl) in enumerate(self.tasklets):
244
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
245
        tl.Exec(feedback_fn)
246
    else:
247
      raise NotImplementedError
248

    
249
  def BuildHooksEnv(self):
250
    """Build hooks environment for this LU.
251

252
    This method should return a three-element tuple consisting of: a dict
253
    containing the environment that will be used for running the
254
    specific hook for this LU, a list of node names on which the hook
255
    should run before the execution, and a list of node names on which
256
    the hook should run after the execution.
257

258
    The keys of the dict must not be prefixed with 'GANETI_', as this
    prefix is added by the hooks runner. Note that the hooks runner will
    also add further keys of its own. If the LU doesn't define any
261
    environment, an empty dict (and not None) should be returned.
262

263
    If there are no nodes to return, an empty list (and not None) should
    be used.
264

265
    Note that if the HPATH for a LU class is None, this function will
266
    not be called.
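
    Example return value (illustrative only, not taken from a specific
    LU)::

      env = {"OP_TARGET": self.op.instance_name}
      nl = [self.cfg.GetMasterNode()]
      return env, nl, nl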
267

268
    """
269
    raise NotImplementedError
270

    
271
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
272
    """Notify the LU about the results of its hooks.
273

274
    This method is called every time a hooks phase is executed, and notifies
275
    the Logical Unit about the hooks' result. The LU can then use it to alter
276
    its result based on the hooks.  By default the method does nothing and the
277
    previous result is passed back unchanged, but any LU can override it if it
278
    wants to use the local cluster hook-scripts somehow.
279

280
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
281
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
282
    @param hook_results: the results of the multi-node hooks rpc call
283
    @param feedback_fn: function used to send feedback back to the caller
284
    @param lu_result: the previous Exec result this LU had, or None
285
        in the PRE phase
286
    @return: the new Exec result, based on the previous result
287
        and hook results
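
    Example of an override (illustrative sketch)::

      def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
        if phase == constants.HOOKS_PHASE_POST:
          # e.g. inspect hook_results here and adjust lu_result
          feedback_fn("* Hooks results received")
        return lu_result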
288

289
    """
290
    # API must be kept, thus we ignore the unused-argument and
    # could-be-a-function warnings
292
    # pylint: disable-msg=W0613,R0201
293
    return lu_result
294

    
295
  def _ExpandAndLockInstance(self):
296
    """Helper function to expand and lock an instance.
297

298
    Many LUs that work on an instance take its name in self.op.instance_name
299
    and need to expand it and then declare the expanded name for locking. This
300
    function does it, and then updates self.op.instance_name to the expanded
301
    name. It also initializes needed_locks as a dict, if this hasn't been done
302
    before.
303

304
    """
305
    if self.needed_locks is None:
306
      self.needed_locks = {}
307
    else:
308
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
309
        "_ExpandAndLockInstance called with instance-level locks set"
310
    self.op.instance_name = _ExpandInstanceName(self.cfg,
311
                                                self.op.instance_name)
312
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
313

    
314
  def _LockInstancesNodes(self, primary_only=False):
315
    """Helper function to declare instances' nodes for locking.
316

317
    This function should be called after locking one or more instances to lock
318
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
319
    with all primary or secondary nodes for instances already locked and
320
    present in self.needed_locks[locking.LEVEL_INSTANCE].
321

322
    It should be called from DeclareLocks, and for safety only works if
323
    self.recalculate_locks[locking.LEVEL_NODE] is set.
324

325
    In the future it may grow parameters to just lock some instances' nodes, or
    to just lock primary or secondary nodes, if needed.
327

328
    It should be called in DeclareLocks in a way similar to::
329

330
      if level == locking.LEVEL_NODE:
331
        self._LockInstancesNodes()
332

333
    @type primary_only: boolean
334
    @param primary_only: only lock primary nodes of locked instances
335

336
    """
337
    assert locking.LEVEL_NODE in self.recalculate_locks, \
338
      "_LockInstancesNodes helper function called with no nodes to recalculate"
339

    
340
    # TODO: check if we really have been called with the instance locks held
341

    
342
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
343
    # future we might want to have different behaviors depending on the value
344
    # of self.recalculate_locks[locking.LEVEL_NODE]
345
    wanted_nodes = []
346
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
347
      instance = self.context.cfg.GetInstanceInfo(instance_name)
348
      wanted_nodes.append(instance.primary_node)
349
      if not primary_only:
350
        wanted_nodes.extend(instance.secondary_nodes)
351

    
352
    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
353
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
354
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
355
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
356

    
357
    del self.recalculate_locks[locking.LEVEL_NODE]
358

    
359

    
360
class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
361
  """Simple LU which runs no hooks.
362

363
  This LU is intended as a parent for other LogicalUnits which will
364
  run no hooks, in order to reduce duplicate code.
365

366
  """
367
  HPATH = None
368
  HTYPE = None
369

    
370
  def BuildHooksEnv(self):
371
    """Empty BuildHooksEnv for NoHooksLu.
372

373
    This just raises an error.
374

375
    """
376
    assert False, "BuildHooksEnv called for NoHooksLUs"
377

    
378

    
379
class Tasklet:
380
  """Tasklet base class.
381

382
  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
383
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
384
  tasklets know nothing about locks.
385

386
  Subclasses must follow these rules:
387
    - Implement CheckPrereq
388
    - Implement Exec
389

390
  """
391
  def __init__(self, lu):
392
    self.lu = lu
393

    
394
    # Shortcuts
395
    self.cfg = lu.cfg
396
    self.rpc = lu.rpc
397

    
398
  def CheckPrereq(self):
399
    """Check prerequisites for this tasklets.
400

401
    This method should check whether the prerequisites for the execution of
402
    this tasklet are fulfilled. It can do internode communication, but it
403
    should be idempotent - no cluster or system changes are allowed.
404

405
    The method should raise errors.OpPrereqError in case something is not
406
    fulfilled. Its return value is ignored.
407

408
    This method should also update all parameters to their canonical form if it
409
    hasn't been done before.
410

411
    """
412
    raise NotImplementedError
413

    
414
  def Exec(self, feedback_fn):
415
    """Execute the tasklet.
416

417
    This method should implement the actual work. It should raise
418
    errors.OpExecError for failures that are somewhat dealt with in code, or
419
    expected.
420

421
    """
422
    raise NotImplementedError
423

    
424

    
425
def _GetWantedNodes(lu, nodes):
426
  """Returns list of checked and expanded node names.
427

428
  @type lu: L{LogicalUnit}
429
  @param lu: the logical unit on whose behalf we execute
430
  @type nodes: list
431
  @param nodes: non-empty list of node names to check and expand
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.OpPrereqError: if the nodes parameter is not a list
  @raise errors.ProgrammerError: if the nodes parameter is empty
435

436
  """
437
  if not isinstance(nodes, list):
438
    raise errors.OpPrereqError("Invalid argument type 'nodes'",
439
                               errors.ECODE_INVAL)
440

    
441
  if not nodes:
442
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
443
      " non-empty list of nodes whose name is to be expanded.")
444

    
445
  wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
446
  return utils.NiceSort(wanted)
447

    
448

    
449
def _GetWantedInstances(lu, instances):
450
  """Returns list of checked and expanded instance names.
451

452
  @type lu: L{LogicalUnit}
453
  @param lu: the logical unit on whose behalf we execute
454
  @type instances: list
455
  @param instances: list of instance names or None for all instances
456
  @rtype: list
457
  @return: the list of instances, sorted
458
  @raise errors.OpPrereqError: if the instances parameter is wrong type
459
  @raise errors.OpPrereqError: if any of the passed instances is not found
460

461
  """
462
  if not isinstance(instances, list):
463
    raise errors.OpPrereqError("Invalid argument type 'instances'",
464
                               errors.ECODE_INVAL)
465

    
466
  if instances:
467
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
468
  else:
469
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
470
  return wanted
471

    
472

    
473
def _CheckOutputFields(static, dynamic, selected):
474
  """Checks whether all selected fields are valid.
475

476
  @type static: L{utils.FieldSet}
477
  @param static: static fields set
478
  @type dynamic: L{utils.FieldSet}
479
  @param dynamic: dynamic fields set
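
  Example call (illustrative)::

    _CheckOutputFields(static=utils.FieldSet("name", "pinst_cnt"),
                       dynamic=utils.FieldSet("free_memory"),
                       selected=self.op.output_fields)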
480

481
  """
482
  f = utils.FieldSet()
483
  f.Extend(static)
484
  f.Extend(dynamic)
485

    
486
  delta = f.NonMatching(selected)
487
  if delta:
488
    raise errors.OpPrereqError("Unknown output fields selected: %s"
489
                               % ",".join(delta), errors.ECODE_INVAL)
490

    
491

    
492
def _CheckBooleanOpField(op, name):
493
  """Validates boolean opcode parameters.
494

495
  This will ensure that an opcode parameter is either a boolean value,
496
  or None (but that it always exists).
497

498
  """
499
  val = getattr(op, name, None)
500
  if not (val is None or isinstance(val, bool)):
501
    raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
502
                               (name, str(val)), errors.ECODE_INVAL)
503
  setattr(op, name, val)
504

    
505

    
506
def _CheckGlobalHvParams(params):
507
  """Validates that given hypervisor params are not global ones.
508

509
  This will ensure that instances don't get customised versions of
510
  global params.
511

512
  """
513
  used_globals = constants.HVC_GLOBALS.intersection(params)
514
  if used_globals:
515
    msg = ("The following hypervisor parameters are global and cannot"
516
           " be customized at instance level, please modify them at"
517
           " cluster level: %s" % utils.CommaJoin(used_globals))
518
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
519

    
520

    
521
def _CheckNodeOnline(lu, node):
522
  """Ensure that a given node is online.
523

524
  @param lu: the LU on behalf of which we make the check
525
  @param node: the node to check
526
  @raise errors.OpPrereqError: if the node is offline
527

528
  """
529
  if lu.cfg.GetNodeInfo(node).offline:
530
    raise errors.OpPrereqError("Can't use offline node %s" % node,
531
                               errors.ECODE_INVAL)
532

    
533

    
534
def _CheckNodeNotDrained(lu, node):
535
  """Ensure that a given node is not drained.
536

537
  @param lu: the LU on behalf of which we make the check
538
  @param node: the node to check
539
  @raise errors.OpPrereqError: if the node is drained
540

541
  """
542
  if lu.cfg.GetNodeInfo(node).drained:
543
    raise errors.OpPrereqError("Can't use drained node %s" % node,
544
                               errors.ECODE_INVAL)
545

    
546

    
547
def _CheckNodeHasOS(lu, node, os_name, force_variant):
548
  """Ensure that a node supports a given OS.
549

550
  @param lu: the LU on behalf of which we make the check
551
  @param node: the node to check
552
  @param os_name: the OS to query about
553
  @param force_variant: whether to ignore variant errors
554
  @raise errors.OpPrereqError: if the node is not supporting the OS
555

556
  """
557
  result = lu.rpc.call_os_get(node, os_name)
558
  result.Raise("OS '%s' not in supported OS list for node %s" %
559
               (os_name, node),
560
               prereq=True, ecode=errors.ECODE_INVAL)
561
  if not force_variant:
562
    _CheckOSVariant(result.payload, os_name)
563

    
564

    
565
def _RequireFileStorage():
566
  """Checks that file storage is enabled.
567

568
  @raise errors.OpPrereqError: when file storage is disabled
569

570
  """
571
  if not constants.ENABLE_FILE_STORAGE:
572
    raise errors.OpPrereqError("File storage disabled at configure time",
573
                               errors.ECODE_INVAL)
574

    
575

    
576
def _CheckDiskTemplate(template):
577
  """Ensure a given disk template is valid.
578

579
  """
580
  if template not in constants.DISK_TEMPLATES:
581
    msg = ("Invalid disk template name '%s', valid templates are: %s" %
582
           (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
583
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
584
  if template == constants.DT_FILE:
585
    _RequireFileStorage()
586

    
587

    
588
def _CheckStorageType(storage_type):
589
  """Ensure a given storage type is valid.
590

591
  """
592
  if storage_type not in constants.VALID_STORAGE_TYPES:
593
    raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
594
                               errors.ECODE_INVAL)
595
  if storage_type == constants.ST_FILE:
596
    _RequireFileStorage()
597

    
598

    
599

    
600
def _CheckInstanceDown(lu, instance, reason):
601
  """Ensure that an instance is not running."""
602
  if instance.admin_up:
603
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
604
                               (instance.name, reason), errors.ECODE_STATE)
605

    
606
  pnode = instance.primary_node
607
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
608
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
609
              prereq=True, ecode=errors.ECODE_ENVIRON)
610

    
611
  if instance.name in ins_l.payload:
612
    raise errors.OpPrereqError("Instance %s is running, %s" %
613
                               (instance.name, reason), errors.ECODE_STATE)
614

    
615

    
616
def _ExpandItemName(fn, name, kind):
617
  """Expand an item name.
618

619
  @param fn: the function to use for expansion
620
  @param name: requested item name
621
  @param kind: text description ('Node' or 'Instance')
622
  @return: the resolved (full) name
623
  @raise errors.OpPrereqError: if the item is not found
624

625
  """
626
  full_name = fn(name)
627
  if full_name is None:
628
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
629
                               errors.ECODE_NOENT)
630
  return full_name
631

    
632

    
633
def _ExpandNodeName(cfg, name):
634
  """Wrapper over L{_ExpandItemName} for nodes."""
635
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
636

    
637

    
638
def _ExpandInstanceName(cfg, name):
639
  """Wrapper over L{_ExpandItemName} for instance."""
640
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
641

    
642

    
643
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
644
                          memory, vcpus, nics, disk_template, disks,
645
                          bep, hvp, hypervisor_name):
646
  """Builds instance related env variables for hooks
647

648
  This builds the hook environment from individual variables.
649

650
  @type name: string
651
  @param name: the name of the instance
652
  @type primary_node: string
653
  @param primary_node: the name of the instance's primary node
654
  @type secondary_nodes: list
655
  @param secondary_nodes: list of secondary nodes as strings
656
  @type os_type: string
657
  @param os_type: the name of the instance's OS
658
  @type status: boolean
659
  @param status: the should_run status of the instance
660
  @type memory: string
661
  @param memory: the memory size of the instance
662
  @type vcpus: string
663
  @param vcpus: the count of VCPUs the instance has
664
  @type nics: list
665
  @param nics: list of tuples (ip, mac, mode, link) representing
666
      the NICs the instance has
667
  @type disk_template: string
668
  @param disk_template: the disk template of the instance
669
  @type disks: list
670
  @param disks: the list of (size, mode) pairs
671
  @type bep: dict
672
  @param bep: the backend parameters for the instance
673
  @type hvp: dict
674
  @param hvp: the hypervisor parameters for the instance
675
  @type hypervisor_name: string
676
  @param hypervisor_name: the hypervisor for the instance
677
  @rtype: dict
678
  @return: the hook environment for this instance
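
  For example, the first NIC and disk of an instance are described by keys
  such as C{INSTANCE_NIC0_MAC}, C{INSTANCE_NIC0_MODE}, C{INSTANCE_DISK0_SIZE}
  and C{INSTANCE_DISK0_MODE}, while backend and hypervisor parameters appear
  as C{INSTANCE_BE_<name>} and C{INSTANCE_HV_<name>} respectively.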
679

680
  """
681
  if status:
682
    str_status = "up"
683
  else:
684
    str_status = "down"
685
  env = {
686
    "OP_TARGET": name,
687
    "INSTANCE_NAME": name,
688
    "INSTANCE_PRIMARY": primary_node,
689
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
690
    "INSTANCE_OS_TYPE": os_type,
691
    "INSTANCE_STATUS": str_status,
692
    "INSTANCE_MEMORY": memory,
693
    "INSTANCE_VCPUS": vcpus,
694
    "INSTANCE_DISK_TEMPLATE": disk_template,
695
    "INSTANCE_HYPERVISOR": hypervisor_name,
696
  }
697

    
698
  if nics:
699
    nic_count = len(nics)
700
    for idx, (ip, mac, mode, link) in enumerate(nics):
701
      if ip is None:
702
        ip = ""
703
      env["INSTANCE_NIC%d_IP" % idx] = ip
704
      env["INSTANCE_NIC%d_MAC" % idx] = mac
705
      env["INSTANCE_NIC%d_MODE" % idx] = mode
706
      env["INSTANCE_NIC%d_LINK" % idx] = link
707
      if mode == constants.NIC_MODE_BRIDGED:
708
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
709
  else:
710
    nic_count = 0
711

    
712
  env["INSTANCE_NIC_COUNT"] = nic_count
713

    
714
  if disks:
715
    disk_count = len(disks)
716
    for idx, (size, mode) in enumerate(disks):
717
      env["INSTANCE_DISK%d_SIZE" % idx] = size
718
      env["INSTANCE_DISK%d_MODE" % idx] = mode
719
  else:
720
    disk_count = 0
721

    
722
  env["INSTANCE_DISK_COUNT"] = disk_count
723

    
724
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
725
    for key, value in source.items():
726
      env["INSTANCE_%s_%s" % (kind, key)] = value
727

    
728
  return env
729

    
730

    
731
def _NICListToTuple(lu, nics):
732
  """Build a list of nic information tuples.
733

734
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
735
  value in LUQueryInstanceData.
736

737
  @type lu:  L{LogicalUnit}
738
  @param lu: the logical unit on whose behalf we execute
739
  @type nics: list of L{objects.NIC}
740
  @param nics: list of nics to convert to hooks tuples
741

742
  """
743
  hooks_nics = []
744
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]
745
  for nic in nics:
746
    ip = nic.ip
747
    mac = nic.mac
748
    filled_params = objects.FillDict(c_nicparams, nic.nicparams)
749
    mode = filled_params[constants.NIC_MODE]
750
    link = filled_params[constants.NIC_LINK]
751
    hooks_nics.append((ip, mac, mode, link))
752
  return hooks_nics
753

    
754

    
755
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
756
  """Builds instance related env variables for hooks from an object.
757

758
  @type lu: L{LogicalUnit}
759
  @param lu: the logical unit on whose behalf we execute
760
  @type instance: L{objects.Instance}
761
  @param instance: the instance for which we should build the
762
      environment
763
  @type override: dict
764
  @param override: dictionary with key/values that will override
765
      our values
766
  @rtype: dict
767
  @return: the hook environment dictionary
768

769
  """
770
  cluster = lu.cfg.GetClusterInfo()
771
  bep = cluster.FillBE(instance)
772
  hvp = cluster.FillHV(instance)
773
  args = {
774
    'name': instance.name,
775
    'primary_node': instance.primary_node,
776
    'secondary_nodes': instance.secondary_nodes,
777
    'os_type': instance.os,
778
    'status': instance.admin_up,
779
    'memory': bep[constants.BE_MEMORY],
780
    'vcpus': bep[constants.BE_VCPUS],
781
    'nics': _NICListToTuple(lu, instance.nics),
782
    'disk_template': instance.disk_template,
783
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
784
    'bep': bep,
785
    'hvp': hvp,
786
    'hypervisor_name': instance.hypervisor,
787
  }
788
  if override:
789
    args.update(override)
790
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
791

    
792

    
793
def _AdjustCandidatePool(lu, exceptions):
794
  """Adjust the candidate pool after node operations.
795

796
  """
797
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
798
  if mod_list:
799
    lu.LogInfo("Promoted nodes to master candidate role: %s",
800
               utils.CommaJoin(node.name for node in mod_list))
801
    for name in mod_list:
802
      lu.context.ReaddNode(name)
803
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
804
  if mc_now > mc_max:
805
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
806
               (mc_now, mc_max))
807

    
808

    
809
def _DecideSelfPromotion(lu, exceptions=None):
810
  """Decide whether I should promote myself as a master candidate.
811

812
  """
813
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
814
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
815
  # the new node will increase mc_max with one, so:
816
  mc_should = min(mc_should + 1, cp_size)
817
  return mc_now < mc_should
818

    
819

    
820
def _CheckNicsBridgesExist(lu, target_nics, target_node,
821
                               profile=constants.PP_DEFAULT):
822
  """Check that the brigdes needed by a list of nics exist.
823

824
  """
825
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[profile]
826
  paramslist = [objects.FillDict(c_nicparams, nic.nicparams)
827
                for nic in target_nics]
828
  brlist = [params[constants.NIC_LINK] for params in paramslist
829
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
830
  if brlist:
831
    result = lu.rpc.call_bridges_exist(target_node, brlist)
832
    result.Raise("Error checking bridges on destination node '%s'" %
833
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
834

    
835

    
836
def _CheckInstanceBridgesExist(lu, instance, node=None):
837
  """Check that the brigdes needed by an instance exist.
838

839
  """
840
  if node is None:
841
    node = instance.primary_node
842
  _CheckNicsBridgesExist(lu, instance.nics, node)
843

    
844

    
845
def _CheckOSVariant(os_obj, name):
846
  """Check whether an OS name conforms to the os variants specification.
847

848
  @type os_obj: L{objects.OS}
849
  @param os_obj: OS object to check
850
  @type name: string
851
  @param name: OS name passed by the user, to check for validity
852

853
  """
854
  if not os_obj.supported_variants:
855
    return
856
  try:
857
    variant = name.split("+", 1)[1]
858
  except IndexError:
859
    raise errors.OpPrereqError("OS name must include a variant",
860
                               errors.ECODE_INVAL)
861

    
862
  if variant not in os_obj.supported_variants:
863
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
864

    
865

    
866
def _GetNodeInstancesInner(cfg, fn):
867
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
868

    
869

    
870
def _GetNodeInstances(cfg, node_name):
871
  """Returns a list of all primary and secondary instances on a node.
872

873
  """
874

    
875
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
876

    
877

    
878
def _GetNodePrimaryInstances(cfg, node_name):
879
  """Returns primary instances on a node.
880

881
  """
882
  return _GetNodeInstancesInner(cfg,
883
                                lambda inst: node_name == inst.primary_node)
884

    
885

    
886
def _GetNodeSecondaryInstances(cfg, node_name):
887
  """Returns secondary instances on a node.
888

889
  """
890
  return _GetNodeInstancesInner(cfg,
891
                                lambda inst: node_name in inst.secondary_nodes)
892

    
893

    
894
def _GetStorageTypeArgs(cfg, storage_type):
895
  """Returns the arguments for a storage type.
896

897
  """
898
  # Special case for file storage
899
  if storage_type == constants.ST_FILE:
900
    # storage.FileStorage wants a list of storage directories
901
    return [[cfg.GetFileStorageDir()]]
902

    
903
  return []
904

    
905

    
906
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
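  """Return the indices of an instance's faulty disks on a node.

  The mirror status of the instance's disks is queried on C{node_name};
  the indices of the disks whose local disk status is faulty are returned.

  """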
907
  faulty = []
908

    
909
  for dev in instance.disks:
910
    cfg.SetDiskID(dev, node_name)
911

    
912
  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
913
  result.Raise("Failed to get disk status from node %s" % node_name,
914
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
915

    
916
  for idx, bdev_status in enumerate(result.payload):
917
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
918
      faulty.append(idx)
919

    
920
  return faulty
921

    
922

    
923
def _FormatTimestamp(secs):
924
  """Formats a Unix timestamp with the local timezone.
925

926
  """
927
  return time.strftime("%F %T %Z", time.gmtime(secs))
928

    
929

    
930
class LUPostInitCluster(LogicalUnit):
931
  """Logical unit for running hooks after cluster initialization.
932

933
  """
934
  HPATH = "cluster-init"
935
  HTYPE = constants.HTYPE_CLUSTER
936
  _OP_REQP = []
937

    
938
  def BuildHooksEnv(self):
939
    """Build hooks env.
940

941
    """
942
    env = {"OP_TARGET": self.cfg.GetClusterName()}
943
    mn = self.cfg.GetMasterNode()
944
    return env, [], [mn]
945

    
946
  def CheckPrereq(self):
947
    """No prerequisites to check.
948

949
    """
950
    return True
951

    
952
  def Exec(self, feedback_fn):
953
    """Nothing to do.
954

955
    """
956
    return True
957

    
958

    
959
class LUDestroyCluster(LogicalUnit):
960
  """Logical unit for destroying the cluster.
961

962
  """
963
  HPATH = "cluster-destroy"
964
  HTYPE = constants.HTYPE_CLUSTER
965
  _OP_REQP = []
966

    
967
  def BuildHooksEnv(self):
968
    """Build hooks env.
969

970
    """
971
    env = {"OP_TARGET": self.cfg.GetClusterName()}
972
    return env, [], []
973

    
974
  def CheckPrereq(self):
975
    """Check prerequisites.
976

977
    This checks whether the cluster is empty.
978

979
    Any errors are signaled by raising errors.OpPrereqError.
980

981
    """
982
    master = self.cfg.GetMasterNode()
983

    
984
    nodelist = self.cfg.GetNodeList()
985
    if len(nodelist) != 1 or nodelist[0] != master:
986
      raise errors.OpPrereqError("There are still %d node(s) in"
987
                                 " this cluster." % (len(nodelist) - 1),
988
                                 errors.ECODE_INVAL)
989
    instancelist = self.cfg.GetInstanceList()
990
    if instancelist:
991
      raise errors.OpPrereqError("There are still %d instance(s) in"
992
                                 " this cluster." % len(instancelist),
993
                                 errors.ECODE_INVAL)
994

    
995
  def Exec(self, feedback_fn):
996
    """Destroys the cluster.
997

998
    """
999
    master = self.cfg.GetMasterNode()
1000
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
1001

    
1002
    # Run post hooks on master node before it's removed
1003
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
1004
    try:
1005
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
1006
    except:
1007
      # pylint: disable-msg=W0702
1008
      self.LogWarning("Errors occurred running hooks on %s" % master)
1009

    
1010
    result = self.rpc.call_node_stop_master(master, False)
1011
    result.Raise("Could not disable the master role")
1012

    
1013
    if modify_ssh_setup:
1014
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
1015
      utils.CreateBackup(priv_key)
1016
      utils.CreateBackup(pub_key)
1017

    
1018
    return master
1019

    
1020

    
1021
def _VerifyCertificateInner(filename, expired, not_before, not_after, now,
1022
                            warn_days=constants.SSL_CERT_EXPIRATION_WARN,
1023
                            error_days=constants.SSL_CERT_EXPIRATION_ERROR):
1024
  """Verifies certificate details for LUVerifyCluster.
1025

1026
  """
1027
  if expired:
1028
    msg = "Certificate %s is expired" % filename
1029

    
1030
    if not_before is not None and not_after is not None:
1031
      msg += (" (valid from %s to %s)" %
1032
              (_FormatTimestamp(not_before),
1033
               _FormatTimestamp(not_after)))
1034
    elif not_before is not None:
1035
      msg += " (valid from %s)" % _FormatTimestamp(not_before)
1036
    elif not_after is not None:
1037
      msg += " (valid until %s)" % _FormatTimestamp(not_after)
1038

    
1039
    return (LUVerifyCluster.ETYPE_ERROR, msg)
1040

    
1041
  elif not_before is not None and not_before > now:
1042
    return (LUVerifyCluster.ETYPE_WARNING,
1043
            "Certificate %s not yet valid (valid from %s)" %
1044
            (filename, _FormatTimestamp(not_before)))
1045

    
1046
  elif not_after is not None:
1047
    remaining_days = int((not_after - now) / (24 * 3600))
1048

    
1049
    msg = ("Certificate %s expires in %d days" % (filename, remaining_days))
1050

    
1051
    if remaining_days <= error_days:
1052
      return (LUVerifyCluster.ETYPE_ERROR, msg)
1053

    
1054
    if remaining_days <= warn_days:
1055
      return (LUVerifyCluster.ETYPE_WARNING, msg)
1056

    
1057
  return (None, None)
1058

    
1059

    
1060
def _VerifyCertificate(filename):
1061
  """Verifies a certificate for LUVerifyCluster.
1062

1063
  @type filename: string
1064
  @param filename: Path to PEM file
1065

1066
  """
1067
  try:
1068
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1069
                                           utils.ReadFile(filename))
1070
  except Exception, err: # pylint: disable-msg=W0703
1071
    return (LUVerifyCluster.ETYPE_ERROR,
1072
            "Failed to load X509 certificate %s: %s" % (filename, err))
1073

    
1074
  # Depending on the pyOpenSSL version, this can just return (None, None)
1075
  (not_before, not_after) = utils.GetX509CertValidity(cert)
1076

    
1077
  return _VerifyCertificateInner(filename, cert.has_expired(),
1078
                                 not_before, not_after, time.time())
1079

    
1080

    
1081
class LUVerifyCluster(LogicalUnit):
1082
  """Verifies the cluster status.
1083

1084
  """
1085
  HPATH = "cluster-verify"
1086
  HTYPE = constants.HTYPE_CLUSTER
1087
  _OP_REQP = ["skip_checks", "verbose", "error_codes", "debug_simulate_errors"]
1088
  REQ_BGL = False
1089

    
1090
  TCLUSTER = "cluster"
1091
  TNODE = "node"
1092
  TINSTANCE = "instance"
1093

    
1094
  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1095
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1096
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1097
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1098
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1099
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1101
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1102
  ENODEDRBD = (TNODE, "ENODEDRBD")
1103
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1104
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
1105
  ENODEHV = (TNODE, "ENODEHV")
1106
  ENODELVM = (TNODE, "ENODELVM")
1107
  ENODEN1 = (TNODE, "ENODEN1")
1108
  ENODENET = (TNODE, "ENODENET")
1109
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1110
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1111
  ENODERPC = (TNODE, "ENODERPC")
1112
  ENODESSH = (TNODE, "ENODESSH")
1113
  ENODEVERSION = (TNODE, "ENODEVERSION")
1114
  ENODESETUP = (TNODE, "ENODESETUP")
1115
  ENODETIME = (TNODE, "ENODETIME")
1116

    
1117
  ETYPE_FIELD = "code"
1118
  ETYPE_ERROR = "ERROR"
1119
  ETYPE_WARNING = "WARNING"
1120

    
1121
  class NodeImage(object):
1122
    """A class representing the logical and physical status of a node.
1123

1124
    @ivar volumes: a structure as returned from
1125
        L{ganeti.backend.GetVolumeList} (runtime)
1126
    @ivar instances: a list of running instances (runtime)
1127
    @ivar pinst: list of configured primary instances (config)
1128
    @ivar sinst: list of configured secondary instances (config)
1129
    @ivar sbp: dictionary of {primary-node: list of instances} for all
        instances for which this node is secondary (config)
1131
    @ivar mfree: free memory, as reported by hypervisor (runtime)
1132
    @ivar dfree: free disk, as reported by the node (runtime)
1133
    @ivar offline: the offline status (config)
1134
    @type rpc_fail: boolean
1135
    @ivar rpc_fail: whether the RPC verify call failed (overall, not
        whether the individual keys were correct) (runtime)
1137
    @type lvm_fail: boolean
1138
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1139
    @type hyp_fail: boolean
1140
    @ivar hyp_fail: whether the RPC call didn't return the instance list
1141
    @type ghost: boolean
1142
    @ivar ghost: whether this is a known node or not (config)
1143

1144
    """
1145
    def __init__(self, offline=False):
1146
      self.volumes = {}
1147
      self.instances = []
1148
      self.pinst = []
1149
      self.sinst = []
1150
      self.sbp = {}
1151
      self.mfree = 0
1152
      self.dfree = 0
1153
      self.offline = offline
1154
      self.rpc_fail = False
1155
      self.lvm_fail = False
1156
      self.hyp_fail = False
1157
      self.ghost = False
1158

    
1159
  def ExpandNames(self):
1160
    self.needed_locks = {
1161
      locking.LEVEL_NODE: locking.ALL_SET,
1162
      locking.LEVEL_INSTANCE: locking.ALL_SET,
1163
    }
1164
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1165

    
1166
  def _Error(self, ecode, item, msg, *args, **kwargs):
1167
    """Format an error message.
1168

1169
    Based on the opcode's error_codes parameter, either format a
1170
    parseable error code, or a simpler error string.
1171

1172
    This must be called only from Exec and functions called from Exec.
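
    For example (illustrative), an LVM error on node1.example.com would be
    reported as::

      ERROR:ENODELVM:node:node1.example.com:unable to check volume groups

    with the error_codes option enabled, and as::

      ERROR: node node1.example.com: unable to check volume groups

    without it.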
1173

1174
    """
1175
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1176
    itype, etxt = ecode
1177
    # first complete the msg
1178
    if args:
1179
      msg = msg % args
1180
    # then format the whole message
1181
    if self.op.error_codes:
1182
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1183
    else:
1184
      if item:
1185
        item = " " + item
1186
      else:
1187
        item = ""
1188
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1189
    # and finally report it via the feedback_fn
1190
    self._feedback_fn("  - %s" % msg)
1191

    
1192
  def _ErrorIf(self, cond, *args, **kwargs):
1193
    """Log an error message if the passed condition is True.
1194

1195
    """
1196
    cond = bool(cond) or self.op.debug_simulate_errors
1197
    if cond:
1198
      self._Error(*args, **kwargs)
1199
    # do not mark the operation as failed for WARN cases only
1200
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1201
      self.bad = self.bad or cond
1202

    
1203
  def _VerifyNode(self, ninfo, nresult):
1204
    """Run multiple tests against a node.
1205

1206
    Test list:
1207

1208
      - compares ganeti version
1209
      - checks vg existence and size > 20G
1210
      - checks config file checksum
1211
      - checks ssh to other nodes
1212

1213
    @type ninfo: L{objects.Node}
1214
    @param ninfo: the node to check
1215
    @param nresult: the results from the node
1216
    @rtype: boolean
1217
    @return: whether overall this call was successful (and we can expect
1218
         reasonable values in the response)
1219

1220
    """
1221
    node = ninfo.name
1222
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1223

    
1224
    # main result, nresult should be a non-empty dict
1225
    test = not nresult or not isinstance(nresult, dict)
1226
    _ErrorIf(test, self.ENODERPC, node,
1227
                  "unable to verify node: no data returned")
1228
    if test:
1229
      return False
1230

    
1231
    # compares ganeti version
1232
    local_version = constants.PROTOCOL_VERSION
1233
    remote_version = nresult.get("version", None)
1234
    test = not (remote_version and
1235
                isinstance(remote_version, (list, tuple)) and
1236
                len(remote_version) == 2)
1237
    _ErrorIf(test, self.ENODERPC, node,
1238
             "connection to node returned invalid data")
1239
    if test:
1240
      return False
1241

    
1242
    test = local_version != remote_version[0]
1243
    _ErrorIf(test, self.ENODEVERSION, node,
1244
             "incompatible protocol versions: master %s,"
1245
             " node %s", local_version, remote_version[0])
1246
    if test:
1247
      return False
1248

    
1249
    # node seems compatible, we can actually try to look into its results
1250

    
1251
    # full package version
1252
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1253
                  self.ENODEVERSION, node,
1254
                  "software version mismatch: master %s, node %s",
1255
                  constants.RELEASE_VERSION, remote_version[1],
1256
                  code=self.ETYPE_WARNING)
1257

    
1258
    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1259
    if isinstance(hyp_result, dict):
1260
      for hv_name, hv_result in hyp_result.iteritems():
1261
        test = hv_result is not None
1262
        _ErrorIf(test, self.ENODEHV, node,
1263
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1264

    
1265

    
1266
    test = nresult.get(constants.NV_NODESETUP,
1267
                           ["Missing NODESETUP results"])
1268
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1269
             "; ".join(test))
1270

    
1271
    return True
1272

    
1273
  def _VerifyNodeTime(self, ninfo, nresult,
1274
                      nvinfo_starttime, nvinfo_endtime):
1275
    """Check the node time.
1276

1277
    @type ninfo: L{objects.Node}
1278
    @param ninfo: the node to check
1279
    @param nresult: the remote results for the node
1280
    @param nvinfo_starttime: the start time of the RPC call
1281
    @param nvinfo_endtime: the end time of the RPC call
1282

1283
    """
1284
    node = ninfo.name
1285
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1286

    
1287
    ntime = nresult.get(constants.NV_TIME, None)
1288
    try:
1289
      ntime_merged = utils.MergeTime(ntime)
1290
    except (ValueError, TypeError):
1291
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1292
      return
1293

    
1294
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1295
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1296
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1297
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1298
    else:
1299
      ntime_diff = None
1300

    
1301
    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1302
             "Node time diverges by at least %s from master node time",
1303
             ntime_diff)
1304

    
1305
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1306
    """Check the node time.
1307

1308
    @type ninfo: L{objects.Node}
1309
    @param ninfo: the node to check
1310
    @param nresult: the remote results for the node
1311
    @param vg_name: the configured VG name
1312

1313
    """
1314
    if vg_name is None:
1315
      return
1316

    
1317
    node = ninfo.name
1318
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1319

    
1320
    # checks vg existence and size > 20G
1321
    vglist = nresult.get(constants.NV_VGLIST, None)
1322
    test = not vglist
1323
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1324
    if not test:
1325
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1326
                                            constants.MIN_VG_SIZE)
1327
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1328

    
1329
    # check pv names
1330
    pvlist = nresult.get(constants.NV_PVLIST, None)
1331
    test = pvlist is None
1332
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1333
    if not test:
1334
      # check that ':' is not present in PV names, since it's a
1335
      # special character for lvcreate (denotes the range of PEs to
1336
      # use on the PV)
1337
      for _, pvname, owner_vg in pvlist:
1338
        test = ":" in pvname
1339
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1340
                 " '%s' of VG '%s'", pvname, owner_vg)
1341

    
1342
  def _VerifyNodeNetwork(self, ninfo, nresult):
1343
    """Check the node time.
1344

1345
    @type ninfo: L{objects.Node}
1346
    @param ninfo: the node to check
1347
    @param nresult: the remote results for the node
1348

1349
    """
1350
    node = ninfo.name
1351
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1352

    
1353
    test = constants.NV_NODELIST not in nresult
1354
    _ErrorIf(test, self.ENODESSH, node,
1355
             "node hasn't returned node ssh connectivity data")
1356
    if not test:
1357
      if nresult[constants.NV_NODELIST]:
1358
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1359
          _ErrorIf(True, self.ENODESSH, node,
1360
                   "ssh communication with node '%s': %s", a_node, a_msg)
1361

    
1362
    test = constants.NV_NODENETTEST not in nresult
1363
    _ErrorIf(test, self.ENODENET, node,
1364
             "node hasn't returned node tcp connectivity data")
1365
    if not test:
1366
      if nresult[constants.NV_NODENETTEST]:
1367
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1368
        for anode in nlist:
1369
          _ErrorIf(True, self.ENODENET, node,
1370
                   "tcp communication with node '%s': %s",
1371
                   anode, nresult[constants.NV_NODENETTEST][anode])
1372

    
1373
    test = constants.NV_MASTERIP not in nresult
1374
    _ErrorIf(test, self.ENODENET, node,
1375
             "node hasn't returned node master IP reachability data")
1376
    if not test:
1377
      if not nresult[constants.NV_MASTERIP]:
1378
        if node == self.master_node:
1379
          msg = "the master node cannot reach the master IP (not configured?)"
1380
        else:
1381
          msg = "cannot reach the master IP"
1382
        _ErrorIf(True, self.ENODENET, node, msg)
1383

    
1384

    
1385
  def _VerifyInstance(self, instance, instanceconfig, node_image):
1386
    """Verify an instance.
1387

1388
    This function checks to see if the required block devices are
1389
    available on the instance's node.
1390

1391
    """
1392
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1393
    node_current = instanceconfig.primary_node
1394

    
1395
    node_vol_should = {}
1396
    instanceconfig.MapLVsByNode(node_vol_should)
1397

    
1398
    for node in node_vol_should:
1399
      n_img = node_image[node]
1400
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1401
        # ignore missing volumes on offline or broken nodes
1402
        continue
1403
      for volume in node_vol_should[node]:
1404
        test = volume not in n_img.volumes
1405
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1406
                 "volume %s missing on node %s", volume, node)
1407

    
1408
    if instanceconfig.admin_up:
1409
      pri_img = node_image[node_current]
1410
      test = instance not in pri_img.instances and not pri_img.offline
1411
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
1412
               "instance not running on its primary node %s",
1413
               node_current)
1414

    
1415
    for node, n_img in node_image.items():
1416
      if node != node_current:
1417
        test = instance in n_img.instances
1418
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1419
                 "instance should not run on node %s", node)
1420

    
1421
  def _VerifyOrphanVolumes(self, node_vol_should, node_image):
1422
    """Verify if there are any unknown volumes in the cluster.
1423

1424
    The .os, .swap and backup volumes are ignored. All other volumes are
1425
    reported as unknown.
1426

1427
    """
1428
    for node, n_img in node_image.items():
1429
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1430
        # skip non-healthy nodes
1431
        continue
1432
      for volume in n_img.volumes:
1433
        test = (node not in node_vol_should or
1434
                volume not in node_vol_should[node])
1435
        self._ErrorIf(test, self.ENODEORPHANLV, node,
1436
                      "volume %s is unknown", volume)
1437

    
1438
  def _VerifyOrphanInstances(self, instancelist, node_image):
1439
    """Verify the list of running instances.
1440

1441
    This checks what instances are running but unknown to the cluster.
1442

1443
    """
1444
    for node, n_img in node_image.items():
1445
      for o_inst in n_img.instances:
1446
        test = o_inst not in instancelist
1447
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1448
                      "instance %s on node %s should not exist", o_inst, node)
1449

    
1450
  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1451
    """Verify N+1 Memory Resilience.
1452

1453
    Check that if one single node dies we can still start all the
1454
    instances it was primary for.
1455

1456
    """
1457
    for node, n_img in node_image.items():
1458
      # This code checks that every node which is now listed as
1459
      # secondary has enough memory to host all instances it is
1460
      # supposed to should a single other node in the cluster fail.
1461
      # FIXME: not ready for failover to an arbitrary node
1462
      # FIXME: does not support file-backed instances
1463
      # WARNING: we currently take into account down instances as well
1464
      # as up ones, considering that even if they're down someone
1465
      # might want to start them even in the event of a node failure.
1466
      for prinode, instances in n_img.sbp.items():
1467
        needed_mem = 0
1468
        for instance in instances:
1469
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1470
          if bep[constants.BE_AUTO_BALANCE]:
1471
            needed_mem += bep[constants.BE_MEMORY]
1472
        test = n_img.mfree < needed_mem
1473
        self._ErrorIf(test, self.ENODEN1, node,
1474
                      "not enough memory on to accommodate"
1475
                      " failovers should peer node %s fail", prinode)
1476

    
1477
  def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
                       master_files):
    """Verifies and computes the node required file checksums.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param file_list: required list of files
    @param local_cksum: dictionary of local files and their checksums
    @param master_files: list of files that only masters should have

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_cksum = nresult.get(constants.NV_FILELIST, None)
    test = not isinstance(remote_cksum, dict)
    _ErrorIf(test, self.ENODEFILECHECK, node,
             "node hasn't returned file checksum data")
    if test:
      return

    for file_name in file_list:
      node_is_mc = ninfo.master_candidate
      must_have = (file_name not in master_files) or node_is_mc
      # missing
      test1 = file_name not in remote_cksum
      # invalid checksum
      test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
      # existing and good
      test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
      _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
               "file '%s' missing", file_name)
      _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
               "file '%s' has wrong checksum", file_name)
      # not candidate and this is not a must-have file
      _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist on non master"
               " candidates (and the file is outdated)", file_name)
      # all good, except non-master/non-must have combination
      _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist"
               " on non master candidates", file_name)

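  # Illustrative note (added comment): as used from Exec() below,
  # master_files is just [constants.CLUSTER_CONF_FILE], so the checks above
  # amount to: every node must carry the ordinary cluster files, only master
  # candidates must hold the cluster configuration file, and non-candidates
  # are merely warned if they still have a (possibly stale) copy of it.
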
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_map):
    """Verifies the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # compute the DRBD minors
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, self.ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
      # ghost instance should not be running, but otherwise we
      # don't give double warnings (both ghost instance and
      # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name, instance.admin_up)

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, self.ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return

    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, self.ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, self.ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)

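  # Illustrative note (added comment, example values are hypothetical):
  # ComputeDRBDMap yields a node -> {minor: instance name} mapping, e.g.
  #   {"node1": {0: "inst1", 1: "inst2"}, "node2": {0: "inst1"}}
  # while NV_DRBDLIST is the flat list of minors the node reports as in use
  # (e.g. [0, 1]); the two loops above cross-check these views in both
  # directions.
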
  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
    """Verifies and updates the node volume data.

    This function will update a L{NodeImage}'s internal structures
    with data from the remote call.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    if vg_name is None:
      pass
    elif isinstance(lvdata, basestring):
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
    else:
      nimg.volumes = lvdata
      nimg.lvm_fail = False

  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
    """Verifies and updates the node instance list.

    If the listing was successful, then updates this node's instance
    list. Otherwise, it marks the RPC call as failed for the instance
    list key.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    idata = nresult.get(constants.NV_INSTANCELIST, None)
    test = not isinstance(idata, list)
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
    if test:
      nimg.hyp_fail = True
    else:
      nimg.instances = idata

  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
    """Verifies and computes a node information map

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # try to read free memory (from the hypervisor)
    hv_info = nresult.get(constants.NV_HVINFO, None)
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
    if not test:
      try:
        nimg.mfree = int(hv_info["memory_free"])
      except (ValueError, TypeError):
        _ErrorIf(True, self.ENODERPC, node,
                 "node returned invalid nodeinfo, check hypervisor")

    # FIXME: devise a free space model for file based instances as well
    if vg_name is not None:
      test = (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST])
      _ErrorIf(test, self.ENODELVM, node,
               "node didn't return data for the volume group '%s'"
               " - it is either missing or broken", vg_name)
      if not test:
        try:
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
        except (ValueError, TypeError):
          _ErrorIf(True, self.ENODERPC, node,
                   "node returned invalid LVM info, check LVM status")

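  # Illustrative note (added comment, values are hypothetical): a successful
  # NV_HVINFO payload is expected to be a dict such as
  # {"memory_free": 3840, "memory_total": 4096, ...}, and NV_VGLIST a dict
  # mapping volume group names to free space, e.g. {"xenvg": 102400}; only
  # the two keys used above are consumed, and anything unparsable is
  # reported as an RPC-level problem.
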
  def CheckPrereq(self):
    """Check prerequisites.

    Transform the list of checks we're going to skip into a set and check that
    all its members are valid.

    """
    self.skip_set = frozenset(self.op.skip_checks)
    if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
      raise errors.OpPrereqError("Invalid checks to be skipped specified",
                                 errors.ECODE_INVAL)

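  # Illustrative note (added comment, assumption about current constants):
  # VERIFY_OPTIONAL_CHECKS is expected to contain only
  # constants.VERIFY_NPLUSONE_MEM at this point, so a valid opcode would
  # carry e.g. skip_checks=[constants.VERIFY_NPLUSONE_MEM] (see the matching
  # test in Exec below).
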
  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks are run in the post phase only; their failure is
    logged in the verify output and makes the verification fail.

    """
    all_nodes = self.cfg.GetNodeList()
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
      }
    for node in self.cfg.GetAllNodesInfo().values():
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())

    return env, [], all_nodes

  def Exec(self, feedback_fn):
1686
    """Verify integrity of cluster, performing various test on nodes.
1687

1688
    """
1689
    self.bad = False
1690
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1691
    verbose = self.op.verbose
1692
    self._feedback_fn = feedback_fn
1693
    feedback_fn("* Verifying global settings")
1694
    for msg in self.cfg.VerifyConfig():
1695
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)
1696

    
1697
    # Check the cluster certificates
1698
    for cert_filename in constants.ALL_CERT_FILES:
1699
      (errcode, msg) = _VerifyCertificate(cert_filename)
1700
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1701

    
1702
    vg_name = self.cfg.GetVGName()
1703
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
1704
    cluster = self.cfg.GetClusterInfo()
1705
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
1706
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
1707
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
1708
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
1709
                        for iname in instancelist)
1710
    i_non_redundant = [] # Non redundant instances
1711
    i_non_a_balanced = [] # Non auto-balanced instances
1712
    n_offline = 0 # Count of offline nodes
1713
    n_drained = 0 # Count of nodes being drained
1714
    node_vol_should = {}
1715

    
1716
    # FIXME: verify OS list
1717
    # do local checksums
1718
    master_files = [constants.CLUSTER_CONF_FILE]
1719
    master_node = self.master_node = self.cfg.GetMasterNode()
1720
    master_ip = self.cfg.GetMasterIP()
1721

    
1722
    file_names = ssconf.SimpleStore().GetFileList()
1723
    file_names.extend(constants.ALL_CERT_FILES)
1724
    file_names.extend(master_files)
1725
    if cluster.modify_etc_hosts:
1726
      file_names.append(constants.ETC_HOSTS)
1727

    
1728
    local_checksums = utils.FingerprintFiles(file_names)
1729

    
1730
    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
1731
    node_verify_param = {
1732
      constants.NV_FILELIST: file_names,
1733
      constants.NV_NODELIST: [node.name for node in nodeinfo
1734
                              if not node.offline],
1735
      constants.NV_HYPERVISOR: hypervisors,
1736
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
1737
                                  node.secondary_ip) for node in nodeinfo
1738
                                 if not node.offline],
1739
      constants.NV_INSTANCELIST: hypervisors,
1740
      constants.NV_VERSION: None,
1741
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
1742
      constants.NV_NODESETUP: None,
1743
      constants.NV_TIME: None,
1744
      constants.NV_MASTERIP: (master_node, master_ip),
1745
      }
1746

    
1747
    if vg_name is not None:
1748
      node_verify_param[constants.NV_VGLIST] = None
1749
      node_verify_param[constants.NV_LVLIST] = vg_name
1750
      node_verify_param[constants.NV_PVLIST] = [vg_name]
1751
      node_verify_param[constants.NV_DRBDLIST] = None
1752

    
1753
    # Build our expected cluster state
1754
    node_image = dict((node.name, self.NodeImage(offline=node.offline))
1755
                      for node in nodeinfo)
1756

    
1757
    for instance in instancelist:
1758
      inst_config = instanceinfo[instance]
1759

    
1760
      for nname in inst_config.all_nodes:
1761
        if nname not in node_image:
1762
          # ghost node
1763
          gnode = self.NodeImage()
1764
          gnode.ghost = True
1765
          node_image[nname] = gnode
1766

    
1767
      inst_config.MapLVsByNode(node_vol_should)
1768

    
1769
      pnode = inst_config.primary_node
1770
      node_image[pnode].pinst.append(instance)
1771

    
1772
      for snode in inst_config.secondary_nodes:
1773
        nimg = node_image[snode]
1774
        nimg.sinst.append(instance)
1775
        if pnode not in nimg.sbp:
1776
          nimg.sbp[pnode] = []
1777
        nimg.sbp[pnode].append(instance)
1778

    
1779
    # At this point, we have the in-memory data structures complete,
1780
    # except for the runtime information, which we'll gather next
1781

    
1782
    # Due to the way our RPC system works, exact response times cannot be
1783
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
1784
    # time before and after executing the request, we can at least have a time
1785
    # window.
1786
    nvinfo_starttime = time.time()
1787
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
1788
                                           self.cfg.GetClusterName())
1789
    nvinfo_endtime = time.time()
1790

    
1791
    all_drbd_map = self.cfg.ComputeDRBDMap()
1792

    
1793
    feedback_fn("* Verifying node status")
1794
    for node_i in nodeinfo:
1795
      node = node_i.name
1796
      nimg = node_image[node]
1797

    
1798
      if node_i.offline:
1799
        if verbose:
1800
          feedback_fn("* Skipping offline node %s" % (node,))
1801
        n_offline += 1
1802
        continue
1803

    
1804
      if node == master_node:
1805
        ntype = "master"
1806
      elif node_i.master_candidate:
1807
        ntype = "master candidate"
1808
      elif node_i.drained:
1809
        ntype = "drained"
1810
        n_drained += 1
1811
      else:
1812
        ntype = "regular"
1813
      if verbose:
1814
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
1815

    
1816
      msg = all_nvinfo[node].fail_msg
1817
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
1818
      if msg:
1819
        nimg.rpc_fail = True
1820
        continue
1821

    
1822
      nresult = all_nvinfo[node].payload
1823

    
1824
      nimg.call_ok = self._VerifyNode(node_i, nresult)
1825
      self._VerifyNodeNetwork(node_i, nresult)
1826
      self._VerifyNodeLVM(node_i, nresult, vg_name)
1827
      self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
1828
                            master_files)
1829
      self._VerifyNodeDrbd(node_i, nresult, instanceinfo, all_drbd_map)
1830
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
1831

    
1832
      self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
1833
      self._UpdateNodeInstances(node_i, nresult, nimg)
1834
      self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
1835

    
1836
    feedback_fn("* Verifying instance status")
1837
    for instance in instancelist:
1838
      if verbose:
1839
        feedback_fn("* Verifying instance %s" % instance)
1840
      inst_config = instanceinfo[instance]
1841
      self._VerifyInstance(instance, inst_config, node_image)
1842
      inst_nodes_offline = []
1843

    
1844
      pnode = inst_config.primary_node
1845
      pnode_img = node_image[pnode]
1846
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
1847
               self.ENODERPC, pnode, "instance %s, connection to"
1848
               " primary node failed", instance)
1849

    
1850
      if pnode_img.offline:
1851
        inst_nodes_offline.append(pnode)
1852

    
1853
      # If the instance is non-redundant we cannot survive losing its primary
1854
      # node, so we are not N+1 compliant. On the other hand we have no disk
1855
      # templates with more than one secondary so that situation is not well
1856
      # supported either.
1857
      # FIXME: does not support file-backed instances
1858
      if not inst_config.secondary_nodes:
1859
        i_non_redundant.append(instance)
1860
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
1861
               instance, "instance has multiple secondary nodes: %s",
1862
               utils.CommaJoin(inst_config.secondary_nodes),
1863
               code=self.ETYPE_WARNING)
1864

    
1865
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
1866
        i_non_a_balanced.append(instance)
1867

    
1868
      for snode in inst_config.secondary_nodes:
1869
        s_img = node_image[snode]
1870
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
1871
                 "instance %s, connection to secondary node failed", instance)
1872

    
1873
        if s_img.offline:
1874
          inst_nodes_offline.append(snode)
1875

    
1876
      # warn that the instance lives on offline nodes
1877
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
1878
               "instance lives on offline node(s) %s",
1879
               utils.CommaJoin(inst_nodes_offline))
1880
      # ... or ghost nodes
1881
      for node in inst_config.all_nodes:
1882
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
1883
                 "instance lives on ghost node %s", node)
1884

    
1885
    feedback_fn("* Verifying orphan volumes")
1886
    self._VerifyOrphanVolumes(node_vol_should, node_image)
1887

    
1888
    feedback_fn("* Verifying orphan instances")
1889
    self._VerifyOrphanInstances(instancelist, node_image)
1890

    
1891
    if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
1892
      feedback_fn("* Verifying N+1 Memory redundancy")
1893
      self._VerifyNPlusOneMemory(node_image, instanceinfo)
1894

    
1895
    feedback_fn("* Other Notes")
1896
    if i_non_redundant:
1897
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
1898
                  % len(i_non_redundant))
1899

    
1900
    if i_non_a_balanced:
1901
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
1902
                  % len(i_non_a_balanced))
1903

    
1904
    if n_offline:
1905
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
1906

    
1907
    if n_drained:
1908
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
1909

    
1910
    return not self.bad
1911

    
1912
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
1913
    """Analyze the post-hooks' result
1914

1915
    This method analyses the hook result, handles it, and sends some
1916
    nicely-formatted feedback back to the user.
1917

1918
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
1919
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
1920
    @param hooks_results: the results of the multi-node hooks rpc call
1921
    @param feedback_fn: function used send feedback back to the caller
1922
    @param lu_result: previous Exec result
1923
    @return: the new Exec result, based on the previous result
1924
        and hook results
1925

1926
    """
1927
    # We only really run POST phase hooks, and are only interested in
1928
    # their results
1929
    if phase == constants.HOOKS_PHASE_POST:
1930
      # Used to change hooks' output to proper indentation
1931
      indent_re = re.compile('^', re.M)
1932
      feedback_fn("* Hooks Results")
1933
      assert hooks_results, "invalid result from hooks"
1934

    
1935
      for node_name in hooks_results:
1936
        res = hooks_results[node_name]
1937
        msg = res.fail_msg
1938
        test = msg and not res.offline
1939
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
1940
                      "Communication failure in hooks execution: %s", msg)
1941
        if res.offline or msg:
1942
          # No need to investigate payload if node is offline or gave an error.
1943
          # override manually lu_result here as _ErrorIf only
1944
          # overrides self.bad
1945
          lu_result = 1
1946
          continue
1947
        for script, hkr, output in res.payload:
1948
          test = hkr == constants.HKR_FAIL
1949
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
1950
                        "Script %s failed, output:", script)
1951
          if test:
1952
            output = indent_re.sub('      ', output)
1953
            feedback_fn("%s" % output)
1954
            lu_result = 0
1955

    
1956
      return lu_result
1957

    
1958

    
1959
class LUVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def CheckPrereq(self):
    """Check prerequisites.

    This has no prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    result = res_nodes, res_instances, res_missing = {}, [], {}

    vg_name = self.cfg.GetVGName()
    nodes = utils.NiceSort(self.cfg.GetNodeList())
    instances = [self.cfg.GetInstanceInfo(name)
                 for name in self.cfg.GetInstanceList()]

    nv_dict = {}
    for inst in instances:
      inst_lvs = {}
      if (not inst.admin_up or
          inst.disk_template not in constants.DTS_NET_MIRROR):
        continue
      inst.MapLVsByNode(inst_lvs)
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
      for node, vol_list in inst_lvs.iteritems():
        for vol in vol_list:
          nv_dict[(node, vol)] = inst

    if not nv_dict:
      return result

    node_lvs = self.rpc.call_lv_list(nodes, vg_name)

    for node in nodes:
      # node_volume
      node_res = node_lvs[node]
      if node_res.offline:
        continue
      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg
        continue

      lvs = node_res.payload
      for lv_name, (_, _, lv_online) in lvs.items():
        inst = nv_dict.pop((node, lv_name), None)
        if (not lv_online and inst is not None
            and inst.name not in res_instances):
          res_instances.append(inst.name)

    # any leftover items in nv_dict are missing LVs, let's arrange the
    # data better
    for key, inst in nv_dict.iteritems():
      if inst.name not in res_missing:
        res_missing[inst.name] = []
      res_missing[inst.name].append(key)

    return result


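# Illustrative note (added comment, values are hypothetical): the tuple
# returned by LUVerifyDisks.Exec has the shape
#   ({"node2": "Error while ..."},          # nodes that could not be queried
#    ["inst3"],                             # instances needing activate-disks
#    {"inst5": [("node1", "disk0_data")]})  # missing (node, volume) pairs
# where the volume names are whatever MapLVsByNode recorded for the instance.
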
class LURepairDiskSizes(NoHooksLU):
2043
  """Verifies the cluster disks sizes.
2044

2045
  """
2046
  _OP_REQP = ["instances"]
2047
  REQ_BGL = False
2048

    
2049
  def ExpandNames(self):
2050
    if not isinstance(self.op.instances, list):
2051
      raise errors.OpPrereqError("Invalid argument type 'instances'",
2052
                                 errors.ECODE_INVAL)
2053

    
2054
    if self.op.instances:
2055
      self.wanted_names = []
2056
      for name in self.op.instances:
2057
        full_name = _ExpandInstanceName(self.cfg, name)
2058
        self.wanted_names.append(full_name)
2059
      self.needed_locks = {
2060
        locking.LEVEL_NODE: [],
2061
        locking.LEVEL_INSTANCE: self.wanted_names,
2062
        }
2063
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2064
    else:
2065
      self.wanted_names = None
2066
      self.needed_locks = {
2067
        locking.LEVEL_NODE: locking.ALL_SET,
2068
        locking.LEVEL_INSTANCE: locking.ALL_SET,
2069
        }
2070
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2071

    
2072
  def DeclareLocks(self, level):
2073
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
2074
      self._LockInstancesNodes(primary_only=True)
2075

    
2076
  def CheckPrereq(self):
2077
    """Check prerequisites.
2078

2079
    This only checks the optional instance list against the existing names.
2080

2081
    """
2082
    if self.wanted_names is None:
2083
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2084

    
2085
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2086
                             in self.wanted_names]
2087

    
2088
  def _EnsureChildSizes(self, disk):
2089
    """Ensure children of the disk have the needed disk size.
2090

2091
    This is valid mainly for DRBD8 and fixes an issue where the
2092
    children have smaller disk size.
2093

2094
    @param disk: an L{ganeti.objects.Disk} object
2095

2096
    """
2097
    if disk.dev_type == constants.LD_DRBD8:
2098
      assert disk.children, "Empty children for DRBD8?"
2099
      fchild = disk.children[0]
2100
      mismatch = fchild.size < disk.size
2101
      if mismatch:
2102
        self.LogInfo("Child disk has size %d, parent %d, fixing",
2103
                     fchild.size, disk.size)
2104
        fchild.size = disk.size
2105

    
2106
      # and we recurse on this child only, not on the metadev
2107
      return self._EnsureChildSizes(fchild) or mismatch
2108
    else:
2109
      return False
2110

    
2111
  def Exec(self, feedback_fn):
2112
    """Verify the size of cluster disks.
2113

2114
    """
2115
    # TODO: check child disks too
2116
    # TODO: check differences in size between primary/secondary nodes
2117
    per_node_disks = {}
2118
    for instance in self.wanted_instances:
2119
      pnode = instance.primary_node
2120
      if pnode not in per_node_disks:
2121
        per_node_disks[pnode] = []
2122
      for idx, disk in enumerate(instance.disks):
2123
        per_node_disks[pnode].append((instance, idx, disk))
2124

    
2125
    changed = []
2126
    for node, dskl in per_node_disks.items():
2127
      newl = [v[2].Copy() for v in dskl]
2128
      for dsk in newl:
2129
        self.cfg.SetDiskID(dsk, node)
2130
      result = self.rpc.call_blockdev_getsizes(node, newl)
2131
      if result.fail_msg:
2132
        self.LogWarning("Failure in blockdev_getsizes call to node"
2133
                        " %s, ignoring", node)
2134
        continue
2135
      if len(result.data) != len(dskl):
2136
        self.LogWarning("Invalid result from node %s, ignoring node results",
2137
                        node)
2138
        continue
2139
      for ((instance, idx, disk), size) in zip(dskl, result.data):
2140
        if size is None:
2141
          self.LogWarning("Disk %d of instance %s did not return size"
2142
                          " information, ignoring", idx, instance.name)
2143
          continue
2144
        if not isinstance(size, (int, long)):
2145
          self.LogWarning("Disk %d of instance %s did not return valid"
2146
                          " size information, ignoring", idx, instance.name)
2147
          continue
2148
        size = size >> 20
2149
        if size != disk.size:
2150
          self.LogInfo("Disk %d of instance %s has mismatched size,"
2151
                       " correcting: recorded %d, actual %d", idx,
2152
                       instance.name, disk.size, size)
2153
          disk.size = size
2154
          self.cfg.Update(instance, feedback_fn)
2155
          changed.append((instance.name, idx, size))
2156
        if self._EnsureChildSizes(disk):
2157
          self.cfg.Update(instance, feedback_fn)
2158
          changed.append((instance.name, idx, disk.size))
2159
    return changed
2160

    
2161

    
2162
class LURenameCluster(LogicalUnit):
2163
  """Rename the cluster.
2164

2165
  """
2166
  HPATH = "cluster-rename"
2167
  HTYPE = constants.HTYPE_CLUSTER
2168
  _OP_REQP = ["name"]
2169

    
2170
  def BuildHooksEnv(self):
2171
    """Build hooks env.
2172

2173
    """
2174
    env = {
2175
      "OP_TARGET": self.cfg.GetClusterName(),
2176
      "NEW_NAME": self.op.name,
2177
      }
2178
    mn = self.cfg.GetMasterNode()
2179
    all_nodes = self.cfg.GetNodeList()
2180
    return env, [mn], all_nodes
2181

    
2182
  def CheckPrereq(self):
2183
    """Verify that the passed name is a valid one.
2184

2185
    """
2186
    hostname = utils.GetHostInfo(self.op.name)
2187

    
2188
    new_name = hostname.name
2189
    self.ip = new_ip = hostname.ip
2190
    old_name = self.cfg.GetClusterName()
2191
    old_ip = self.cfg.GetMasterIP()
2192
    if new_name == old_name and new_ip == old_ip:
2193
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
2194
                                 " cluster has changed",
2195
                                 errors.ECODE_INVAL)
2196
    if new_ip != old_ip:
2197
      if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2198
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
2199
                                   " reachable on the network. Aborting." %
2200
                                   new_ip, errors.ECODE_NOTUNIQUE)
2201

    
2202
    self.op.name = new_name
2203

    
2204
  def Exec(self, feedback_fn):
2205
    """Rename the cluster.
2206

2207
    """
2208
    clustername = self.op.name
2209
    ip = self.ip
2210

    
2211
    # shutdown the master IP
2212
    master = self.cfg.GetMasterNode()
2213
    result = self.rpc.call_node_stop_master(master, False)
2214
    result.Raise("Could not disable the master role")
2215

    
2216
    try:
2217
      cluster = self.cfg.GetClusterInfo()
2218
      cluster.cluster_name = clustername
2219
      cluster.master_ip = ip
2220
      self.cfg.Update(cluster, feedback_fn)
2221

    
2222
      # update the known hosts file
2223
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2224
      node_list = self.cfg.GetNodeList()
2225
      try:
2226
        node_list.remove(master)
2227
      except ValueError:
2228
        pass
2229
      result = self.rpc.call_upload_file(node_list,
2230
                                         constants.SSH_KNOWN_HOSTS_FILE)
2231
      for to_node, to_result in result.iteritems():
2232
        msg = to_result.fail_msg
2233
        if msg:
2234
          msg = ("Copy of file %s to node %s failed: %s" %
2235
                 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
2236
          self.proc.LogWarning(msg)
2237

    
2238
    finally:
2239
      result = self.rpc.call_node_start_master(master, False, False)
2240
      msg = result.fail_msg
2241
      if msg:
2242
        self.LogWarning("Could not re-enable the master role on"
2243
                        " the master, please restart manually: %s", msg)
2244

    
2245

    
2246
def _RecursiveCheckIfLVMBased(disk):
  """Check if the given disk or its children are lvm-based.

  @type disk: L{objects.Disk}
  @param disk: the disk to check
  @rtype: boolean
  @return: boolean indicating whether a LD_LV dev_type was found or not

  """
  if disk.children:
    for chdisk in disk.children:
      if _RecursiveCheckIfLVMBased(chdisk):
        return True
  return disk.dev_type == constants.LD_LV


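# Illustrative note (added comment): for a DRBD8 disk whose children are
# plain LVs this returns True (found via the recursion into disk.children),
# while for a purely file-based disk it returns False; LUSetClusterParams
# below relies on this to refuse disabling LVM storage while such disks
# exist.
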
class LUSetClusterParams(LogicalUnit):
2263
  """Change the parameters of the cluster.
2264

2265
  """
2266
  HPATH = "cluster-modify"
2267
  HTYPE = constants.HTYPE_CLUSTER
2268
  _OP_REQP = []
2269
  REQ_BGL = False
2270

    
2271
  def CheckArguments(self):
2272
    """Check parameters
2273

2274
    """
2275
    for attr in ["candidate_pool_size",
2276
                 "uid_pool", "add_uids", "remove_uids"]:
2277
      if not hasattr(self.op, attr):
2278
        setattr(self.op, attr, None)
2279

    
2280
    if self.op.candidate_pool_size is not None:
2281
      try:
2282
        self.op.candidate_pool_size = int(self.op.candidate_pool_size)
2283
      except (ValueError, TypeError), err:
2284
        raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
2285
                                   str(err), errors.ECODE_INVAL)
2286
      if self.op.candidate_pool_size < 1:
2287
        raise errors.OpPrereqError("At least one master candidate needed",
2288
                                   errors.ECODE_INVAL)
2289

    
2290
    _CheckBooleanOpField(self.op, "maintain_node_health")
2291

    
2292
    if self.op.uid_pool:
2293
      uidpool.CheckUidPool(self.op.uid_pool)
2294

    
2295
    if self.op.add_uids:
2296
      uidpool.CheckUidPool(self.op.add_uids)
2297

    
2298
    if self.op.remove_uids:
2299
      uidpool.CheckUidPool(self.op.remove_uids)
2300

    
2301
  def ExpandNames(self):
2302
    # FIXME: in the future maybe other cluster params won't require checking on
2303
    # all nodes to be modified.
2304
    self.needed_locks = {
2305
      locking.LEVEL_NODE: locking.ALL_SET,
2306
    }
2307
    self.share_locks[locking.LEVEL_NODE] = 1
2308

    
2309
  def BuildHooksEnv(self):
2310
    """Build hooks env.
2311

2312
    """
2313
    env = {
2314
      "OP_TARGET": self.cfg.GetClusterName(),
2315
      "NEW_VG_NAME": self.op.vg_name,
2316
      }
2317
    mn = self.cfg.GetMasterNode()
2318
    return env, [mn], [mn]
2319

    
2320
  def CheckPrereq(self):
2321
    """Check prerequisites.
2322

2323
    This checks whether the given params don't conflict and
2324
    if the given volume group is valid.
2325

2326
    """
2327
    if self.op.vg_name is not None and not self.op.vg_name:
2328
      instances = self.cfg.GetAllInstancesInfo().values()
2329
      for inst in instances:
2330
        for disk in inst.disks:
2331
          if _RecursiveCheckIfLVMBased(disk):
2332
            raise errors.OpPrereqError("Cannot disable lvm storage while"
2333
                                       " lvm-based instances exist",
2334
                                       errors.ECODE_INVAL)
2335

    
2336
    node_list = self.acquired_locks[locking.LEVEL_NODE]
2337

    
2338
    # if vg_name not None, checks given volume group on all nodes
2339
    if self.op.vg_name:
2340
      vglist = self.rpc.call_vg_list(node_list)
2341
      for node in node_list:
2342
        msg = vglist[node].fail_msg
2343
        if msg:
2344
          # ignoring down node
2345
          self.LogWarning("Error while gathering data on node %s"
2346
                          " (ignoring node): %s", node, msg)
2347
          continue
2348
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2349
                                              self.op.vg_name,
2350
                                              constants.MIN_VG_SIZE)
2351
        if vgstatus:
2352
          raise errors.OpPrereqError("Error on node '%s': %s" %
2353
                                     (node, vgstatus), errors.ECODE_ENVIRON)
2354

    
2355
    self.cluster = cluster = self.cfg.GetClusterInfo()
2356
    # validate params changes
2357
    if self.op.beparams:
2358
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2359
      self.new_beparams = objects.FillDict(
2360
        cluster.beparams[constants.PP_DEFAULT], self.op.beparams)
2361

    
2362
    if self.op.nicparams:
2363
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2364
      self.new_nicparams = objects.FillDict(
2365
        cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
2366
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
2367
      nic_errors = []
2368

    
2369
      # check all instances for consistency
2370
      for instance in self.cfg.GetAllInstancesInfo().values():
2371
        for nic_idx, nic in enumerate(instance.nics):
2372
          params_copy = copy.deepcopy(nic.nicparams)
2373
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
2374

    
2375
          # check parameter syntax
2376
          try:
2377
            objects.NIC.CheckParameterSyntax(params_filled)
2378
          except errors.ConfigurationError, err:
2379
            nic_errors.append("Instance %s, nic/%d: %s" %
2380
                              (instance.name, nic_idx, err))
2381

    
2382
          # if we're moving instances to routed, check that they have an ip
2383
          target_mode = params_filled[constants.NIC_MODE]
2384
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2385
            nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2386
                              (instance.name, nic_idx))
2387
      if nic_errors:
2388
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2389
                                   "\n".join(nic_errors))
2390

    
2391
    # hypervisor list/parameters
2392
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2393
    if self.op.hvparams:
2394
      if not isinstance(self.op.hvparams, dict):
2395
        raise errors.OpPrereqError("Invalid 'hvparams' parameter on input",
2396
                                   errors.ECODE_INVAL)
2397
      for hv_name, hv_dict in self.op.hvparams.items():
2398
        if hv_name not in self.new_hvparams:
2399
          self.new_hvparams[hv_name] = hv_dict
2400
        else:
2401
          self.new_hvparams[hv_name].update(hv_dict)
2402

    
2403
    # os hypervisor parameters
2404
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2405
    if self.op.os_hvp:
2406
      if not isinstance(self.op.os_hvp, dict):
2407
        raise errors.OpPrereqError("Invalid 'os_hvp' parameter on input",
2408
                                   errors.ECODE_INVAL)
2409
      for os_name, hvs in self.op.os_hvp.items():
2410
        if not isinstance(hvs, dict):
2411
          raise errors.OpPrereqError(("Invalid 'os_hvp' parameter on"
2412
                                      " input"), errors.ECODE_INVAL)
2413
        if os_name not in self.new_os_hvp:
2414
          self.new_os_hvp[os_name] = hvs
2415
        else:
2416
          for hv_name, hv_dict in hvs.items():
2417
            if hv_name not in self.new_os_hvp[os_name]:
2418
              self.new_os_hvp[os_name][hv_name] = hv_dict
2419
            else:
2420
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
2421

    
2422
    # changes to the hypervisor list
2423
    if self.op.enabled_hypervisors is not None:
2424
      self.hv_list = self.op.enabled_hypervisors
2425
      if not self.hv_list:
2426
        raise errors.OpPrereqError("Enabled hypervisors list must contain at"
2427
                                   " least one member",
2428
                                   errors.ECODE_INVAL)
2429
      invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
2430
      if invalid_hvs:
2431
        raise errors.OpPrereqError("Enabled hypervisors contains invalid"
2432
                                   " entries: %s" %
2433
                                   utils.CommaJoin(invalid_hvs),
2434
                                   errors.ECODE_INVAL)
2435
      for hv in self.hv_list:
2436
        # if the hypervisor doesn't already exist in the cluster
2437
        # hvparams, we initialize it to empty, and then (in both
2438
        # cases) we make sure to fill the defaults, as we might not
2439
        # have a complete defaults list if the hypervisor wasn't
2440
        # enabled before
2441
        if hv not in new_hvp:
2442
          new_hvp[hv] = {}
2443
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2444
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2445
    else:
2446
      self.hv_list = cluster.enabled_hypervisors
2447

    
2448
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
2449
      # either the enabled list has changed, or the parameters have, validate
2450
      for hv_name, hv_params in self.new_hvparams.items():
2451
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
2452
            (self.op.enabled_hypervisors and
2453
             hv_name in self.op.enabled_hypervisors)):
2454
          # either this is a new hypervisor, or its parameters have changed
2455
          hv_class = hypervisor.GetHypervisor(hv_name)
2456
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2457
          hv_class.CheckParameterSyntax(hv_params)
2458
          _CheckHVParams(self, node_list, hv_name, hv_params)
2459

    
2460
    if self.op.os_hvp:
2461
      # no need to check any newly-enabled hypervisors, since the
2462
      # defaults have already been checked in the above code-block
2463
      for os_name, os_hvp in self.new_os_hvp.items():
2464
        for hv_name, hv_params in os_hvp.items():
2465
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2466
          # we need to fill in the new os_hvp on top of the actual hv_p
2467
          cluster_defaults = self.new_hvparams.get(hv_name, {})
2468
          new_osp = objects.FillDict(cluster_defaults, hv_params)
2469
          hv_class = hypervisor.GetHypervisor(hv_name)
2470
          hv_class.CheckParameterSyntax(new_osp)
2471
          _CheckHVParams(self, node_list, hv_name, new_osp)
2472

    
2473

    
2474
  def Exec(self, feedback_fn):
2475
    """Change the parameters of the cluster.
2476

2477
    """
2478
    if self.op.vg_name is not None:
2479
      new_volume = self.op.vg_name
2480
      if not new_volume:
2481
        new_volume = None
2482
      if new_volume != self.cfg.GetVGName():
2483
        self.cfg.SetVGName(new_volume)
2484
      else:
2485
        feedback_fn("Cluster LVM configuration already in desired"
2486
                    " state, not changing")
2487
    if self.op.hvparams:
2488
      self.cluster.hvparams = self.new_hvparams
2489
    if self.op.os_hvp:
2490
      self.cluster.os_hvp = self.new_os_hvp
2491
    if self.op.enabled_hypervisors is not None:
2492
      self.cluster.hvparams = self.new_hvparams
2493
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2494
    if self.op.beparams:
2495
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2496
    if self.op.nicparams:
2497
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2498

    
2499
    if self.op.candidate_pool_size is not None:
2500
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
2501
      # we need to update the pool size here, otherwise the save will fail
2502
      _AdjustCandidatePool(self, [])
2503

    
2504
    if self.op.maintain_node_health is not None:
2505
      self.cluster.maintain_node_health = self.op.maintain_node_health
2506

    
2507
    if self.op.add_uids is not None:
2508
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2509

    
2510
    if self.op.remove_uids is not None:
2511
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2512

    
2513
    if self.op.uid_pool is not None:
2514
      self.cluster.uid_pool = self.op.uid_pool
2515

    
2516
    self.cfg.Update(self.cluster, feedback_fn)
2517

    
2518

    
2519
def _RedistributeAncillaryFiles(lu, additional_nodes=None):
2520
  """Distribute additional files which are part of the cluster configuration.
2521

2522
  ConfigWriter takes care of distributing the config and ssconf files, but
2523
  there are more files which should be distributed to all nodes. This function
2524
  makes sure those are copied.
2525

2526
  @param lu: calling logical unit
2527
  @param additional_nodes: list of nodes not in the config to distribute to
2528

2529
  """
2530
  # 1. Gather target nodes
2531
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2532
  dist_nodes = lu.cfg.GetOnlineNodeList()
2533
  if additional_nodes is not None:
2534
    dist_nodes.extend(additional_nodes)
2535
  if myself.name in dist_nodes:
2536
    dist_nodes.remove(myself.name)
2537

    
2538
  # 2. Gather files to distribute
2539
  dist_files = set([constants.ETC_HOSTS,
2540
                    constants.SSH_KNOWN_HOSTS_FILE,
2541
                    constants.RAPI_CERT_FILE,
2542
                    constants.RAPI_USERS_FILE,
2543
                    constants.CONFD_HMAC_KEY,
2544
                   ])
2545

    
2546
  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2547
  for hv_name in enabled_hypervisors:
2548
    hv_class = hypervisor.GetHypervisor(hv_name)
2549
    dist_files.update(hv_class.GetAncillaryFiles())
2550

    
2551
  # 3. Perform the files upload
2552
  for fname in dist_files:
2553
    if os.path.exists(fname):
2554
      result = lu.rpc.call_upload_file(dist_nodes, fname)
2555
      for to_node, to_result in result.items():
2556
        msg = to_result.fail_msg
2557
        if msg:
2558
          msg = ("Copy of file %s to node %s failed: %s" %
2559
                 (fname, to_node, msg))
2560
          lu.proc.LogWarning(msg)
2561

    
2562

    
2563
class LURedistributeConfig(NoHooksLU):
2564
  """Force the redistribution of cluster configuration.
2565

2566
  This is a very simple LU.
2567

2568
  """
2569
  _OP_REQP = []
2570
  REQ_BGL = False
2571

    
2572
  def ExpandNames(self):
2573
    self.needed_locks = {
2574
      locking.LEVEL_NODE: locking.ALL_SET,
2575
    }
2576
    self.share_locks[locking.LEVEL_NODE] = 1
2577

    
2578
  def CheckPrereq(self):
2579
    """Check prerequisites.
2580

2581
    """
2582

    
2583
  def Exec(self, feedback_fn):
2584
    """Redistribute the configuration.
2585

2586
    """
2587
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
2588
    _RedistributeAncillaryFiles(self)
2589

    
2590

    
2591
def _WaitForSync(lu, instance, disks=None, oneshot=False):
2592
  """Sleep and poll for an instance's disk to sync.
2593

2594
  """
2595
  if not instance.disks or disks is not None and not disks:
2596
    return True
2597

    
2598
  disks = _ExpandCheckDisks(instance, disks)
2599

    
2600
  if not oneshot:
2601
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
2602

    
2603
  node = instance.primary_node
2604

    
2605
  for dev in disks:
2606
    lu.cfg.SetDiskID(dev, node)
2607

    
2608
  # TODO: Convert to utils.Retry
2609

    
2610
  retries = 0
2611
  degr_retries = 10 # in seconds, as we sleep 1 second each time
2612
  while True:
2613
    max_time = 0
2614
    done = True
2615
    cumul_degraded = False
2616
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
2617
    msg = rstats.fail_msg
2618
    if msg:
2619
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
2620
      retries += 1
2621
      if retries >= 10:
2622
        raise errors.RemoteError("Can't contact node %s for mirror data,"
2623
                                 " aborting." % node)
2624
      time.sleep(6)
2625
      continue
2626
    rstats = rstats.payload
2627
    retries = 0
2628
    for i, mstat in enumerate(rstats):
2629
      if mstat is None:
2630
        lu.LogWarning("Can't compute data for node %s/%s",
2631
                           node, disks[i].iv_name)
2632
        continue
2633

    
2634
      cumul_degraded = (cumul_degraded or
2635
                        (mstat.is_degraded and mstat.sync_percent is None))
2636
      if mstat.sync_percent is not None:
2637
        done = False
2638
        if mstat.estimated_time is not None:
2639
          rem_time = "%d estimated seconds remaining" % mstat.estimated_time
2640
          max_time = mstat.estimated_time
2641
        else:
2642
          rem_time = "no time estimate"
2643
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
2644
                        (disks[i].iv_name, mstat.sync_percent, rem_time))
2645

    
2646
    # if we're done but degraded, let's do a few small retries, to
2647
    # make sure we see a stable and not transient situation; therefore
2648
    # we force restart of the loop
2649
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
2650
      logging.info("Degraded disks found, %d retries left", degr_retries)
2651
      degr_retries -= 1
2652
      time.sleep(1)
2653
      continue
2654

    
2655
    if done or oneshot:
2656
      break
2657

    
2658
    time.sleep(min(60, max_time))
2659

    
2660
  if done:
2661
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
2662
  return not cumul_degraded
2663

    
2664

    
2665
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result


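# Illustrative note (added comment): the recursion above does not propagate
# the ldisk flag to dev.children, so only the top-level device is
# (optionally) held to the stricter "local disk is LDS_OKAY" test, while
# children are only checked for not being degraded overall.
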
class LUDiagnoseOS(NoHooksLU):
2700
  """Logical unit for OS diagnose/query.
2701

2702
  """
2703
  _OP_REQP = ["output_fields", "names"]
2704
  REQ_BGL = False
2705
  _FIELDS_STATIC = utils.FieldSet()
2706
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants")
2707
  # Fields that need calculation of global os validity
2708
  _FIELDS_NEEDVALID = frozenset(["valid", "variants"])
2709

    
2710
  def ExpandNames(self):
2711
    if self.op.names:
2712
      raise errors.OpPrereqError("Selective OS query not supported",
2713
                                 errors.ECODE_INVAL)
2714

    
2715
    _CheckOutputFields(static=self._FIELDS_STATIC,
2716
                       dynamic=self._FIELDS_DYNAMIC,
2717
                       selected=self.op.output_fields)
2718

    
2719
    # Lock all nodes, in shared mode
2720
    # Temporary removal of locks, should be reverted later
2721
    # TODO: reintroduce locks when they are lighter-weight
2722
    self.needed_locks = {}
2723
    #self.share_locks[locking.LEVEL_NODE] = 1
2724
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2725

    
2726
  def CheckPrereq(self):
2727
    """Check prerequisites.
2728

2729
    """
2730

    
2731
  @staticmethod
2732
  def _DiagnoseByOS(rlist):
2733
    """Remaps a per-node return list into an a per-os per-node dictionary
2734

2735
    @param rlist: a map with node names as keys and OS objects as values
2736

2737
    @rtype: dict
2738
    @return: a dictionary with osnames as keys and as value another map, with
2739
        nodes as keys and tuples of (path, status, diagnose) as values, eg::
2740

2741
          {"debian-etch": {"node1": [(/usr/lib/..., True, ""),
2742
                                     (/srv/..., False, "invalid api")],
2743
                           "node2": [(/srv/..., True, "")]}
2744
          }
2745

2746
    """
2747
    all_os = {}
2748
    # we build here the list of nodes that didn't fail the RPC (at RPC
2749
    # level), so that nodes with a non-responding node daemon don't
2750
    # make all OSes invalid
2751
    good_nodes = [node_name for node_name in rlist
2752
                  if not rlist[node_name].fail_msg]
2753
    for node_name, nr in rlist.items():
2754
      if nr.fail_msg or not nr.payload:
2755
        continue
2756
      for name, path, status, diagnose, variants in nr.payload:
2757
        if name not in all_os:
2758
          # build a list of nodes for this os containing empty lists
2759
          # for each node in node_list
2760
          all_os[name] = {}
2761
          for nname in good_nodes:
2762
            all_os[name][nname] = []
2763
        all_os[name][node_name].append((path, status, diagnose, variants))
2764
    return all_os
2765

    
2766
  def Exec(self, feedback_fn):
2767
    """Compute the list of OSes.
2768

2769
    """
2770
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
2771
    node_data = self.rpc.call_os_diagnose(valid_nodes)
2772
    pol = self._DiagnoseByOS(node_data)
2773
    output = []
2774
    calc_valid = self._FIELDS_NEEDVALID.intersection(self.op.output_fields)
2775
    calc_variants = "variants" in self.op.output_fields
2776

    
2777
    for os_name, os_data in pol.items():
2778
      row = []
2779
      if calc_valid:
2780
        valid = True
2781
        variants = None
2782
        for osl in os_data.values():
2783
          valid = bool(valid and osl and osl[0][1])
2784
          if not valid:
2785
            variants = set()
2786
            break
2787
          if calc_variants:
2788
            node_variants = osl[0][3]
2789
            if variants is None:
2790
              variants = set(node_variants)
2791
            else:
2792
              variants.intersection_update(node_variants)
2793

    
2794
      for field in self.op.output_fields:
2795
        if field == "name":
2796
          val = os_name
2797
        elif field == "valid":
2798
          val = valid
2799
        elif field == "node_status":
2800
          # this is just a copy of the dict
2801
          val = {}
2802
          for node_name, nos_list in os_data.items():
2803
            val[node_name] = nos_list
2804
        elif field == "variants":
2805
          val = list(variants)
2806
        else:
2807
          raise errors.ParameterError(field)
2808
        row.append(val)
2809
      output.append(row)
2810

    
2811
    return output
2812

    
2813

    
2814
class LURemoveNode(LogicalUnit):
2815
  """Logical unit for removing a node.
2816

2817
  """
2818
  HPATH = "node-remove"
2819
  HTYPE = constants.HTYPE_NODE
2820
  _OP_REQP = ["node_name"]
2821

    
2822
  def BuildHooksEnv(self):
2823
    """Build hooks env.
2824

2825
    This doesn't run on the target node in the pre phase as a failed
2826
    node would then be impossible to remove.
2827

2828
    """
2829
    env = {
2830
      "OP_TARGET": self.op.node_name,
2831
      "NODE_NAME": self.op.node_name,
2832
      }
2833
    all_nodes = self.cfg.GetNodeList()
2834
    try:
2835
      all_nodes.remove(self.op.node_name)
2836
    except ValueError:
2837
      logging.warning("Node %s which is about to be removed not found"
2838
                      " in the all nodes list", self.op.node_name)
2839
    return env, all_nodes, all_nodes
2840

    
2841
  def CheckPrereq(self):
2842
    """Check prerequisites.
2843

2844
    This checks:
2845
     - the node exists in the configuration
2846
     - it does not have primary or secondary instances
2847
     - it's not the master
2848

2849
    Any errors are signaled by raising errors.OpPrereqError.
2850

2851
    """
2852
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
2853
    node = self.cfg.GetNodeInfo(self.op.node_name)
2854
    assert node is not None
2855

    
2856
    instance_list = self.cfg.GetInstanceList()
2857

    
2858
    masternode = self.cfg.GetMasterNode()
2859
    if node.name == masternode:
2860
      raise errors.OpPrereqError("Node is the master node,"
2861
                                 " you need to failover first.",
2862
                                 errors.ECODE_INVAL)
2863

    
2864
    for instance_name in instance_list:
2865
      instance = self.cfg.GetInstanceInfo(instance_name)
2866
      if node.name in instance.all_nodes:
2867
        raise errors.OpPrereqError("Instance %s is still running on the node,"
2868
                                   " please remove first." % instance_name,
2869
                                   errors.ECODE_INVAL)
2870
    self.op.node_name = node.name
2871
    self.node = node
2872

    
2873
  def Exec(self, feedback_fn):
2874
    """Removes the node from the cluster.
2875

2876
    """
2877
    node = self.node
2878
    logging.info("Stopping the node daemon and removing configs from node %s",
2879
                 node.name)
2880

    
2881
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
2882

    
2883
    # Promote nodes to master candidate as needed
2884
    _AdjustCandidatePool(self, exceptions=[node.name])
2885
    self.context.RemoveNode(node.name)
2886

    
2887
    # Run post hooks on the node before it's removed
2888
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
2889
    try:
2890
      hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
2891
    except:
2892
      # pylint: disable-msg=W0702
2893
      self.LogWarning("Errors occurred running hooks on %s" % node.name)
2894

    
2895
    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
2896
    msg = result.fail_msg
2897
    if msg:
2898
      self.LogWarning("Errors encountered on the remote node while leaving"
2899
                      " the cluster: %s", msg)
2900

    
2901
    # Remove node from our /etc/hosts
2902
    if self.cfg.GetClusterInfo().modify_etc_hosts:
2903
      # FIXME: this should be done via an rpc call to node daemon
2904
      utils.RemoveHostFromEtcHosts(node.name)
2905
      _RedistributeAncillaryFiles(self)
2906

    
2907

    
2908
class LUQueryNodes(NoHooksLU):
2909
  """Logical unit for querying nodes.
2910

2911
  """
2912
  # pylint: disable-msg=W0142
2913
  _OP_REQP = ["output_fields", "names", "use_locking"]
2914
  REQ_BGL = False
2915

    
2916
  _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
2917
                    "master_candidate", "offline", "drained"]
2918

    
2919
  _FIELDS_DYNAMIC = utils.FieldSet(
2920
    "dtotal", "dfree",
2921
    "mtotal", "mnode", "mfree",
2922
    "bootid",
2923
    "ctotal", "cnodes", "csockets",
2924
    )
2925

    
2926
  _FIELDS_STATIC = utils.FieldSet(*[
2927
    "pinst_cnt", "sinst_cnt",
2928
    "pinst_list", "sinst_list",
2929
    "pip", "sip", "tags",
2930
    "master",
2931
    "role"] + _SIMPLE_FIELDS
2932
    )
2933

    
2934
  def ExpandNames(self):
2935
    _CheckOutputFields(static=self._FIELDS_STATIC,
2936
                       dynamic=self._FIELDS_DYNAMIC,
2937
                       selected=self.op.output_fields)
2938

    
2939
    self.needed_locks = {}
2940
    self.share_locks[locking.LEVEL_NODE] = 1
2941

    
2942
    if self.op.names:
2943
      self.wanted = _GetWantedNodes(self, self.op.names)
2944
    else:
2945
      self.wanted = locking.ALL_SET
2946

    
2947
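    # fields outside _FIELDS_STATIC are "live" data (memory, disk, CPU
    # figures) that can only be collected via RPC from the nodes
    # themselves; only then do we need node locks, and only if the
    # caller asked for locking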
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
2948
    self.do_locking = self.do_node_query and self.op.use_locking
2949
    if self.do_locking:
2950
      # if we don't request only static fields, we need to lock the nodes
2951
      self.needed_locks[locking.LEVEL_NODE] = self.wanted
2952

    
2953
  def CheckPrereq(self):
2954
    """Check prerequisites.
2955

2956
    """
2957
    # The validation of the node list is done in the _GetWantedNodes,
2958
    # if non empty, and if empty, there's no validation to do
2959
    pass
2960

    
2961
  def Exec(self, feedback_fn):
2962
    """Computes the list of nodes and their attributes.
2963

2964
    """
2965
    all_info = self.cfg.GetAllNodesInfo()
2966
    if self.do_locking:
2967
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
2968
    elif self.wanted != locking.ALL_SET:
2969
      nodenames = self.wanted
2970
      missing = set(nodenames).difference(all_info.keys())
2971
      if missing:
2972
        raise errors.OpExecError(
2973
          "Some nodes were removed before retrieving their data: %s" % missing)
2974
    else:
2975
      nodenames = all_info.keys()
2976

    
2977
    nodenames = utils.NiceSort(nodenames)
2978
    nodelist = [all_info[name] for name in nodenames]
2979

    
2980
    # begin data gathering
2981

    
2982
    if self.do_node_query:
2983
      live_data = {}
2984
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
2985
                                          self.cfg.GetHypervisorType())
2986
      for name in nodenames:
2987
        nodeinfo = node_data[name]
2988
        if not nodeinfo.fail_msg and nodeinfo.payload:
2989
          nodeinfo = nodeinfo.payload
2990
          fn = utils.TryConvert
2991
          live_data[name] = {
2992
            "mtotal": fn(int, nodeinfo.get('memory_total', None)),
2993
            "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
2994
            "mfree": fn(int, nodeinfo.get('memory_free', None)),
2995
            "dtotal": fn(int, nodeinfo.get('vg_size', None)),
2996
            "dfree": fn(int, nodeinfo.get('vg_free', None)),
2997
            "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
2998
            "bootid": nodeinfo.get('bootid', None),
2999
            "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
3000
            "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
3001
            }
3002
        else:
3003
          live_data[name] = {}
3004
    else:
3005
      live_data = dict.fromkeys(nodenames, {})
3006

    
3007
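    # map each node to the sets of instances for which it is the primary
    # resp. a secondary node; these are only filled in below if
    # instance-related fields were actually requested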
    node_to_primary = dict([(name, set()) for name in nodenames])
3008
    node_to_secondary = dict([(name, set()) for name in nodenames])
3009

    
3010
    inst_fields = frozenset(("pinst_cnt", "pinst_list",
3011
                             "sinst_cnt", "sinst_list"))
3012
    if inst_fields & frozenset(self.op.output_fields):
3013
      inst_data = self.cfg.GetAllInstancesInfo()
3014

    
3015
      for inst in inst_data.values():
3016
        if inst.primary_node in node_to_primary:
3017
          node_to_primary[inst.primary_node].add(inst.name)
3018
        for secnode in inst.secondary_nodes:
3019
          if secnode in node_to_secondary:
3020
            node_to_secondary[secnode].add(inst.name)
3021

    
3022
    master_node = self.cfg.GetMasterNode()
3023

    
3024
    # end data gathering
3025

    
3026
    output = []
3027
    for node in nodelist:
3028
      node_output = []
3029
      for field in self.op.output_fields:
3030
        if field in self._SIMPLE_FIELDS:
3031
          val = getattr(node, field)
3032
        elif field == "pinst_list":
3033
          val = list(node_to_primary[node.name])
3034
        elif field == "sinst_list":
3035
          val = list(node_to_secondary[node.name])
3036
        elif field == "pinst_cnt":
3037
          val = len(node_to_primary[node.name])
3038
        elif field == "sinst_cnt":
3039
          val = len(node_to_secondary[node.name])
3040
        elif field == "pip":
3041
          val = node.primary_ip
3042
        elif field == "sip":
3043
          val = node.secondary_ip
3044
        elif field == "tags":
3045
          val = list(node.GetTags())
3046
        elif field == "master":
3047
          val = node.name == master_node
3048
        elif self._FIELDS_DYNAMIC.Matches(field):
3049
          val = live_data[node.name].get(field, None)
3050
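        # one-letter role: M = master, C = master candidate, D = drained,
        # O = offline, R = regular node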
        elif field == "role":
3051
          if node.name == master_node:
3052
            val = "M"
3053
          elif node.master_candidate:
3054
            val = "C"
3055
          elif node.drained:
3056
            val = "D"
3057
          elif node.offline:
3058
            val = "O"
3059
          else:
3060
            val = "R"
3061
        else:
3062
          raise errors.ParameterError(field)
3063
        node_output.append(val)
3064
      output.append(node_output)
3065

    
3066
    return output
3067

    
3068

    
3069
class LUQueryNodeVolumes(NoHooksLU):
3070
  """Logical unit for getting volumes on node(s).
3071

3072
  """
3073
  _OP_REQP = ["nodes", "output_fields"]
3074
  REQ_BGL = False
3075
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3076
  _FIELDS_STATIC = utils.FieldSet("node")
3077

    
3078
  def ExpandNames(self):
3079
    _CheckOutputFields(static=self._FIELDS_STATIC,
3080
                       dynamic=self._FIELDS_DYNAMIC,
3081
                       selected=self.op.output_fields)
3082

    
3083
    self.needed_locks = {}
3084
    self.share_locks[locking.LEVEL_NODE] = 1
3085
    if not self.op.nodes:
3086
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3087
    else:
3088
      self.needed_locks[locking.LEVEL_NODE] = \
3089
        _GetWantedNodes(self, self.op.nodes)
3090

    
3091
  def CheckPrereq(self):
3092
    """Check prerequisites.
3093

3094
    This checks that the fields required are valid output fields.
3095

3096
    """
3097
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3098

    
3099
  def Exec(self, feedback_fn):
3100
    """Computes the list of nodes and their attributes.
3101

3102
    """
3103
    nodenames = self.nodes
3104
    volumes = self.rpc.call_node_volumes(nodenames)
3105

    
3106
    ilist = [self.cfg.GetInstanceInfo(iname) for iname
3107
             in self.cfg.GetInstanceList()]
3108

    
3109
    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3110

    
3111
    output = []
3112
    for node in nodenames:
3113
      nresult = volumes[node]
3114
      if nresult.offline:
3115
        continue
3116
      msg = nresult.fail_msg
3117
      if msg:
3118
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3119
        continue
3120

    
3121
      node_vols = nresult.payload[:]
3122
      node_vols.sort(key=lambda vol: vol['dev'])
3123

    
3124
      for vol in node_vols:
3125
        node_output = []
3126
        for field in self.op.output_fields:
3127
          if field == "node":
3128
            val = node
3129
          elif field == "phys":
3130
            val = vol['dev']
3131
          elif field == "vg":
3132
            val = vol['vg']
3133
          elif field == "name":
3134
            val = vol['name']
3135
          elif field == "size":
3136
            val = int(float(vol['size']))
3137
          elif field == "instance":
3138
            for inst in ilist:
3139
              if node not in lv_by_node[inst]:
3140
                continue
3141
              if vol['name'] in lv_by_node[inst][node]:
3142
                val = inst.name
3143
                break
3144
            else:
3145
              val = '-'
3146
          else:
3147
            raise errors.ParameterError(field)
3148
          node_output.append(str(val))
3149

    
3150
        output.append(node_output)
3151

    
3152
    return output
3153

    
3154

    
3155
class LUQueryNodeStorage(NoHooksLU):
3156
  """Logical unit for getting information on storage units on node(s).
3157

3158
  """
3159
  _OP_REQP = ["nodes", "storage_type", "output_fields"]
3160
  REQ_BGL = False
3161
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3162

    
3163
  def CheckArguments(self):
3164
    _CheckStorageType(self.op.storage_type)
3165

    
3166
    _CheckOutputFields(static=self._FIELDS_STATIC,
3167
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3168
                       selected=self.op.output_fields)
3169

    
3170
  def ExpandNames(self):
3171
    self.needed_locks = {}
3172
    self.share_locks[locking.LEVEL_NODE] = 1
3173

    
3174
    if self.op.nodes:
3175
      self.needed_locks[locking.LEVEL_NODE] = \
3176
        _GetWantedNodes(self, self.op.nodes)
3177
    else:
3178
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3179

    
3180
  def CheckPrereq(self):
3181
    """Check prerequisites.
3182

3183
    This checks that the fields required are valid output fields.
3184

3185
    """
3186
    self.op.name = getattr(self.op, "name", None)
3187

    
3188
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3189

    
3190
  def Exec(self, feedback_fn):
3191
    """Computes the list of nodes and their attributes.
3192

3193
    """
3194
    # Always get name to sort by
3195
    if constants.SF_NAME in self.op.output_fields:
3196
      fields = self.op.output_fields[:]
3197
    else:
3198
      fields = [constants.SF_NAME] + self.op.output_fields
3199

    
3200
    # Never ask for node or type as it's only known to the LU
3201
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
3202
      while extra in fields:
3203
        fields.remove(extra)
3204

    
3205
    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3206
    name_idx = field_idx[constants.SF_NAME]
3207

    
3208
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3209
    data = self.rpc.call_storage_list(self.nodes,
3210
                                      self.op.storage_type, st_args,
3211
                                      self.op.name, fields)
3212

    
3213
    result = []
3214

    
3215
    for node in utils.NiceSort(self.nodes):
3216
      nresult = data[node]
3217
      if nresult.offline:
3218
        continue
3219

    
3220
      msg = nresult.fail_msg
3221
      if msg:
3222
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3223
        continue
3224

    
3225
      rows = dict([(row[name_idx], row) for row in nresult.payload])
3226

    
3227
      for name in utils.NiceSort(rows.keys()):
3228
        row = rows[name]
3229

    
3230
        out = []
3231

    
3232
        for field in self.op.output_fields:
3233
          if field == constants.SF_NODE:
3234
            val = node
3235
          elif field == constants.SF_TYPE:
3236
            val = self.op.storage_type
3237
          elif field in field_idx:
3238
            val = row[field_idx[field]]
3239
          else:
3240
            raise errors.ParameterError(field)
3241

    
3242
          out.append(val)
3243

    
3244
        result.append(out)
3245

    
3246
    return result
3247

    
3248

    
3249
class LUModifyNodeStorage(NoHooksLU):
3250
  """Logical unit for modifying a storage volume on a node.
3251

3252
  """
3253
  _OP_REQP = ["node_name", "storage_type", "name", "changes"]
3254
  REQ_BGL = False
3255

    
3256
  def CheckArguments(self):
3257
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3258

    
3259
    _CheckStorageType(self.op.storage_type)
3260

    
3261
  def ExpandNames(self):
3262
    self.needed_locks = {
3263
      locking.LEVEL_NODE: self.op.node_name,
3264
      }
3265

    
3266
  def CheckPrereq(self):
3267
    """Check prerequisites.
3268

3269
    """
3270
    storage_type = self.op.storage_type
3271

    
3272
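    # only the storage types listed in constants.MODIFIABLE_STORAGE_FIELDS
    # can be modified at all, and for those only the listed fields may be
    # changed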
    try:
3273
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
3274
    except KeyError:
3275
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
3276
                                 " modified" % storage_type,
3277
                                 errors.ECODE_INVAL)
3278

    
3279
    diff = set(self.op.changes.keys()) - modifiable
3280
    if diff:
3281
      raise errors.OpPrereqError("The following fields can not be modified for"
3282
                                 " storage units of type '%s': %r" %
3283
                                 (storage_type, list(diff)),
3284
                                 errors.ECODE_INVAL)
3285

    
3286
  def Exec(self, feedback_fn):
3287
    """Computes the list of nodes and their attributes.
3288

3289
    """
3290
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3291
    result = self.rpc.call_storage_modify(self.op.node_name,
3292
                                          self.op.storage_type, st_args,
3293
                                          self.op.name, self.op.changes)
3294
    result.Raise("Failed to modify storage unit '%s' on %s" %
3295
                 (self.op.name, self.op.node_name))
3296

    
3297

    
3298
class LUAddNode(LogicalUnit):
3299
  """Logical unit for adding node to the cluster.
3300

3301
  """
3302
  HPATH = "node-add"
3303
  HTYPE = constants.HTYPE_NODE
3304
  _OP_REQP = ["node_name"]
3305

    
3306
  def CheckArguments(self):
3307
    # validate/normalize the node name
3308
    self.op.node_name = utils.HostInfo.NormalizeName(self.op.node_name)
3309

    
3310
  def BuildHooksEnv(self):
3311
    """Build hooks env.
3312

3313
    This will run on all nodes before, and on all nodes + the new node after.
3314

3315
    """
3316
    env = {
3317
      "OP_TARGET": self.op.node_name,
3318
      "NODE_NAME": self.op.node_name,
3319
      "NODE_PIP": self.op.primary_ip,
3320
      "NODE_SIP": self.op.secondary_ip,
3321
      }
3322
    nodes_0 = self.cfg.GetNodeList()
3323
    nodes_1 = nodes_0 + [self.op.node_name, ]
3324
    return env, nodes_0, nodes_1
3325

    
3326
  def CheckPrereq(self):
3327
    """Check prerequisites.
3328

3329
    This checks:
3330
     - the new node is not already in the config
3331
     - it is resolvable
3332
     - its parameters (single/dual homed) matches the cluster
3333

3334
    Any errors are signaled by raising errors.OpPrereqError.
3335

3336
    """
3337
    node_name = self.op.node_name
3338
    cfg = self.cfg
3339

    
3340
    dns_data = utils.GetHostInfo(node_name)
3341

    
3342
    node = dns_data.name
3343
    primary_ip = self.op.primary_ip = dns_data.ip
3344
    secondary_ip = getattr(self.op, "secondary_ip", None)
3345
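    # without an explicit secondary IP the new node is treated as
    # single-homed: its secondary IP simply duplicates the primary one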
    if secondary_ip is None:
3346
      secondary_ip = primary_ip
3347
    if not utils.IsValidIP(secondary_ip):
3348
      raise errors.OpPrereqError("Invalid secondary IP given",
3349
                                 errors.ECODE_INVAL)
3350
    self.op.secondary_ip = secondary_ip
3351

    
3352
    node_list = cfg.GetNodeList()
3353
    if not self.op.readd and node in node_list:
3354
      raise errors.OpPrereqError("Node %s is already in the configuration" %
3355
                                 node, errors.ECODE_EXISTS)
3356
    elif self.op.readd and node not in node_list:
3357
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3358
                                 errors.ECODE_NOENT)
3359

    
3360
    self.changed_primary_ip = False
3361

    
3362
    for existing_node_name in node_list:
3363
      existing_node = cfg.GetNodeInfo(existing_node_name)
3364

    
3365
      if self.op.readd and node == existing_node_name:
3366
        if existing_node.secondary_ip != secondary_ip:
3367
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
3368
                                     " address configuration as before",
3369
                                     errors.ECODE_INVAL)
3370
        if existing_node.primary_ip != primary_ip:
3371
          self.changed_primary_ip = True
3372

    
3373
        continue
3374

    
3375
      if (existing_node.primary_ip == primary_ip or
3376
          existing_node.secondary_ip == primary_ip or
3377
          existing_node.primary_ip == secondary_ip or
3378
          existing_node.secondary_ip == secondary_ip):
3379
        raise errors.OpPrereqError("New node ip address(es) conflict with"
3380
                                   " existing node %s" % existing_node.name,
3381
                                   errors.ECODE_NOTUNIQUE)
3382

    
3383
    # check that the type of the node (single versus dual homed) is the
3384
    # same as for the master
3385
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3386
    master_singlehomed = myself.secondary_ip == myself.primary_ip
3387
    newbie_singlehomed = secondary_ip == primary_ip
3388
    if master_singlehomed != newbie_singlehomed:
3389
      if master_singlehomed:
3390
        raise errors.OpPrereqError("The master has no private ip but the"
3391
                                   " new node has one",
3392
                                   errors.ECODE_INVAL)
3393
      else:
3394
        raise errors.OpPrereqError("The master has a private ip but the"
3395
                                   " new node doesn't have one",
3396
                                   errors.ECODE_INVAL)
3397

    
3398
    # checks reachability
3399
    if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3400
      raise errors.OpPrereqError("Node not reachable by ping",
3401
                                 errors.ECODE_ENVIRON)
3402

    
3403
    if not newbie_singlehomed:
3404
      # check reachability from my secondary ip to newbie's secondary ip
3405
      if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3406
                           source=myself.secondary_ip):
3407
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3408
                                   " based ping to noded port",
3409
                                   errors.ECODE_ENVIRON)
3410

    
3411
    if self.op.readd:
3412
      exceptions = [node]
3413
    else:
3414
      exceptions = []
3415

    
3416
    self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
3417

    
3418
    if self.op.readd:
3419
      self.new_node = self.cfg.GetNodeInfo(node)
3420
      assert self.new_node is not None, "Can't retrieve locked node %s" % node
3421
    else:
3422
      self.new_node = objects.Node(name=node,
3423
                                   primary_ip=primary_ip,
3424
                                   secondary_ip=secondary_ip,
3425
                                   master_candidate=self.master_candidate,
3426
                                   offline=False, drained=False)
3427

    
3428
  def Exec(self, feedback_fn):
3429
    """Adds the new node to the cluster.
3430

3431
    """
3432
    new_node = self.new_node
3433
    node = new_node.name
3434

    
3435
    # for re-adds, reset the offline/drained/master-candidate flags;
3436
    # we need to reset here, otherwise offline would prevent RPC calls
3437
    # later in the procedure; this also means that if the re-add
3438
    # fails, we are left with a non-offlined, broken node
3439
    if self.op.readd:
3440
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
3441
      self.LogInfo("Readding a node, the offline/drained flags were reset")
3442
      # if we demote the node, we do cleanup later in the procedure
3443
      new_node.master_candidate = self.master_candidate
3444
      if self.changed_primary_ip:
3445
        new_node.primary_ip = self.op.primary_ip
3446

    
3447
    # notify the user about any possible mc promotion
3448
    if new_node.master_candidate:
3449
      self.LogInfo("Node will be a master candidate")
3450

    
3451
    # check connectivity
3452
    result = self.rpc.call_version([node])[node]
3453
    result.Raise("Can't get version information from node %s" % node)
3454
    if constants.PROTOCOL_VERSION == result.payload:
3455
      logging.info("Communication to node %s fine, sw version %s match",
3456
                   node, result.payload)
3457
    else:
3458
      raise errors.OpExecError("Version mismatch master version %s,"
3459
                               " node version %s" %
3460
                               (constants.PROTOCOL_VERSION, result.payload))
3461

    
3462
    # setup ssh on node
3463
    if self.cfg.GetClusterInfo().modify_ssh_setup:
3464
      logging.info("Copy ssh key to node %s", node)
3465
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
3466
      keyarray = []
3467
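      # the files are read in a fixed order; the positional arguments of
      # call_node_add below rely on that ordering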
      keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
3468
                  constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
3469
                  priv_key, pub_key]
3470

    
3471
      for i in keyfiles:
3472
        keyarray.append(utils.ReadFile(i))
3473

    
3474
      result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
3475
                                      keyarray[2], keyarray[3], keyarray[4],
3476
                                      keyarray[5])
3477
      result.Raise("Cannot transfer ssh keys to the new node")
3478

    
3479
    # Add node to our /etc/hosts, and add key to known_hosts
3480
    if self.cfg.GetClusterInfo().modify_etc_hosts:
3481
      # FIXME: this should be done via an rpc call to node daemon
3482
      utils.AddHostToEtcHosts(new_node.name)
3483

    
3484
    if new_node.secondary_ip != new_node.primary_ip:
3485
      result = self.rpc.call_node_has_ip_address(new_node.name,
3486
                                                 new_node.secondary_ip)
3487
      result.Raise("Failure checking secondary ip on node %s" % new_node.name,
3488
                   prereq=True, ecode=errors.ECODE_ENVIRON)
3489
      if not result.payload:
3490
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
3491
                                 " you gave (%s). Please fix and re-run this"
3492
                                 " command." % new_node.secondary_ip)
3493

    
3494
    node_verify_list = [self.cfg.GetMasterNode()]
3495
    node_verify_param = {
3496
      constants.NV_NODELIST: [node],
3497
      # TODO: do a node-net-test as well?
3498
    }
3499

    
3500
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3501
                                       self.cfg.GetClusterName())
3502
    for verifier in node_verify_list:
3503
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
3504
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
3505
      if nl_payload:
3506
        for failed in nl_payload:
3507
          feedback_fn("ssh/hostname verification failed"
3508
                      " (checking from %s): %s" %
3509
                      (verifier, nl_payload[failed]))
3510
        raise errors.OpExecError("ssh/hostname verification failed.")
3511

    
3512
    if self.op.readd:
3513
      _RedistributeAncillaryFiles(self)
3514
      self.context.ReaddNode(new_node)
3515
      # make sure we redistribute the config
3516
      self.cfg.Update(new_node, feedback_fn)
3517
      # and make sure the new node will not have old files around
3518
      if not new_node.master_candidate:
3519
        result = self.rpc.call_node_demote_from_mc(new_node.name)
3520
        msg = result.fail_msg
3521
        if msg:
3522
          self.LogWarning("Node failed to demote itself from master"
3523
                          " candidate status: %s" % msg)
3524
    else:
3525
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
3526
      self.context.AddNode(new_node, self.proc.GetECId())
3527

    
3528

    
3529
class LUSetNodeParams(LogicalUnit):
3530
  """Modifies the parameters of a node.
3531

3532
  """
3533
  HPATH = "node-modify"
3534
  HTYPE = constants.HTYPE_NODE
3535
  _OP_REQP = ["node_name"]
3536
  REQ_BGL = False
3537

    
3538
  def CheckArguments(self):
3539
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3540
    _CheckBooleanOpField(self.op, 'master_candidate')
3541
    _CheckBooleanOpField(self.op, 'offline')
3542
    _CheckBooleanOpField(self.op, 'drained')
3543
    _CheckBooleanOpField(self.op, 'auto_promote')
3544
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
3545
    if all_mods.count(None) == 3:
3546
      raise errors.OpPrereqError("Please pass at least one modification",
3547
                                 errors.ECODE_INVAL)
3548
    if all_mods.count(True) > 1:
3549
      raise errors.OpPrereqError("Can't set the node into more than one"
3550
                                 " state at the same time",
3551
                                 errors.ECODE_INVAL)
3552

    
3553
    # Boolean value that tells us whether we're offlining or draining the node
3554
    self.offline_or_drain = (self.op.offline == True or
3555
                             self.op.drained == True)
3556
    self.deoffline_or_drain = (self.op.offline == False or
3557
                               self.op.drained == False)
3558
    self.might_demote = (self.op.master_candidate == False or
3559
                         self.offline_or_drain)
3560

    
3561
    self.lock_all = self.op.auto_promote and self.might_demote
3562
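    # demoting (or offlining/draining) this node might leave the cluster
    # short of master candidates; with auto_promote set we may have to
    # promote other nodes, which requires holding the locks of all nodes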

    
3563

    
3564
  def ExpandNames(self):
3565
    if self.lock_all:
3566
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
3567
    else:
3568
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
3569

    
3570
  def BuildHooksEnv(self):
3571
    """Build hooks env.
3572

3573
    This runs on the master node.
3574

3575
    """
3576
    env = {
3577
      "OP_TARGET": self.op.node_name,
3578
      "MASTER_CANDIDATE": str(self.op.master_candidate),
3579
      "OFFLINE": str(self.op.offline),
3580
      "DRAINED": str(self.op.drained),
3581
      }
3582
    nl = [self.cfg.GetMasterNode(),
3583
          self.op.node_name]
3584
    return env, nl, nl
3585

    
3586
  def CheckPrereq(self):
3587
    """Check prerequisites.
3588

3589
    This only checks the instance list against the existing names.
3590

3591
    """
3592
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
3593

    
3594
    if (self.op.master_candidate is not None or
3595
        self.op.drained is not None or
3596
        self.op.offline is not None):
3597
      # we can't change the master's node flags
3598
      if self.op.node_name == self.cfg.GetMasterNode():
3599
        raise errors.OpPrereqError("The master role can be changed"
3600
                                   " only via masterfailover",
3601
                                   errors.ECODE_INVAL)
3602

    
3603

    
3604
    if node.master_candidate and self.might_demote and not self.lock_all:
3605
      assert not self.op.auto_promote, "auto-promote set but lock_all not"
3606
      # check if after removing the current node, we're missing master
3607
      # candidates
3608
      (mc_remaining, mc_should, _) = \
3609
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
3610
      if mc_remaining < mc_should:
3611
        raise errors.OpPrereqError("Not enough master candidates, please"
3612
                                   " pass auto_promote to allow promotion",
3613
                                   errors.ECODE_INVAL)
3614

    
3615
    if (self.op.master_candidate == True and
3616
        ((node.offline and not self.op.offline == False) or
3617
         (node.drained and not self.op.drained == False))):
3618
      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
3619
                                 " to master_candidate" % node.name,
3620
                                 errors.ECODE_INVAL)
3621

    
3622
    # If we're being deofflined/drained, we'll MC ourself if needed
3623
    if (self.deoffline_or_drain and not self.offline_or_drain and not
3624
        self.op.master_candidate == True and not node.master_candidate):
3625
      self.op.master_candidate = _DecideSelfPromotion(self)
3626
      if self.op.master_candidate:
3627
        self.LogInfo("Autopromoting node to master candidate")
3628

    
3629
    return
3630

    
3631
  def Exec(self, feedback_fn):
3632
    """Modifies a node.
3633

3634
    """
3635
    node = self.node
3636

    
3637
    result = []
3638
    changed_mc = False
3639

    
3640
    if self.op.offline is not None:
3641
      node.offline = self.op.offline
3642
      result.append(("offline", str(self.op.offline)))
3643
      if self.op.offline == True:
3644
        if node.master_candidate:
3645
          node.master_candidate = False
3646
          changed_mc = True
3647
          result.append(("master_candidate", "auto-demotion due to offline"))
3648
        if node.drained:
3649
          node.drained = False
3650
          result.append(("drained", "clear drained status due to offline"))
3651

    
3652
    if self.op.master_candidate is not None:
3653
      node.master_candidate = self.op.master_candidate
3654
      changed_mc = True
3655
      result.append(("master_candidate", str(self.op.master_candidate)))
3656
      if self.op.master_candidate == False:
3657
        rrc = self.rpc.call_node_demote_from_mc(node.name)
3658
        msg = rrc.fail_msg
3659
        if msg:
3660
          self.LogWarning("Node failed to demote itself: %s" % msg)
3661

    
3662
    if self.op.drained is not None:
3663
      node.drained = self.op.drained
3664
      result.append(("drained", str(self.op.drained)))
3665
      if self.op.drained == True:
3666
        if node.master_candidate:
3667
          node.master_candidate = False
3668
          changed_mc = True
3669
          result.append(("master_candidate", "auto-demotion due to drain"))
3670
          rrc = self.rpc.call_node_demote_from_mc(node.name)
3671
          msg = rrc.fail_msg
3672
          if msg:
3673
            self.LogWarning("Node failed to demote itself: %s" % msg)
3674
        if node.offline:
3675
          node.offline = False
3676
          result.append(("offline", "clear offline status due to drain"))
3677

    
3678
    # we locked all nodes, we adjust the CP before updating this node
3679
    if self.lock_all:
3680
      _AdjustCandidatePool(self, [node.name])
3681

    
3682
    # this will trigger configuration file update, if needed
3683
    self.cfg.Update(node, feedback_fn)
3684

    
3685
    # this will trigger job queue propagation or cleanup
3686
    if changed_mc:
3687
      self.context.ReaddNode(node)
3688

    
3689
    return result
3690

    
3691

    
3692
class LUPowercycleNode(NoHooksLU):
3693
  """Powercycles a node.
3694

3695
  """
3696
  _OP_REQP = ["node_name", "force"]
3697
  REQ_BGL = False
3698

    
3699
  def CheckArguments(self):
3700
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3701
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
3702
      raise errors.OpPrereqError("The node is the master and the force"
3703
                                 " parameter was not set",
3704
                                 errors.ECODE_INVAL)
3705

    
3706
  def ExpandNames(self):
3707
    """Locking for PowercycleNode.
3708

3709
    This is a last-resort option and shouldn't block on other
3710
    jobs. Therefore, we grab no locks.
3711

3712
    """
3713
    self.needed_locks = {}
3714

    
3715
  def CheckPrereq(self):
3716
    """Check prerequisites.
3717

3718
    This LU has no prereqs.
3719

3720
    """
3721
    pass
3722

    
3723
  def Exec(self, feedback_fn):
3724
    """Reboots a node.
3725

3726
    """
3727
    result = self.rpc.call_node_powercycle(self.op.node_name,
3728
                                           self.cfg.GetHypervisorType())
3729
    result.Raise("Failed to schedule the reboot")
3730
    return result.payload
3731

    
3732

    
3733
class LUQueryClusterInfo(NoHooksLU):
3734
  """Query cluster configuration.
3735

3736
  """
3737
  _OP_REQP = []
3738
  REQ_BGL = False
3739

    
3740
  def ExpandNames(self):
3741
    self.needed_locks = {}
3742

    
3743
  def CheckPrereq(self):
3744
    """No prerequsites needed for this LU.
3745

3746
    """
3747
    pass
3748

    
3749
  def Exec(self, feedback_fn):
3750
    """Return cluster config.
3751

3752
    """
3753
    cluster = self.cfg.GetClusterInfo()
3754
    os_hvp = {}
3755

    
3756
    # Filter just for enabled hypervisors
3757
    for os_name, hv_dict in cluster.os_hvp.items():
3758
      os_hvp[os_name] = {}
3759
      for hv_name, hv_params in hv_dict.items():
3760
        if hv_name in cluster.enabled_hypervisors:
3761
          os_hvp[os_name][hv_name] = hv_params
3762

    
3763
    result = {
3764
      "software_version": constants.RELEASE_VERSION,
3765
      "protocol_version": constants.PROTOCOL_VERSION,
3766
      "config_version": constants.CONFIG_VERSION,
3767
      "os_api_version": max(constants.OS_API_VERSIONS),
3768
      "export_version": constants.EXPORT_VERSION,
3769
      "architecture": (platform.architecture()[0], platform.machine()),
3770
      "name": cluster.cluster_name,
3771
      "master": cluster.master_node,
3772
      "default_hypervisor": cluster.enabled_hypervisors[0],
3773
      "enabled_hypervisors": cluster.enabled_hypervisors,
3774
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
3775
                        for hypervisor_name in cluster.enabled_hypervisors]),
3776
      "os_hvp": os_hvp,
3777
      "beparams": cluster.beparams,
3778
      "nicparams": cluster.nicparams,
3779
      "candidate_pool_size": cluster.candidate_pool_size,
3780
      "master_netdev": cluster.master_netdev,
3781
      "volume_group_name": cluster.volume_group_name,
3782
      "file_storage_dir": cluster.file_storage_dir,
3783
      "maintain_node_health": cluster.maintain_node_health,
3784
      "ctime": cluster.ctime,
3785
      "mtime": cluster.mtime,
3786
      "uuid": cluster.uuid,
3787
      "tags": list(cluster.GetTags()),
3788
      "uid_pool": cluster.uid_pool,
3789
      }
3790

    
3791
    return result
3792

    
3793

    
3794
class LUQueryConfigValues(NoHooksLU):
3795
  """Return configuration values.
3796

3797
  """
3798
  _OP_REQP = []
3799
  REQ_BGL = False
3800
  _FIELDS_DYNAMIC = utils.FieldSet()
3801
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
3802
                                  "watcher_pause")
3803

    
3804
  def ExpandNames(self):
3805
    self.needed_locks = {}
3806

    
3807
    _CheckOutputFields(static=self._FIELDS_STATIC,
3808
                       dynamic=self._FIELDS_DYNAMIC,
3809
                       selected=self.op.output_fields)
3810

    
3811
  def CheckPrereq(self):
3812
    """No prerequisites.
3813

3814
    """
3815
    pass
3816

    
3817
  def Exec(self, feedback_fn):
3818
    """Dump a representation of the cluster config to the standard output.
3819

3820
    """
3821
    values = []
3822
    for field in self.op.output_fields:
3823
      if field == "cluster_name":
3824
        entry = self.cfg.GetClusterName()
3825
      elif field == "master_node":
3826
        entry = self.cfg.GetMasterNode()
3827
      elif field == "drain_flag":
3828
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
3829
      elif field == "watcher_pause":
3830
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
3831
      else:
3832
        raise errors.ParameterError(field)
3833
      values.append(entry)
3834
    return values
3835

    
3836

    
3837
class LUActivateInstanceDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)
    if not hasattr(self.op, "ignore_size"):
      self.op.ignore_size = False

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
              _AssembleInstanceDisks(self, self.instance,
                                     ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info


def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: a pair (disks_ok, device_info); device_info is a list of
      triples (node name, instance-visible disk name, node device path)
      with the mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for inst_disk in disks:
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for inst_disk in disks:
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info


def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                           ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")


class LUDeactivateInstanceDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    """
    instance = self.instance
    _SafeShutdownInstanceDisks(self, instance)


def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks if an instance is running, before calling
  _ShutdownInstanceDisks.

  """
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)


def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on

  """
  if disks is None:
    return instance.disks
  else:
    if not set(disks).issubset(instance.disks):
      raise errors.ProgrammerError("Can only act on disks belonging to the"
                                   " target instance")
    return disks


def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If ignore_primary is false, errors on the primary node are not
  ignored; they cause the shutdown to be reported as failed.

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if not ignore_primary or node != instance.primary_node:
          all_result = False
  return all_result


def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  free_mem = nodeinfo[node].payload.get('memory_free', None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)


def _CheckNodesFreeDisk(lu, nodenames, requested):
  """Checks if nodes have enough free disk space in the default VG.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
                                   lu.cfg.GetHypervisorType())
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    vg_free = info.payload.get("vg_free", None)
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node %s,"
                                 " result was '%s'" % (node, vg_free),
                                 errors.ECODE_ENVIRON)
    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s:"
                                 " required %d MiB, available %d MiB" %
                                 (node, requested, vg_free),
                                 errors.ECODE_NORES)
4141
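# Illustrative example only (node names and size are made up): an LU that
# is about to create a 2048 MiB volume on node1 and node2 would typically
# call
#   _CheckNodesFreeDisk(self, ["node1", "node2"], 2048)
# from its CheckPrereq, so the job aborts early with OpPrereqError instead
# of failing halfway through Exec.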

    
4142

    
4143
class LUStartupInstance(LogicalUnit):
4144
  """Starts an instance.
4145

4146
  """
4147
  HPATH = "instance-start"
4148
  HTYPE = constants.HTYPE_INSTANCE
4149
  _OP_REQP = ["instance_name", "force"]
4150
  REQ_BGL = False
4151

    
4152
  def ExpandNames(self):
4153
    self._ExpandAndLockInstance()
4154

    
4155
  def BuildHooksEnv(self):
4156
    """Build hooks env.
4157

4158
    This runs on master, primary and secondary nodes of the instance.
4159

4160
    """
4161
    env = {
4162
      "FORCE": self.op.force,
4163
      }
4164
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4165
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4166
    return env, nl, nl
4167

    
4168
  def CheckPrereq(self):
4169
    """Check prerequisites.
4170

4171
    This checks that the instance is in the cluster.
4172

4173
    """
4174
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4175
    assert self.instance is not None, \
4176
      "Cannot retrieve locked instance %s" % self.op.instance_name
4177

    
4178
    # extra beparams
4179
    self.beparams = getattr(self.op, "beparams", {})
4180
    if self.beparams:
4181
      if not isinstance(self.beparams, dict):
4182
        raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
4183
                                   " dict" % (type(self.beparams), ),
4184
                                   errors.ECODE_INVAL)
4185
      # fill the beparams dict
4186
      utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
4187
      self.op.beparams = self.beparams
4188

    
4189
    # extra hvparams
4190
    self.hvparams = getattr(self.op, "hvparams", {})
4191
    if self.hvparams:
4192
      if not isinstance(self.hvparams, dict):
4193
        raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
4194
                                   " dict" % (type(self.hvparams), ),
4195
                                   errors.ECODE_INVAL)
4196

    
4197
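      # note that these hvparams (like the beparams above) only apply to
      # this particular start operation; they are not written back to
      # the cluster configuration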
      # check hypervisor parameter syntax (locally)
4198
      cluster = self.cfg.GetClusterInfo()
4199
      utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
4200
      filled_hvp = cluster.FillHV(instance)
4201
      filled_hvp.update(self.hvparams)
4202
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
4203
      hv_type.CheckParameterSyntax(filled_hvp)
4204
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
4205
      self.op.hvparams = self.hvparams
4206

    
4207
    _CheckNodeOnline(self, instance.primary_node)
4208

    
4209
    bep = self.cfg.GetClusterInfo().FillBE(instance)
4210
    # check bridges existence
4211
    _CheckInstanceBridgesExist(self, instance)
4212

    
4213
    remote_info = self.rpc.call_instance_info(instance.primary_node,
4214
                                              instance.name,
4215
                                              instance.hypervisor)
4216
    remote_info.Raise("Error checking node %s" % instance.primary_node,
4217
                      prereq=True, ecode=errors.ECODE_ENVIRON)
4218
    if not remote_info.payload: # not running already
4219
      _CheckNodeFreeMemory(self, instance.primary_node,
4220
                           "starting instance %s" % instance.name,
4221
                           bep[constants.BE_MEMORY], instance.hypervisor)
4222

    
4223
  def Exec(self, feedback_fn):
4224
    """Start the instance.
4225

4226
    """
4227
    instance = self.instance
4228
    force = self.op.force
4229

    
4230
    self.cfg.MarkInstanceUp(instance.name)
4231

    
4232
    node_current = instance.primary_node
4233

    
4234
    _StartInstanceDisks(self, instance, force)
4235

    
4236
    result = self.rpc.call_instance_start(node_current, instance,
4237
                                          self.hvparams, self.beparams)
4238
    msg = result.fail_msg
4239
    if msg:
4240
      _ShutdownInstanceDisks(self, instance)
4241
      raise errors.OpExecError("Could not start instance: %s" % msg)
4242

    
4243

    
4244
class LURebootInstance(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
                                   constants.INSTANCE_REBOOT_HARD,
                                   constants.INSTANCE_REBOOT_FULL]:
      raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
                                  (constants.INSTANCE_REBOOT_SOFT,
                                   constants.INSTANCE_REBOOT_HARD,
                                   constants.INSTANCE_REBOOT_FULL))
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    node_current = instance.primary_node

    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                       constants.INSTANCE_REBOOT_HARD]:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, node_current)
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             reboot_type,
                                             self.shutdown_timeout)
      result.Raise("Could not reboot instance")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance,
                                               self.shutdown_timeout)
      result.Raise("Could not shutdown instance for full reboot")
      _ShutdownInstanceDisks(self, instance)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(node_current, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)


class LUShutdownInstance(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.timeout = getattr(self.op, "timeout",
                           constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.timeout
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    node_current = instance.primary_node
    timeout = self.timeout
    self.cfg.MarkInstanceDown(instance.name)
    result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
    msg = result.fail_msg
    if msg:
      self.proc.LogWarning("Could not shutdown instance: %s" % msg)

    _ShutdownInstanceDisks(self, instance)


class LUReinstallInstance(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot reinstall")

    self.op.os_type = getattr(self.op, "os_type", None)
    self.op.force_variant = getattr(self.op, "force_variant", False)
    if self.op.os_type is not None:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
                                             self.op.debug_level)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)


class LURecreateInstanceDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disks"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    if not isinstance(self.op.disks, list):
      raise errors.OpPrereqError("Invalid disks parameter", errors.ECODE_INVAL)
    for item in self.op.disks:
      if (not isinstance(item, int) or
          item < 0):
        raise errors.OpPrereqError("Invalid disk specification '%s'" %
                                   str(item), errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot recreate disks")

    if not self.op.disks:
      self.op.disks = range(len(instance.disks))
    else:
      for idx in self.op.disks:
        if idx >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
                                     errors.ECODE_INVAL)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Recreate the disks.

    """
    to_skip = []
    for idx, _ in enumerate(self.instance.disks):
      if idx not in self.op.disks: # disk idx has not been passed in
        to_skip.append(idx)
        continue

    _CreateDisks(self, self.instance, to_skip=to_skip)


class LURenameInstance(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "new_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None
    _CheckNodeOnline(self, instance.primary_node)
    _CheckInstanceDown(self, instance, "cannot rename")
    self.instance = instance

    # new name verification
    name_info = utils.GetHostInfo(self.op.new_name)

    self.op.new_name = new_name = name_info.name
    instance_list = self.cfg.GetInstanceList()
    if new_name in instance_list:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name, errors.ECODE_EXISTS)

    if not getattr(self.op, "ignore_ip", False):
      if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (name_info.ip, new_name),
                                   errors.ECODE_NOTUNIQUE)


  def Exec(self, feedback_fn):
    """Rename the instance.

    """
    inst = self.instance
    old_name = inst.name

    if inst.disk_template == constants.DT_FILE:
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if inst.disk_template == constants.DT_FILE:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (inst.primary_node, old_file_storage_dir,
                    new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    try:
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name, self.op.debug_level)
      msg = result.fail_msg
      if msg:
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.proc.LogWarning(msg)
    finally:
      _ShutdownInstanceDisks(self, inst)


class LURemoveInstance(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_failures"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["SHUTDOWN_TIMEOUT"] = self.shutdown_timeout
    nl = [self.cfg.GetMasterNode()]
    nl_post = list(self.instance.all_nodes) + nl
    return env, nl, nl_post

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    """
    instance = self.instance
    logging.info("Shutting down instance %s on node %s",
                 instance.name, instance.primary_node)

    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
                                             self.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_failures:
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, instance.primary_node, msg))

    logging.info("Removing block devices for instance %s", instance.name)

    if not _RemoveDisks(self, instance):
      if self.op.ignore_failures:
        feedback_fn("Warning: can't remove instance's disks")
      else:
        raise errors.OpExecError("Can't remove instance's disks")

    logging.info("Removing instance %s out of cluster config", instance.name)

    self.cfg.RemoveInstance(instance.name)
    self.remove_locks[locking.LEVEL_INSTANCE] = instance.name


class LUQueryInstances(NoHooksLU):
4707
  """Logical unit for querying instances.
4708

4709
  """
4710
  # pylint: disable-msg=W0142
4711
  _OP_REQP = ["output_fields", "names", "use_locking"]
4712
  REQ_BGL = False
4713
  _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
4714
                    "serial_no", "ctime", "mtime", "uuid"]
4715
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
4716
                                    "admin_state",
4717
                                    "disk_template", "ip", "mac", "bridge",
4718
                                    "nic_mode", "nic_link",
4719
                                    "sda_size", "sdb_size", "vcpus", "tags",
4720
                                    "network_port", "beparams",
4721
                                    r"(disk)\.(size)/([0-9]+)",
4722
                                    r"(disk)\.(sizes)", "disk_usage",
4723
                                    r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
4724
                                    r"(nic)\.(bridge)/([0-9]+)",
4725
                                    r"(nic)\.(macs|ips|modes|links|bridges)",
4726
                                    r"(disk|nic)\.(count)",
4727
                                    "hvparams",
4728
                                    ] + _SIMPLE_FIELDS +
4729
                                  ["hv/%s" % name
4730
                                   for name in constants.HVS_PARAMETERS
4731
                                   if name not in constants.HVC_GLOBALS] +
4732
                                  ["be/%s" % name
4733
                                   for name in constants.BES_PARAMETERS])
4734
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
4735

    
4736

    
4737
  def ExpandNames(self):
4738
    _CheckOutputFields(static=self._FIELDS_STATIC,
4739
                       dynamic=self._FIELDS_DYNAMIC,
4740
                       selected=self.op.output_fields)
4741

    
4742
    self.needed_locks = {}
4743
    self.share_locks[locking.LEVEL_INSTANCE] = 1
4744
    self.share_locks[locking.LEVEL_NODE] = 1
4745

    
4746
    if self.op.names:
4747
      self.wanted = _GetWantedInstances(self, self.op.names)
4748
    else:
4749
      self.wanted = locking.ALL_SET
4750

    
4751
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
4752
    self.do_locking = self.do_node_query and self.op.use_locking
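    # Only the dynamic fields (oper_state, oper_ram, status) need live data
    # from the nodes; static fields can be answered from the configuration
    # alone, so locking is requested only when dynamic fields were selected
    # and the caller explicitly asked for locking.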
4753
    if self.do_locking:
4754
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4755
      self.needed_locks[locking.LEVEL_NODE] = []
4756
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4757

    
4758
  def DeclareLocks(self, level):
4759
    if level == locking.LEVEL_NODE and self.do_locking:
4760
      self._LockInstancesNodes()
4761

    
4762
  def CheckPrereq(self):
4763
    """Check prerequisites.
4764

4765
    """
4766
    pass
4767

    
4768
  def Exec(self, feedback_fn):
4769
    """Computes the list of nodes and their attributes.
4770

4771
    """
4772
    # pylint: disable-msg=R0912
4773
    # way too many branches here
4774
    all_info = self.cfg.GetAllInstancesInfo()
4775
    if self.wanted == locking.ALL_SET:
4776
      # caller didn't specify instance names, so ordering is not important
4777
      if self.do_locking:
4778
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
4779
      else:
4780
        instance_names = all_info.keys()
4781
      instance_names = utils.NiceSort(instance_names)
4782
    else:
4783
      # caller did specify names, so we must keep the ordering
4784
      if self.do_locking:
4785
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
4786
      else:
4787
        tgt_set = all_info.keys()
4788
      missing = set(self.wanted).difference(tgt_set)
4789
      if missing:
4790
        raise errors.OpExecError("Some instances were removed before"
4791
                                 " retrieving their data: %s" % missing)
4792
      instance_names = self.wanted
4793

    
4794
    instance_list = [all_info[iname] for iname in instance_names]
4795

    
4796
    # begin data gathering
4797

    
4798
    nodes = frozenset([inst.primary_node for inst in instance_list])
4799
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
4800

    
4801
    bad_nodes = []
4802
    off_nodes = []
4803
    if self.do_node_query:
4804
      live_data = {}
4805
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
4806
      for name in nodes:
4807
        result = node_data[name]
4808
        if result.offline:
4809
          # offline nodes will be in both lists
4810
          off_nodes.append(name)
4811
        if result.fail_msg:
4812
          bad_nodes.append(name)
4813
        else:
4814
          if result.payload:
4815
            live_data.update(result.payload)
4816
          # else no instance is alive
4817
    else:
4818
      live_data = dict([(name, {}) for name in instance_names])
4819

    
4820
    # end data gathering
4821

    
4822
    HVPREFIX = "hv/"
4823
    BEPREFIX = "be/"
4824
    output = []
4825
    cluster = self.cfg.GetClusterInfo()
4826
    for instance in instance_list:
4827
      iout = []
4828
      i_hv = cluster.FillHV(instance, skip_globals=True)
4829
      i_be = cluster.FillBE(instance)
4830
      i_nicp = [objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
4831
                                 nic.nicparams) for nic in instance.nics]
4832
      for field in self.op.output_fields:
4833
        st_match = self._FIELDS_STATIC.Matches(field)
4834
        if field in self._SIMPLE_FIELDS:
4835
          val = getattr(instance, field)
4836
        elif field == "pnode":
4837
          val = instance.primary_node
4838
        elif field == "snodes":
4839
          val = list(instance.secondary_nodes)
4840
        elif field == "admin_state":
4841
          val = instance.admin_up
4842
        elif field == "oper_state":
4843
          if instance.primary_node in bad_nodes:
4844
            val = None
4845
          else:
4846
            val = bool(live_data.get(instance.name))
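        # The "status" field below combines the admin state with the observed
        # runtime state: "running"/"ADMIN_down" when they agree, "ERROR_up"/
        # "ERROR_down" when they disagree, and "ERROR_nodedown"/
        # "ERROR_nodeoffline" when the primary node could not be queried.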
4847
        elif field == "status":
4848
          if instance.primary_node in off_nodes:
4849
            val = "ERROR_nodeoffline"
4850
          elif instance.primary_node in bad_nodes:
4851
            val = "ERROR_nodedown"
4852
          else:
4853
            running = bool(live_data.get(instance.name))
4854
            if running:
4855
              if instance.admin_up:
4856
                val = "running"
4857
              else:
4858
                val = "ERROR_up"
4859
            else:
4860
              if instance.admin_up:
4861
                val = "ERROR_down"
4862
              else:
4863
                val = "ADMIN_down"
4864
        elif field == "oper_ram":
4865
          if instance.primary_node in bad_nodes:
4866
            val = None
4867
          elif instance.name in live_data:
4868
            val = live_data[instance.name].get("memory", "?")
4869
          else:
4870
            val = "-"
4871
        elif field == "vcpus":
4872
          val = i_be[constants.BE_VCPUS]
4873
        elif field == "disk_template":
4874
          val = instance.disk_template
4875
        elif field == "ip":
4876
          if instance.nics:
4877
            val = instance.nics[0].ip
4878
          else:
4879
            val = None
4880
        elif field == "nic_mode":
4881
          if instance.nics:
4882
            val = i_nicp[0][constants.NIC_MODE]
4883
          else:
4884
            val = None
4885
        elif field == "nic_link":
4886
          if instance.nics:
4887
            val = i_nicp[0][constants.NIC_LINK]
4888
          else:
4889
            val = None
4890
        elif field == "bridge":
4891
          if (instance.nics and
4892
              i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
4893
            val = i_nicp[0][constants.NIC_LINK]
4894
          else:
4895
            val = None
4896
        elif field == "mac":
4897
          if instance.nics:
4898
            val = instance.nics[0].mac
4899
          else:
4900
            val = None
4901
        elif field == "sda_size" or field == "sdb_size":
4902
          idx = ord(field[2]) - ord('a')
4903
          try:
4904
            val = instance.FindDisk(idx).size
4905
          except errors.OpPrereqError:
4906
            val = None
4907
        elif field == "disk_usage": # total disk usage per node
4908
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
4909
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
4910
        elif field == "tags":
4911
          val = list(instance.GetTags())
4912
        elif field == "hvparams":
4913
          val = i_hv
4914
        elif (field.startswith(HVPREFIX) and
4915
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
4916
              field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
4917
          val = i_hv.get(field[len(HVPREFIX):], None)
4918
        elif field == "beparams":
4919
          val = i_be
4920
        elif (field.startswith(BEPREFIX) and
4921
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
4922
          val = i_be.get(field[len(BEPREFIX):], None)
4923
        elif st_match and st_match.groups():
4924
          # matches a variable list
4925
          st_groups = st_match.groups()
4926
          if st_groups and st_groups[0] == "disk":
4927
            if st_groups[1] == "count":
4928
              val = len(instance.disks)
4929
            elif st_groups[1] == "sizes":
4930
              val = [disk.size for disk in instance.disks]
4931
            elif st_groups[1] == "size":
4932
              try:
4933
                val = instance.FindDisk(st_groups[2]).size
4934
              except errors.OpPrereqError:
4935
                val = None
4936
            else:
4937
              assert False, "Unhandled disk parameter"
4938
          elif st_groups[0] == "nic":
4939
            if st_groups[1] == "count":
4940
              val = len(instance.nics)
4941
            elif st_groups[1] == "macs":
4942
              val = [nic.mac for nic in instance.nics]
4943
            elif st_groups[1] == "ips":
4944
              val = [nic.ip for nic in instance.nics]
4945
            elif st_groups[1] == "modes":
4946
              val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
4947
            elif st_groups[1] == "links":
4948
              val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
4949
            elif st_groups[1] == "bridges":
4950
              val = []
4951
              for nicp in i_nicp:
4952
                if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
4953
                  val.append(nicp[constants.NIC_LINK])
4954
                else:
4955
                  val.append(None)
4956
            else:
4957
              # index-based item
4958
              nic_idx = int(st_groups[2])
4959
              if nic_idx >= len(instance.nics):
4960
                val = None
4961
              else:
4962
                if st_groups[1] == "mac":
4963
                  val = instance.nics[nic_idx].mac
4964
                elif st_groups[1] == "ip":
4965
                  val = instance.nics[nic_idx].ip
4966
                elif st_groups[1] == "mode":
4967
                  val = i_nicp[nic_idx][constants.NIC_MODE]
4968
                elif st_groups[1] == "link":
4969
                  val = i_nicp[nic_idx][constants.NIC_LINK]
4970
                elif st_groups[1] == "bridge":
4971
                  nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
4972
                  if nic_mode == constants.NIC_MODE_BRIDGED:
4973
                    val = i_nicp[nic_idx][constants.NIC_LINK]
4974
                  else:
4975
                    val = None
4976
                else:
4977
                  assert False, "Unhandled NIC parameter"
4978
          else:
4979
            assert False, ("Declared but unhandled variable parameter '%s'" %
4980
                           field)
4981
        else:
4982
          assert False, "Declared but unhandled parameter '%s'" % field
4983
        iout.append(val)
4984
      output.append(iout)
4985

    
4986
    return output
4987

    
4988

    
4989
class LUFailoverInstance(LogicalUnit):
4990
  """Failover an instance.
4991

4992
  """
4993
  HPATH = "instance-failover"
4994
  HTYPE = constants.HTYPE_INSTANCE
4995
  _OP_REQP = ["instance_name", "ignore_consistency"]
4996
  REQ_BGL = False
4997

    
4998
  def CheckArguments(self):
4999
    """Check the arguments.
5000

5001
    """
5002
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
5003
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
5004

    
5005
  def ExpandNames(self):
5006
    self._ExpandAndLockInstance()
5007
    self.needed_locks[locking.LEVEL_NODE] = []
5008
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5009

    
5010
  def DeclareLocks(self, level):
5011
    if level == locking.LEVEL_NODE:
5012
      self._LockInstancesNodes()
5013

    
5014
  def BuildHooksEnv(self):
5015
    """Build hooks env.
5016

5017
    This runs on master, primary and secondary nodes of the instance.
5018

5019
    """
5020
    instance = self.instance
5021
    source_node = instance.primary_node
5022
    target_node = instance.secondary_nodes[0]
5023
    env = {
5024
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5025
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
5026
      "OLD_PRIMARY": source_node,
5027
      "OLD_SECONDARY": target_node,
5028
      "NEW_PRIMARY": target_node,
5029
      "NEW_SECONDARY": source_node,
5030
      }
5031
    env.update(_BuildInstanceHookEnvByObject(self, instance))
5032
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5033
    nl_post = list(nl)
5034
    nl_post.append(source_node)
5035
    return env, nl, nl_post
5036

    
5037
  def CheckPrereq(self):
5038
    """Check prerequisites.
5039

5040
    This checks that the instance is in the cluster.
5041

5042
    """
5043
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5044
    assert self.instance is not None, \
5045
      "Cannot retrieve locked instance %s" % self.op.instance_name
5046

    
5047
    bep = self.cfg.GetClusterInfo().FillBE(instance)
5048
    if instance.disk_template not in constants.DTS_NET_MIRROR:
5049
      raise errors.OpPrereqError("Instance's disk layout is not"
5050
                                 " network mirrored, cannot failover.",
5051
                                 errors.ECODE_STATE)
5052

    
5053
    secondary_nodes = instance.secondary_nodes
5054
    if not secondary_nodes:
5055
      raise errors.ProgrammerError("no secondary node but using "
5056
                                   "a mirrored disk template")
5057

    
5058
    target_node = secondary_nodes[0]
5059
    _CheckNodeOnline(self, target_node)
5060
    _CheckNodeNotDrained(self, target_node)
5061
    if instance.admin_up:
5062
      # check memory requirements on the secondary node
5063
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5064
                           instance.name, bep[constants.BE_MEMORY],
5065
                           instance.hypervisor)
5066
    else:
5067
      self.LogInfo("Not checking memory on the secondary node as"
5068
                   " instance will not be started")
5069

    
5070
    # check bridge existence
5071
    _CheckInstanceBridgesExist(self, instance, node=target_node)
5072

    
5073
  def Exec(self, feedback_fn):
5074
    """Failover an instance.
5075

5076
    The failover is done by shutting it down on its present node and
5077
    starting it on the secondary.
5078

5079
    """
5080
    instance = self.instance
5081

    
5082
    source_node = instance.primary_node
5083
    target_node = instance.secondary_nodes[0]
5084

    
5085
    if instance.admin_up:
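      # The consistency check below only looks at the target (secondary)
      # node: that is where the instance will run after the failover, so a
      # degraded disk there would mean starting from possibly stale data.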
5086
      feedback_fn("* checking disk consistency between source and target")
5087
      for dev in instance.disks:
5088
        # for drbd, these are drbd over lvm
5089
        if not _CheckDiskConsistency(self, dev, target_node, False):
5090
          if not self.op.ignore_consistency:
5091
            raise errors.OpExecError("Disk %s is degraded on target node,"
5092
                                     " aborting failover." % dev.iv_name)
5093
    else:
5094
      feedback_fn("* not checking disk consistency as instance is not running")
5095

    
5096
    feedback_fn("* shutting down instance on source node")
5097
    logging.info("Shutting down instance %s on node %s",
5098
                 instance.name, source_node)
5099

    
5100
    result = self.rpc.call_instance_shutdown(source_node, instance,
5101
                                             self.shutdown_timeout)
5102
    msg = result.fail_msg
5103
    if msg:
5104
      if self.op.ignore_consistency:
5105
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
5106
                             " Proceeding anyway. Please make sure node"
5107
                             " %s is down. Error details: %s",
5108
                             instance.name, source_node, source_node, msg)
5109
      else:
5110
        raise errors.OpExecError("Could not shutdown instance %s on"
5111
                                 " node %s: %s" %
5112
                                 (instance.name, source_node, msg))
5113

    
5114
    feedback_fn("* deactivating the instance's disks on source node")
5115
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5116
      raise errors.OpExecError("Can't shut down the instance's disks.")
5117

    
5118
    instance.primary_node = target_node
5119
    # distribute new instance config to the other nodes
5120
    self.cfg.Update(instance, feedback_fn)
5121

    
5122
    # Only start the instance if it's marked as up
5123
    if instance.admin_up:
5124
      feedback_fn("* activating the instance's disks on target node")
5125
      logging.info("Starting instance %s on node %s",
5126
                   instance.name, target_node)
5127

    
5128
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
5129
                                               ignore_secondaries=True)
5130
      if not disks_ok:
5131
        _ShutdownInstanceDisks(self, instance)
5132
        raise errors.OpExecError("Can't activate the instance's disks")
5133

    
5134
      feedback_fn("* starting the instance on the target node")
5135
      result = self.rpc.call_instance_start(target_node, instance, None, None)
5136
      msg = result.fail_msg
5137
      if msg:
5138
        _ShutdownInstanceDisks(self, instance)
5139
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5140
                                 (instance.name, target_node, msg))
5141

    
5142

    
5143
class LUMigrateInstance(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "live", "cleanup"]

  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       self.op.live, self.op.cleanup)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]
    env = _BuildInstanceHookEnvByObject(self, instance)
    env["MIGRATE_LIVE"] = self.op.live
    env["MIGRATE_CLEANUP"] = self.op.cleanup
    env.update({
        "OLD_PRIMARY": source_node,
        "OLD_SECONDARY": target_node,
        "NEW_PRIMARY": target_node,
        "NEW_SECONDARY": source_node,
        })
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    nl_post = list(nl)
    nl_post.append(source_node)
    return env, nl, nl_post


class LUMoveInstance(LogicalUnit):
5195
  """Move an instance by data-copying.
5196

5197
  """
5198
  HPATH = "instance-move"
5199
  HTYPE = constants.HTYPE_INSTANCE
5200
  _OP_REQP = ["instance_name", "target_node"]
5201
  REQ_BGL = False
5202

    
5203
  def CheckArguments(self):
5204
    """Check the arguments.
5205

5206
    """
5207
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
5208
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
5209

    
5210
  def ExpandNames(self):
5211
    self._ExpandAndLockInstance()
5212
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5213
    self.op.target_node = target_node
5214
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
5215
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5216

    
5217
  def DeclareLocks(self, level):
5218
    if level == locking.LEVEL_NODE:
5219
      self._LockInstancesNodes(primary_only=True)
5220

    
5221
  def BuildHooksEnv(self):
5222
    """Build hooks env.
5223

5224
    This runs on master, primary and secondary nodes of the instance.
5225

5226
    """
5227
    env = {
5228
      "TARGET_NODE": self.op.target_node,
5229
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
5230
      }
5231
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5232
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5233
                                       self.op.target_node]
5234
    return env, nl, nl
5235

    
5236
  def CheckPrereq(self):
5237
    """Check prerequisites.
5238

5239
    This checks that the instance is in the cluster.
5240

5241
    """
5242
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5243
    assert self.instance is not None, \
5244
      "Cannot retrieve locked instance %s" % self.op.instance_name
5245

    
5246
    node = self.cfg.GetNodeInfo(self.op.target_node)
5247
    assert node is not None, \
5248
      "Cannot retrieve locked node %s" % self.op.target_node
5249

    
5250
    self.target_node = target_node = node.name
5251

    
5252
    if target_node == instance.primary_node:
5253
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
5254
                                 (instance.name, target_node),
5255
                                 errors.ECODE_STATE)
5256

    
5257
    bep = self.cfg.GetClusterInfo().FillBE(instance)
5258

    
5259
    for idx, dsk in enumerate(instance.disks):
5260
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5261
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5262
                                   " cannot copy" % idx, errors.ECODE_STATE)
5263

    
5264
    _CheckNodeOnline(self, target_node)
5265
    _CheckNodeNotDrained(self, target_node)
5266

    
5267
    if instance.admin_up:
5268
      # check memory requirements on the secondary node
5269
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5270
                           instance.name, bep[constants.BE_MEMORY],
5271
                           instance.hypervisor)
5272
    else:
5273
      self.LogInfo("Not checking memory on the secondary node as"
5274
                   " instance will not be started")
5275

    
5276
    # check bridge existence
5277
    _CheckInstanceBridgesExist(self, instance, node=target_node)
5278

    
5279
  def Exec(self, feedback_fn):
5280
    """Move an instance.
5281

5282
    The move is done by shutting it down on its present node, copying
5283
    the data over (slow) and starting it on the new node.
5284

5285
    """
5286
    instance = self.instance
5287

    
5288
    source_node = instance.primary_node
5289
    target_node = self.target_node
5290

    
5291
    self.LogInfo("Shutting down instance %s on source node %s",
5292
                 instance.name, source_node)
5293

    
5294
    result = self.rpc.call_instance_shutdown(source_node, instance,
5295
                                             self.shutdown_timeout)
5296
    msg = result.fail_msg
5297
    if msg:
5298
      if self.op.ignore_consistency:
5299
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
5300
                             " Proceeding anyway. Please make sure node"
5301
                             " %s is down. Error details: %s",
5302
                             instance.name, source_node, source_node, msg)
5303
      else:
5304
        raise errors.OpExecError("Could not shutdown instance %s on"
5305
                                 " node %s: %s" %
5306
                                 (instance.name, source_node, msg))
5307

    
5308
    # create the target disks
5309
    try:
5310
      _CreateDisks(self, instance, target_node=target_node)
5311
    except errors.OpExecError:
5312
      self.LogWarning("Device creation failed, reverting...")
5313
      try:
5314
        _RemoveDisks(self, instance, target_node=target_node)
5315
      finally:
5316
        self.cfg.ReleaseDRBDMinors(instance.name)
5317
        raise
5318

    
5319
    cluster_name = self.cfg.GetClusterInfo().cluster_name
5320

    
5321
    errs = []
5322
    # activate, get path, copy the data over
5323
    for idx, disk in enumerate(instance.disks):
5324
      self.LogInfo("Copying data for disk %d", idx)
5325
      result = self.rpc.call_blockdev_assemble(target_node, disk,
5326
                                               instance.name, True)
5327
      if result.fail_msg:
5328
        self.LogWarning("Can't assemble newly created disk %d: %s",
5329
                        idx, result.fail_msg)
5330
        errs.append(result.fail_msg)
5331
        break
5332
      dev_path = result.payload
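      # blockdev_export streams the disk contents from the source node into
      # the freshly assembled device on the target node (presumably over the
      # cluster's SSH channel); any failure aborts the whole copy and the
      # partially created disks are removed below.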
5333
      result = self.rpc.call_blockdev_export(source_node, disk,
5334
                                             target_node, dev_path,
5335
                                             cluster_name)
5336
      if result.fail_msg:
5337
        self.LogWarning("Can't copy data over for disk %d: %s",
5338
                        idx, result.fail_msg)
5339
        errs.append(result.fail_msg)
5340
        break
5341

    
5342
    if errs:
5343
      self.LogWarning("Some disks failed to copy, aborting")
5344
      try:
5345
        _RemoveDisks(self, instance, target_node=target_node)
5346
      finally:
5347
        self.cfg.ReleaseDRBDMinors(instance.name)
5348
        raise errors.OpExecError("Errors during disk copy: %s" %
5349
                                 (",".join(errs),))
5350

    
5351
    instance.primary_node = target_node
5352
    self.cfg.Update(instance, feedback_fn)
5353

    
5354
    self.LogInfo("Removing the disks on the original node")
5355
    _RemoveDisks(self, instance, target_node=source_node)
5356

    
5357
    # Only start the instance if it's marked as up
5358
    if instance.admin_up:
5359
      self.LogInfo("Starting instance %s on node %s",
5360
                   instance.name, target_node)
5361

    
5362
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
5363
                                           ignore_secondaries=True)
5364
      if not disks_ok:
5365
        _ShutdownInstanceDisks(self, instance)
5366
        raise errors.OpExecError("Can't activate the instance's disks")
5367

    
5368
      result = self.rpc.call_instance_start(target_node, instance, None, None)
5369
      msg = result.fail_msg
5370
      if msg:
5371
        _ShutdownInstanceDisks(self, instance)
5372
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5373
                                 (instance.name, target_node, msg))
5374

    
5375

    
5376
class LUMigrateNode(LogicalUnit):
  """Migrate all instances from a node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name", "live"]
  REQ_BGL = False

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    # Create tasklets for migrating instances for all instances on this node
    names = []
    tasklets = []

    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
      logging.debug("Migrating instance %s", inst.name)
      names.append(inst.name)

      tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))

    self.tasklets = tasklets

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = names

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "NODE_NAME": self.op.node_name,
      }

    nl = [self.cfg.GetMasterNode()]

    return (env, nl, nl)


class TLMigrateInstance(Tasklet):
5429
  def __init__(self, lu, instance_name, live, cleanup):
5430
    """Initializes this class.
5431

5432
    """
5433
    Tasklet.__init__(self, lu)
5434

    
5435
    # Parameters
5436
    self.instance_name = instance_name
5437
    self.live = live
5438
    self.cleanup = cleanup
5439

    
5440
  def CheckPrereq(self):
5441
    """Check prerequisites.
5442

5443
    This checks that the instance is in the cluster.
5444

5445
    """
5446
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5447
    instance = self.cfg.GetInstanceInfo(instance_name)
5448
    assert instance is not None
5449

    
5450
    if instance.disk_template != constants.DT_DRBD8:
5451
      raise errors.OpPrereqError("Instance's disk layout is not"
5452
                                 " drbd8, cannot migrate.", errors.ECODE_STATE)
5453

    
5454
    secondary_nodes = instance.secondary_nodes
5455
    if not secondary_nodes:
5456
      raise errors.ConfigurationError("No secondary node but using"
5457
                                      " drbd8 disk template")
5458

    
5459
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
5460

    
5461
    target_node = secondary_nodes[0]
5462
    # check memory requirements on the secondary node
5463
    _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
5464
                         instance.name, i_be[constants.BE_MEMORY],
5465
                         instance.hypervisor)
5466

    
5467
    # check bridge existence
5468
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
5469

    
5470
    if not self.cleanup:
5471
      _CheckNodeNotDrained(self.lu, target_node)
5472
      result = self.rpc.call_instance_migratable(instance.primary_node,
5473
                                                 instance)
5474
      result.Raise("Can't migrate, please use failover",
5475
                   prereq=True, ecode=errors.ECODE_STATE)
5476

    
5477
    self.instance = instance
5478

    
5479
  def _WaitUntilSync(self):
5480
    """Poll with custom rpc for disk sync.
5481

5482
    This uses our own step-based rpc call.
5483

5484
    """
5485
    self.feedback_fn("* wait until resync is done")
5486
    all_done = False
5487
    while not all_done:
5488
      all_done = True
5489
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5490
                                            self.nodes_ip,
5491
                                            self.instance.disks)
5492
      min_percent = 100
5493
      for node, nres in result.items():
5494
        nres.Raise("Cannot resync disks on node %s" % node)
5495
        node_done, node_percent = nres.payload
5496
        all_done = all_done and node_done
5497
        if node_percent is not None:
5498
          min_percent = min(min_percent, node_percent)
5499
      if not all_done:
5500
        if min_percent < 100:
5501
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
5502
        time.sleep(2)
5503

    
5504
  def _EnsureSecondary(self, node):
5505
    """Demote a node to secondary.
5506

5507
    """
5508
    self.feedback_fn("* switching node %s to secondary mode" % node)
5509

    
5510
    for dev in self.instance.disks:
5511
      self.cfg.SetDiskID(dev, node)
5512

    
5513
    result = self.rpc.call_blockdev_close(node, self.instance.name,
5514
                                          self.instance.disks)
5515
    result.Raise("Cannot change disk to secondary on node %s" % node)
5516

    
5517
  def _GoStandalone(self):
5518
    """Disconnect from the network.
5519

5520
    """
5521
    self.feedback_fn("* changing into standalone mode")
5522
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5523
                                               self.instance.disks)
5524
    for node, nres in result.items():
5525
      nres.Raise("Cannot disconnect disks node %s" % node)
5526

    
5527
  def _GoReconnect(self, multimaster):
5528
    """Reconnect to the network.
5529

5530
    """
5531
    if multimaster:
5532
      msg = "dual-master"
5533
    else:
5534
      msg = "single-master"
5535
    self.feedback_fn("* changing disks into %s mode" % msg)
5536
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5537
                                           self.instance.disks,
5538
                                           self.instance.name, multimaster)
5539
    for node, nres in result.items():
5540
      nres.Raise("Cannot change disks config on node %s" % node)
5541

    
5542
  def _ExecCleanup(self):
5543
    """Try to cleanup after a failed migration.
5544

5545
    The cleanup is done by:
5546
      - check that the instance is running only on one node
5547
        (and update the config if needed)
5548
      - change disks on its secondary node to secondary
5549
      - wait until disks are fully synchronized
5550
      - disconnect from the network
5551
      - change disks into single-master mode
5552
      - wait again until disks are fully synchronized
5553

5554
    """
5555
    instance = self.instance
5556
    target_node = self.target_node
5557
    source_node = self.source_node
5558

    
5559
    # check running on only one node
5560
    self.feedback_fn("* checking where the instance actually runs"
5561
                     " (if this hangs, the hypervisor might be in"
5562
                     " a bad state)")
5563
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
5564
    for node, result in ins_l.items():
5565
      result.Raise("Can't contact node %s" % node)
5566

    
5567
    runningon_source = instance.name in ins_l[source_node].payload
5568
    runningon_target = instance.name in ins_l[target_node].payload
5569

    
5570
    if runningon_source and runningon_target:
5571
      raise errors.OpExecError("Instance seems to be running on two nodes,"
5572
                               " or the hypervisor is confused. You will have"
5573
                               " to ensure manually that it runs only on one"
5574
                               " and restart this operation.")
5575

    
5576
    if not (runningon_source or runningon_target):
5577
      raise errors.OpExecError("Instance does not seem to be running at all."
5578
                               " In this case, it's safer to repair by"
5579
                               " running 'gnt-instance stop' to ensure disk"
5580
                               " shutdown, and then restarting it.")
5581

    
5582
    if runningon_target:
5583
      # the migration has actually succeeded, we need to update the config
5584
      self.feedback_fn("* instance running on secondary node (%s),"
5585
                       " updating config" % target_node)
5586
      instance.primary_node = target_node
5587
      self.cfg.Update(instance, self.feedback_fn)
5588
      demoted_node = source_node
5589
    else:
5590
      self.feedback_fn("* instance confirmed to be running on its"
5591
                       " primary node (%s)" % source_node)
5592
      demoted_node = target_node
5593

    
5594
    self._EnsureSecondary(demoted_node)
5595
    try:
5596
      self._WaitUntilSync()
5597
    except errors.OpExecError:
5598
      # we ignore here errors, since if the device is standalone, it
5599
      # won't be able to sync
5600
      pass
5601
    self._GoStandalone()
5602
    self._GoReconnect(False)
5603
    self._WaitUntilSync()
5604

    
5605
    self.feedback_fn("* done")
5606

    
5607
  def _RevertDiskStatus(self):
5608
    """Try to revert the disk status after a failed migration.
5609

5610
    """
5611
    target_node = self.target_node
5612
    try:
5613
      self._EnsureSecondary(target_node)
5614
      self._GoStandalone()
5615
      self._GoReconnect(False)
5616
      self._WaitUntilSync()
5617
    except errors.OpExecError, err:
5618
      self.lu.LogWarning("Migration failed and I can't reconnect the"
5619
                         " drives: error '%s'\n"
5620
                         "Please look and recover the instance status" %
5621
                         str(err))
5622

    
5623
  def _AbortMigration(self):
5624
    """Call the hypervisor code to abort a started migration.
5625

5626
    """
5627
    instance = self.instance
5628
    target_node = self.target_node
5629
    migration_info = self.migration_info
5630

    
5631
    abort_result = self.rpc.call_finalize_migration(target_node,
5632
                                                    instance,
5633
                                                    migration_info,
5634
                                                    False)
5635
    abort_msg = abort_result.fail_msg
5636
    if abort_msg:
5637
      logging.error("Aborting migration failed on target node %s: %s",
5638
                    target_node, abort_msg)
5639
      # Don't raise an exception here, as we still have to try to revert the
5640
      # disk status, even if this step failed.
5641

    
5642
  def _ExecMigration(self):
5643
    """Migrate an instance.
5644

5645
    The migrate is done by:
5646
      - change the disks into dual-master mode
5647
      - wait until disks are fully synchronized again
5648
      - migrate the instance
5649
      - change disks on the new secondary node (the old primary) to secondary
5650
      - wait until disks are fully synchronized
5651
      - change disks into single-master mode
5652

5653
    """
5654
    instance = self.instance
5655
    target_node = self.target_node
5656
    source_node = self.source_node
5657

    
5658
    self.feedback_fn("* checking disk consistency between source and target")
5659
    for dev in instance.disks:
5660
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
5661
        raise errors.OpExecError("Disk %s is degraded or not fully"
5662
                                 " synchronized on target node,"
5663
                                 " aborting migrate." % dev.iv_name)
5664

    
5665
    # First get the migration information from the remote node
5666
    result = self.rpc.call_migration_info(source_node, instance)
5667
    msg = result.fail_msg
5668
    if msg:
5669
      log_err = ("Failed fetching source migration information from %s: %s" %
5670
                 (source_node, msg))
5671
      logging.error(log_err)
5672
      raise errors.OpExecError(log_err)
5673

    
5674
    self.migration_info = migration_info = result.payload
5675

    
5676
    # Then switch the disks to master/master mode
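    # (For live migration the DRBD pair has to be in dual-primary mode, so
    # the target node can already access the disks while the instance is
    # still running on the source; the disks are demoted back to a single
    # primary once the migration has finished.)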
5677
    self._EnsureSecondary(target_node)
5678
    self._GoStandalone()
5679
    self._GoReconnect(True)
5680
    self._WaitUntilSync()
5681

    
5682
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
5683
    result = self.rpc.call_accept_instance(target_node,
5684
                                           instance,
5685
                                           migration_info,
5686
                                           self.nodes_ip[target_node])
5687

    
5688
    msg = result.fail_msg
5689
    if msg:
5690
      logging.error("Instance pre-migration failed, trying to revert"
5691
                    " disk status: %s", msg)
5692
      self.feedback_fn("Pre-migration failed, aborting")
5693
      self._AbortMigration()
5694
      self._RevertDiskStatus()
5695
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
5696
                               (instance.name, msg))
5697

    
5698
    self.feedback_fn("* migrating instance to %s" % target_node)
5699
    time.sleep(10)
5700
    result = self.rpc.call_instance_migrate(source_node, instance,
5701
                                            self.nodes_ip[target_node],
5702
                                            self.live)
5703
    msg = result.fail_msg
5704
    if msg:
5705
      logging.error("Instance migration failed, trying to revert"
5706
                    " disk status: %s", msg)
5707
      self.feedback_fn("Migration failed, aborting")
5708
      self._AbortMigration()
5709
      self._RevertDiskStatus()
5710
      raise errors.OpExecError("Could not migrate instance %s: %s" %
5711
                               (instance.name, msg))
5712
    time.sleep(10)
5713

    
5714
    instance.primary_node = target_node
5715
    # distribute new instance config to the other nodes
5716
    self.cfg.Update(instance, self.feedback_fn)
5717

    
5718
    result = self.rpc.call_finalize_migration(target_node,
5719
                                              instance,
5720
                                              migration_info,
5721
                                              True)
5722
    msg = result.fail_msg
5723
    if msg:
5724
      logging.error("Instance migration succeeded, but finalization failed:"
5725
                    " %s", msg)
5726
      raise errors.OpExecError("Could not finalize instance migration: %s" %
5727
                               msg)
5728

    
5729
    self._EnsureSecondary(source_node)
5730
    self._WaitUntilSync()
5731
    self._GoStandalone()
5732
    self._GoReconnect(False)
5733
    self._WaitUntilSync()
5734

    
5735
    self.feedback_fn("* done")
5736

    
5737
  def Exec(self, feedback_fn):
5738
    """Perform the migration.
5739

5740
    """
5741
    feedback_fn("Migrating instance %s" % self.instance.name)
5742

    
5743
    self.feedback_fn = feedback_fn
5744

    
5745
    self.source_node = self.instance.primary_node
5746
    self.target_node = self.instance.secondary_nodes[0]
5747
    self.all_nodes = [self.source_node, self.target_node]
5748
    self.nodes_ip = {
5749
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
5750
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
5751
      }
5752

    
5753
    if self.cleanup:
5754
      return self._ExecCleanup()
5755
    else:
5756
      return self._ExecMigration()
5757

    
5758

    
5759
def _CreateBlockDev(lu, node, instance, device, force_create,
5760
                    info, force_open):
5761
  """Create a tree of block devices on a given node.
5762

5763
  If this device type has to be created on secondaries, create it and
5764
  all its children.
5765

5766
  If not, just recurse to children keeping the same 'force' value.
5767

5768
  @param lu: the lu on whose behalf we execute
5769
  @param node: the node on which to create the device
5770
  @type instance: L{objects.Instance}
5771
  @param instance: the instance which owns the device
5772
  @type device: L{objects.Disk}
5773
  @param device: the device to create
5774
  @type force_create: boolean
5775
  @param force_create: whether to force creation of this device; this
5776
      will be change to True whenever we find a device which has
5777
      CreateOnSecondary() attribute
5778
  @param info: the extra 'metadata' we should attach to the device
5779
      (this will be represented as a LVM tag)
5780
  @type force_open: boolean
5781
  @param force_open: this parameter will be passes to the
5782
      L{backend.BlockdevCreate} function where it specifies
5783
      whether we run on primary or not, and it affects both
5784
      the child assembly and the device own Open() execution
5785

5786
  """
5787
  if device.CreateOnSecondary():
5788
    force_create = True
5789

    
5790
  if device.children:
5791
    for child in device.children:
5792
      _CreateBlockDev(lu, node, instance, child, force_create,
5793
                      info, force_open)
5794

    
5795
  if not force_create:
5796
    return
5797

    
5798
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
5799

    
5800

    
5801
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
5802
  """Create a single block device on a given node.
5803

5804
  This will not recurse over children of the device, so they must be
5805
  created in advance.
5806

5807
  @param lu: the lu on whose behalf we execute
5808
  @param node: the node on which to create the device
5809
  @type instance: L{objects.Instance}
5810
  @param instance: the instance which owns the device
5811
  @type device: L{objects.Disk}
5812
  @param device: the device to create
5813
  @param info: the extra 'metadata' we should attach to the device
5814
      (this will be represented as a LVM tag)
5815
  @type force_open: boolean
5816
  @param force_open: this parameter will be passes to the
5817
      L{backend.BlockdevCreate} function where it specifies
5818
      whether we run on primary or not, and it affects both
5819
      the child assembly and the device own Open() execution
5820

5821
  """
5822
  lu.cfg.SetDiskID(device, node)
5823
  result = lu.rpc.call_blockdev_create(node, device, device.size,
5824
                                       instance.name, force_open, info)
5825
  result.Raise("Can't create block device %s on"
5826
               " node %s for instance %s" % (device, node, instance.name))
5827
  if device.physical_id is None:
5828
    device.physical_id = result.payload
5829

    
5830

    
5831
def _GenerateUniqueNames(lu, exts):
5832
  """Generate a suitable LV name.
5833

5834
  This will generate a logical volume name for the given instance.
5835

5836
  """
5837
  results = []
5838
  for val in exts:
5839
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
5840
    results.append("%s%s" % (new_id, val))
5841
  return results
5842

    
5843

    
5844
def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
5845
                         p_minor, s_minor):
5846
  """Generate a drbd8 device complete with its children.
5847

5848
  """
5849
  port = lu.cfg.AllocatePort()
5850
  vgname = lu.cfg.GetVGName()
5851
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
5852
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
5853
                          logical_id=(vgname, names[0]))
5854
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
5855
                          logical_id=(vgname, names[1]))
5856
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
5857
                          logical_id=(primary, secondary, port,
5858
                                      p_minor, s_minor,
5859
                                      shared_secret),
5860
                          children=[dev_data, dev_meta],
5861
                          iv_name=iv_name)
5862
  return drbd_dev
5863

    
5864

    
5865
def _GenerateDiskTemplate(lu, template_name,
5866
                          instance_name, primary_node,
5867
                          secondary_nodes, disk_info,
5868
                          file_storage_dir, file_driver,
5869
                          base_index):
5870
  """Generate the entire disk layout for a given template type.
5871

5872
  """
5873
  #TODO: compute space requirements
5874

    
5875
  vgname = lu.cfg.GetVGName()
5876
  disk_count = len(disk_info)
5877
  disks = []
5878
  if template_name == constants.DT_DISKLESS:
5879
    pass
5880
  elif template_name == constants.DT_PLAIN:
5881
    if len(secondary_nodes) != 0:
5882
      raise errors.ProgrammerError("Wrong template configuration")
5883

    
5884
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
5885
                                      for i in range(disk_count)])
5886
    for idx, disk in enumerate(disk_info):
5887
      disk_index = idx + base_index
5888
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
5889
                              logical_id=(vgname, names[idx]),
5890
                              iv_name="disk/%d" % disk_index,
5891
                              mode=disk["mode"])
5892
      disks.append(disk_dev)
5893
  elif template_name == constants.DT_DRBD8:
5894
    if len(secondary_nodes) != 1:
5895
      raise errors.ProgrammerError("Wrong template configuration")
5896
    remote_node = secondary_nodes[0]
5897
    minors = lu.cfg.AllocateDRBDMinor(
5898
      [primary_node, remote_node] * len(disk_info), instance_name)
5899

    
5900
    names = []
5901
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
5902
                                               for i in range(disk_count)]):
5903
      names.append(lv_prefix + "_data")
5904
      names.append(lv_prefix + "_meta")
5905
    for idx, disk in enumerate(disk_info):
5906
      disk_index = idx + base_index
5907
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
5908
                                      disk["size"], names[idx*2:idx*2+2],
5909
                                      "disk/%d" % disk_index,
5910
                                      minors[idx*2], minors[idx*2+1])
5911
      disk_dev.mode = disk["mode"]
5912
      disks.append(disk_dev)
5913
  elif template_name == constants.DT_FILE:
5914
    if len(secondary_nodes) != 0:
5915
      raise errors.ProgrammerError("Wrong template configuration")
5916

    
5917
    _RequireFileStorage()
5918

    
5919
    for idx, disk in enumerate(disk_info):
5920
      disk_index = idx + base_index
5921
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
5922
                              iv_name="disk/%d" % disk_index,
5923
                              logical_id=(file_driver,
5924
                                          "%s/disk%d" % (file_storage_dir,
5925
                                                         disk_index)),
5926
                              mode=disk["mode"])
5927
      disks.append(disk_dev)
5928
  else:
5929
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
5930
  return disks
5931

    
5932

    
5933
def _GetInstanceInfoText(instance):
5934
  """Compute that text that should be added to the disk's metadata.
5935

5936
  """
5937
  return "originstname+%s" % instance.name
5938

    
5939

    
5940
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
5941
  """Create all disks for an instance.
5942

5943
  This abstracts away some work from AddInstance.
5944

5945
  @type lu: L{LogicalUnit}
5946
  @param lu: the logical unit on whose behalf we execute
5947
  @type instance: L{objects.Instance}
5948
  @param instance: the instance whose disks we should create
5949
  @type to_skip: list
5950
  @param to_skip: list of indices to skip
5951
  @type target_node: string
5952
  @param target_node: if passed, overrides the target node for creation
5953
  @rtype: boolean
5954
  @return: the success of the creation
5955

5956
  """
5957
  info = _GetInstanceInfoText(instance)
5958
  if target_node is None:
5959
    pnode = instance.primary_node
5960
    all_nodes = instance.all_nodes
5961
  else:
5962
    pnode = target_node
5963
    all_nodes = [pnode]
5964

    
5965
  if instance.disk_template == constants.DT_FILE:
5966
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
5967
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
5968

    
5969
    result.Raise("Failed to create directory '%s' on"
5970
                 " node %s" % (file_storage_dir, pnode))
5971

    
5972
  # Note: this needs to be kept in sync with adding of disks in
5973
  # LUSetInstanceParams
5974
  for idx, device in enumerate(instance.disks):
5975
    if to_skip and idx in to_skip:
5976
      continue
5977
    logging.info("Creating volume %s for instance %s",
5978
                 device.iv_name, instance.name)
5979
    #HARDCODE
5980
    for node in all_nodes:
5981
      f_create = node == pnode
5982
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
5983

    
5984

    
5985
def _RemoveDisks(lu, instance, target_node=None):
5986
  """Remove all disks for an instance.
5987

5988
  This abstracts away some work from `AddInstance()` and
5989
  `RemoveInstance()`. Note that in case some of the devices couldn't
5990
  be removed, the removal will continue with the other ones (compare
5991
  with `_CreateDisks()`).
5992

5993
  @type lu: L{LogicalUnit}
5994
  @param lu: the logical unit on whose behalf we execute
5995
  @type instance: L{objects.Instance}
5996
  @param instance: the instance whose disks we should remove
5997
  @type target_node: string
5998
  @param target_node: used to override the node on which to remove the disks
5999
  @rtype: boolean
6000
  @return: the success of the removal
6001

6002
  """
6003
  logging.info("Removing block devices for instance %s", instance.name)
6004

    
6005
  all_result = True
6006
  for device in instance.disks:
6007
    if target_node:
6008
      edata = [(target_node, device)]
6009
    else:
6010
      edata = device.ComputeNodeTree(instance.primary_node)
6011
    for node, disk in edata:
6012
      lu.cfg.SetDiskID(disk, node)
6013
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6014
      if msg:
6015
        lu.LogWarning("Could not remove block device %s on node %s,"
6016
                      " continuing anyway: %s", device.iv_name, node, msg)
6017
        all_result = False
6018

    
6019
  if instance.disk_template == constants.DT_FILE:
6020
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6021
    if target_node:
6022
      tgt = target_node
6023
    else:
6024
      tgt = instance.primary_node
6025
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6026
    if result.fail_msg:
6027
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6028
                    file_storage_dir, instance.primary_node, result.fail_msg)
6029
      all_result = False
6030

    
6031
  return all_result
6032

    
6033

    
6034
def _ComputeDiskSize(disk_template, disks):
6035
  """Compute disk size requirements in the volume group
6036

6037
  """
6038
  # Required free disk space as a function of disk and swap space
6039
  req_size_dict = {
6040
    constants.DT_DISKLESS: None,
6041
    constants.DT_PLAIN: sum(d["size"] for d in disks),
6042
    # 128 MB are added for drbd metadata for each disk
6043
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
6044
    constants.DT_FILE: None,
6045
  }
6046

    
6047
  if disk_template not in req_size_dict:
6048
    raise errors.ProgrammerError("Disk template '%s' size requirement"
6049
                                 " is unknown" %  disk_template)
6050

    
6051
  return req_size_dict[disk_template]
6052

    
6053

    
6054
def _CheckHVParams(lu, nodenames, hvname, hvparams):
6055
  """Hypervisor parameter validation.
6056

6057
  This function abstract the hypervisor parameter validation to be
6058
  used in both instance create and instance modify.
6059

6060
  @type lu: L{LogicalUnit}
6061
  @param lu: the logical unit for which we check
6062
  @type nodenames: list
6063
  @param nodenames: the list of nodes on which we should check
6064
  @type hvname: string
6065
  @param hvname: the name of the hypervisor we should use
6066
  @type hvparams: dict
6067
  @param hvparams: the parameters which we need to check
6068
  @raise errors.OpPrereqError: if the parameters are not valid
6069

6070
  """
6071
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6072
                                                  hvname,
6073
                                                  hvparams)
6074
  for node in nodenames:
6075
    info = hvinfo[node]
6076
    if info.offline:
6077
      continue
6078
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
6079

    
6080

    
6081
class LUCreateInstance(LogicalUnit):
6082
  """Create an instance.
6083

6084
  """
6085
  HPATH = "instance-add"
6086
  HTYPE = constants.HTYPE_INSTANCE
6087
  _OP_REQP = ["instance_name", "disks",
6088
              "mode", "start",
6089
              "wait_for_sync", "ip_check", "nics",
6090
              "hvparams", "beparams"]
6091
  REQ_BGL = False
6092

    
6093
  def CheckArguments(self):
6094
    """Check arguments.
6095

6096
    """
6097
    # set optional parameters to none if they don't exist
6098
    for attr in ["pnode", "snode", "iallocator", "hypervisor",
6099
                 "disk_template", "identify_defaults"]:
6100
      if not hasattr(self.op, attr):
6101
        setattr(self.op, attr, None)
6102

    
6103
    # do not require name_check to ease forward/backward compatibility
6104
    # for tools
6105
    if not hasattr(self.op, "name_check"):
6106
      self.op.name_check = True
6107
    if not hasattr(self.op, "no_install"):
6108
      self.op.no_install = False
6109
    if self.op.no_install and self.op.start:
6110
      self.LogInfo("No-installation mode selected, disabling startup")
6111
      self.op.start = False
6112
    # validate/normalize the instance name
6113
    self.op.instance_name = utils.HostInfo.NormalizeName(self.op.instance_name)
6114
    if self.op.ip_check and not self.op.name_check:
6115
      # TODO: make the ip check more flexible and not depend on the name check
6116
      raise errors.OpPrereqError("Cannot do ip checks without a name check",
6117
                                 errors.ECODE_INVAL)
6118

    
6119
    # check nics' parameter names
6120
    for nic in self.op.nics:
6121
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6122

    
6123
    # check disks. parameter names and consistent adopt/no-adopt strategy
6124
    has_adopt = has_no_adopt = False
6125
    for disk in self.op.disks:
6126
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
6127
      if "adopt" in disk:
6128
        has_adopt = True
6129
      else:
6130
        has_no_adopt = True
6131
    if has_adopt and has_no_adopt:
6132
      raise errors.OpPrereqError("Either all disks are adopted or none is",
6133
                                 errors.ECODE_INVAL)
6134
    if has_adopt:
6135
      if self.op.disk_template != constants.DT_PLAIN:
6136
        raise errors.OpPrereqError("Disk adoption is only supported for the"
6137
                                   " 'plain' disk template",
6138
                                   errors.ECODE_INVAL)
6139
      if self.op.iallocator is not None:
6140
        raise errors.OpPrereqError("Disk adoption not allowed with an"
6141
                                   " iallocator script", errors.ECODE_INVAL)
6142
      if self.op.mode == constants.INSTANCE_IMPORT:
6143
        raise errors.OpPrereqError("Disk adoption not allowed for"
6144
                                   " instance import", errors.ECODE_INVAL)
6145

    
6146
    self.adopt_disks = has_adopt
6147

    
6148
    # verify creation mode
6149
    if self.op.mode not in (constants.INSTANCE_CREATE,
6150
                            constants.INSTANCE_IMPORT):
6151
      raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
6152
                                 self.op.mode, errors.ECODE_INVAL)
6153

    
6154
    # instance name verification
6155
    if self.op.name_check:
6156
      self.hostname1 = utils.GetHostInfo(self.op.instance_name)
6157
      self.op.instance_name = self.hostname1.name
6158
      # used in CheckPrereq for ip ping check
6159
      self.check_ip = self.hostname1.ip
6160
    else:
6161
      self.check_ip = None
6162

    
6163
    # file storage checks
6164
    if (self.op.file_driver and
6165
        not self.op.file_driver in constants.FILE_DRIVER):
6166
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
6167
                                 self.op.file_driver, errors.ECODE_INVAL)
6168

    
6169
    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6170
      raise errors.OpPrereqError("File storage directory path not absolute",
6171
                                 errors.ECODE_INVAL)
6172

    
6173
    ### Node/iallocator related checks
6174
    if [self.op.iallocator, self.op.pnode].count(None) != 1:
6175
      raise errors.OpPrereqError("One and only one of iallocator and primary"
6176
                                 " node must be given",
6177
                                 errors.ECODE_INVAL)
6178

    
6179
    if self.op.mode == constants.INSTANCE_IMPORT:
6180
      # On import force_variant must be True, because if we forced it at
6181
      # initial install, our only chance when importing it back is that it
6182
      # works again!
6183
      self.op.force_variant = True
6184

    
6185
      if self.op.no_install:
6186
        self.LogInfo("No-installation mode has no effect during import")
6187

    
6188
    else: # INSTANCE_CREATE
6189
      if getattr(self.op, "os_type", None) is None:
6190
        raise errors.OpPrereqError("No guest OS specified",
6191
                                   errors.ECODE_INVAL)
6192
      self.op.force_variant = getattr(self.op, "force_variant", False)
6193
      if self.op.disk_template is None:
6194
        raise errors.OpPrereqError("No disk template specified",
6195
                                   errors.ECODE_INVAL)
6196

    
6197
  def ExpandNames(self):
6198
    """ExpandNames for CreateInstance.
6199

6200
    Figure out the right locks for instance creation.
6201

6202
    """
6203
    self.needed_locks = {}
6204

    
6205
    instance_name = self.op.instance_name
6206
    # this is just a preventive check, but someone might still add this
6207
    # instance in the meantime, and creation will fail at lock-add time
6208
    if instance_name in self.cfg.GetInstanceList():
6209
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6210
                                 instance_name, errors.ECODE_EXISTS)
6211

    
6212
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6213

    
6214
    if self.op.iallocator:
6215
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6216
    else:
6217
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6218
      nodelist = [self.op.pnode]
6219
      if self.op.snode is not None:
6220
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6221
        nodelist.append(self.op.snode)
6222
      self.needed_locks[locking.LEVEL_NODE] = nodelist
6223

    
6224
    # in case of import lock the source node too
6225
    if self.op.mode == constants.INSTANCE_IMPORT:
6226
      src_node = getattr(self.op, "src_node", None)
6227
      src_path = getattr(self.op, "src_path", None)
6228

    
6229
      if src_path is None:
6230
        self.op.src_path = src_path = self.op.instance_name
6231

    
6232
      if src_node is None:
6233
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6234
        self.op.src_node = None
6235
        if os.path.isabs(src_path):
6236
          raise errors.OpPrereqError("Importing an instance from an absolute"
6237
                                     " path requires a source node option.",
6238
                                     errors.ECODE_INVAL)
6239
      else:
6240
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6241
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6242
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
6243
        if not os.path.isabs(src_path):
6244
          self.op.src_path = src_path = \
6245
            utils.PathJoin(constants.EXPORT_DIR, src_path)
6246

    
6247
  def _RunAllocator(self):
6248
    """Run the allocator based on input opcode.
6249

6250
    """
6251
    nics = [n.ToDict() for n in self.nics]
6252
    ial = IAllocator(self.cfg, self.rpc,
6253
                     mode=constants.IALLOCATOR_MODE_ALLOC,
6254
                     name=self.op.instance_name,
6255
                     disk_template=self.op.disk_template,
6256
                     tags=[],
6257
                     os=self.op.os_type,
6258
                     vcpus=self.be_full[constants.BE_VCPUS],
6259
                     mem_size=self.be_full[constants.BE_MEMORY],
6260
                     disks=self.disks,
6261
                     nics=nics,
6262
                     hypervisor=self.op.hypervisor,
6263
                     )
6264

    
6265
    ial.Run(self.op.iallocator)
6266

    
6267
    if not ial.success:
6268
      raise errors.OpPrereqError("Can't compute nodes using"
6269
                                 " iallocator '%s': %s" %
6270
                                 (self.op.iallocator, ial.info),
6271
                                 errors.ECODE_NORES)
6272
    if len(ial.result) != ial.required_nodes:
6273
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6274
                                 " of nodes (%s), required %s" %
6275
                                 (self.op.iallocator, len(ial.result),
6276
                                  ial.required_nodes), errors.ECODE_FAULT)
6277
    self.op.pnode = ial.result[0]
6278
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6279
                 self.op.instance_name, self.op.iallocator,
6280
                 utils.CommaJoin(ial.result))
6281
    if ial.required_nodes == 2:
6282
      self.op.snode = ial.result[1]
6283

    
6284
  def BuildHooksEnv(self):
6285
    """Build hooks env.
6286

6287
    This runs on master, primary and secondary nodes of the instance.
6288

6289
    """
6290
    env = {
6291
      "ADD_MODE": self.op.mode,
6292
      }
6293
    if self.op.mode == constants.INSTANCE_IMPORT:
6294
      env["SRC_NODE"] = self.op.src_node
6295
      env["SRC_PATH"] = self.op.src_path
6296
      env["SRC_IMAGES"] = self.src_images
6297

    
6298
    env.update(_BuildInstanceHookEnv(
6299
      name=self.op.instance_name,
6300
      primary_node=self.op.pnode,
6301
      secondary_nodes=self.secondaries,
6302
      status=self.op.start,
6303
      os_type=self.op.os_type,
6304
      memory=self.be_full[constants.BE_MEMORY],
6305
      vcpus=self.be_full[constants.BE_VCPUS],
6306
      nics=_NICListToTuple(self, self.nics),
6307
      disk_template=self.op.disk_template,
6308
      disks=[(d["size"], d["mode"]) for d in self.disks],
6309
      bep=self.be_full,
6310
      hvp=self.hv_full,
6311
      hypervisor_name=self.op.hypervisor,
6312
    ))
6313

    
6314
    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6315
          self.secondaries)
6316
    return env, nl, nl
6317

    
6318
  def _ReadExportInfo(self):
6319
    """Reads the export information from disk.
6320

6321
    It will override the opcode source node and path with the actual
6322
    information, if these two were not specified before.
6323

6324
    @return: the export information
6325

6326
    """
6327
    assert self.op.mode == constants.INSTANCE_IMPORT
6328

    
6329
    src_node = self.op.src_node
6330
    src_path = self.op.src_path
6331

    
6332
    if src_node is None:
6333
      locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6334
      exp_list = self.rpc.call_export_list(locked_nodes)
6335
      found = False
6336
      for node in exp_list:
6337
        if exp_list[node].fail_msg:
6338
          continue
6339
        if src_path in exp_list[node].payload:
6340
          found = True
6341
          self.op.src_node = src_node = node
6342
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
6343
                                                       src_path)
6344
          break
6345
      if not found:
6346
        raise errors.OpPrereqError("No export found for relative path %s" %
6347
                                    src_path, errors.ECODE_INVAL)
6348

    
6349
    _CheckNodeOnline(self, src_node)
6350
    result = self.rpc.call_export_info(src_node, src_path)
6351
    result.Raise("No export or invalid export found in dir %s" % src_path)
6352

    
6353
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6354
    if not export_info.has_section(constants.INISECT_EXP):
6355
      raise errors.ProgrammerError("Corrupted export config",
6356
                                   errors.ECODE_ENVIRON)
6357

    
6358
    ei_version = export_info.get(constants.INISECT_EXP, "version")
6359
    if (int(ei_version) != constants.EXPORT_VERSION):
6360
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6361
                                 (ei_version, constants.EXPORT_VERSION),
6362
                                 errors.ECODE_ENVIRON)
6363
    return export_info
6364

    
6365
  def _ReadExportParams(self, einfo):
6366
    """Use export parameters as defaults.
6367

6368
    In case the opcode doesn't specify (as in override) some instance
6369
    parameters, then try to use them from the export information, if
6370
    that declares them.
6371

6372
    """
6373
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
6374

    
6375
    if self.op.disk_template is None:
6376
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
6377
        self.op.disk_template = einfo.get(constants.INISECT_INS,
6378
                                          "disk_template")
6379
      else:
6380
        raise errors.OpPrereqError("No disk template specified and the export"
6381
                                   " is missing the disk_template information",
6382
                                   errors.ECODE_INVAL)
6383

    
6384
    if not self.op.disks:
6385
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
6386
        disks = []
6387
        # TODO: import the disk iv_name too
6388
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
6389
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
6390
          disks.append({"size": disk_sz})
6391
        self.op.disks = disks
6392
      else:
6393
        raise errors.OpPrereqError("No disk info specified and the export"
6394
                                   " is missing the disk information",
6395
                                   errors.ECODE_INVAL)
6396

    
6397
    if (not self.op.nics and
6398
        einfo.has_option(constants.INISECT_INS, "nic_count")):
6399
      nics = []
6400
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
6401
        ndict = {}
6402
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
6403
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
6404
          ndict[name] = v
6405
        nics.append(ndict)
6406
      self.op.nics = nics
6407

    
6408
    if (self.op.hypervisor is None and
6409
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
6410
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
6411
    if einfo.has_section(constants.INISECT_HYP):
6412
      # use the export parameters but do not override the ones
6413
      # specified by the user
6414
      for name, value in einfo.items(constants.INISECT_HYP):
6415
        if name not in self.op.hvparams:
6416
          self.op.hvparams[name] = value
6417

    
6418
    if einfo.has_section(constants.INISECT_BEP):
6419
      # use the parameters, without overriding
6420
      for name, value in einfo.items(constants.INISECT_BEP):
6421
        if name not in self.op.beparams:
6422
          self.op.beparams[name] = value
6423
    else:
6424
      # try to read the parameters old style, from the main section
6425
      for name in constants.BES_PARAMETERS:
6426
        if (name not in self.op.beparams and
6427
            einfo.has_option(constants.INISECT_INS, name)):
6428
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
6429

    
6430
  def _RevertToDefaults(self, cluster):
6431
    """Revert the instance parameters to the default values.
6432

6433
    """
6434
    # hvparams
6435
    hv_defs = cluster.GetHVDefaults(self.op.hypervisor, self.op.os_type)
6436
    for name in self.op.hvparams.keys():
6437
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
6438
        del self.op.hvparams[name]
6439
    # beparams
6440
    be_defs = cluster.beparams.get(constants.PP_DEFAULT, {})
6441
    for name in self.op.beparams.keys():
6442
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
6443
        del self.op.beparams[name]
6444
    # nic params
6445
    nic_defs = cluster.nicparams.get(constants.PP_DEFAULT, {})
6446
    for nic in self.op.nics:
6447
      for name in constants.NICS_PARAMETERS:
6448
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
6449
          del nic[name]
6450

    
6451
  def CheckPrereq(self):
6452
    """Check prerequisites.
6453

6454
    """
6455
    if self.op.mode == constants.INSTANCE_IMPORT:
6456
      export_info = self._ReadExportInfo()
6457
      self._ReadExportParams(export_info)
6458

    
6459
    _CheckDiskTemplate(self.op.disk_template)
6460

    
6461
    if (not self.cfg.GetVGName() and
6462
        self.op.disk_template not in constants.DTS_NOT_LVM):
6463
      raise errors.OpPrereqError("Cluster does not support lvm-based"
6464
                                 " instances", errors.ECODE_STATE)
6465

    
6466
    if self.op.hypervisor is None:
6467
      self.op.hypervisor = self.cfg.GetHypervisorType()
6468

    
6469
    cluster = self.cfg.GetClusterInfo()
6470
    enabled_hvs = cluster.enabled_hypervisors
6471
    if self.op.hypervisor not in enabled_hvs:
6472
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
6473
                                 " cluster (%s)" % (self.op.hypervisor,
6474
                                  ",".join(enabled_hvs)),
6475
                                 errors.ECODE_STATE)
6476

    
6477
    # check hypervisor parameter syntax (locally)
6478
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6479
    filled_hvp = objects.FillDict(cluster.GetHVDefaults(self.op.hypervisor,
6480
                                                        self.op.os_type),
6481
                                  self.op.hvparams)
6482
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
6483
    hv_type.CheckParameterSyntax(filled_hvp)
6484
    self.hv_full = filled_hvp
6485
    # check that we don't specify global parameters on an instance
6486
    _CheckGlobalHvParams(self.op.hvparams)
6487

    
6488
    # fill and remember the beparams dict
6489
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6490
    self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
6491
                                    self.op.beparams)
6492

    
6493
    # now that hvp/bep are in final format, let's reset to defaults,
6494
    # if told to do so
6495
    if self.op.identify_defaults:
6496
      self._RevertToDefaults(cluster)
6497

    
6498
    # NIC buildup
6499
    self.nics = []
6500
    for idx, nic in enumerate(self.op.nics):
6501
      nic_mode_req = nic.get("mode", None)
6502
      nic_mode = nic_mode_req
6503
      if nic_mode is None:
6504
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
6505

    
6506
      # in routed mode, for the first nic, the default ip is 'auto'
6507
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
6508
        default_ip_mode = constants.VALUE_AUTO
6509
      else:
6510
        default_ip_mode = constants.VALUE_NONE
6511

    
6512
      # ip validity checks
6513
      ip = nic.get("ip", default_ip_mode)
6514
      if ip is None or ip.lower() == constants.VALUE_NONE:
6515
        nic_ip = None
6516
      elif ip.lower() == constants.VALUE_AUTO:
6517
        if not self.op.name_check:
6518
          raise errors.OpPrereqError("IP address set to auto but name checks"
6519
                                     " have been skipped. Aborting.",
6520
                                     errors.ECODE_INVAL)
6521
        nic_ip = self.hostname1.ip
6522
      else:
6523
        if not utils.IsValidIP(ip):
6524
          raise errors.OpPrereqError("Given IP address '%s' doesn't look"
6525
                                     " like a valid IP" % ip,
6526
                                     errors.ECODE_INVAL)
6527
        nic_ip = ip
6528

    
6529
      # TODO: check the ip address for uniqueness
6530
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
6531
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
6532
                                   errors.ECODE_INVAL)
6533

    
6534
      # MAC address verification
6535
      mac = nic.get("mac", constants.VALUE_AUTO)
6536
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6537
        mac = utils.NormalizeAndValidateMac(mac)
6538

    
6539
        try:
6540
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
6541
        except errors.ReservationError:
6542
          raise errors.OpPrereqError("MAC address %s already in use"
6543
                                     " in cluster" % mac,
6544
                                     errors.ECODE_NOTUNIQUE)
6545

    
6546
      # bridge verification
6547
      bridge = nic.get("bridge", None)
6548
      link = nic.get("link", None)
6549
      if bridge and link:
6550
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
6551
                                   " at the same time", errors.ECODE_INVAL)
6552
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
6553
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
6554
                                   errors.ECODE_INVAL)
6555
      elif bridge:
6556
        link = bridge
6557

    
6558
      nicparams = {}
6559
      if nic_mode_req:
6560
        nicparams[constants.NIC_MODE] = nic_mode_req
6561
      if link:
6562
        nicparams[constants.NIC_LINK] = link
6563

    
6564
      check_params = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
6565
                                      nicparams)
6566
      objects.NIC.CheckParameterSyntax(check_params)
6567
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
6568

    
6569
    # disk checks/pre-build
6570
    self.disks = []
6571
    for disk in self.op.disks:
6572
      mode = disk.get("mode", constants.DISK_RDWR)
6573
      if mode not in constants.DISK_ACCESS_SET:
6574
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
6575
                                   mode, errors.ECODE_INVAL)
6576
      size = disk.get("size", None)
6577
      if size is None:
6578
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
6579
      try:
6580
        size = int(size)
6581
      except (TypeError, ValueError):
6582
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
6583
                                   errors.ECODE_INVAL)
6584
      new_disk = {"size": size, "mode": mode}
6585
      if "adopt" in disk:
6586
        new_disk["adopt"] = disk["adopt"]
6587
      self.disks.append(new_disk)
6588

    
6589
    if self.op.mode == constants.INSTANCE_IMPORT:
6590

    
6591
      # Check that the new instance doesn't have less disks than the export
6592
      instance_disks = len(self.disks)
6593
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
6594
      if instance_disks < export_disks:
6595
        raise errors.OpPrereqError("Not enough disks to import."
6596
                                   " (instance: %d, export: %d)" %
6597
                                   (instance_disks, export_disks),
6598
                                   errors.ECODE_INVAL)
6599

    
6600
      disk_images = []
6601
      for idx in range(export_disks):
6602
        option = 'disk%d_dump' % idx
6603
        if export_info.has_option(constants.INISECT_INS, option):
6604
          # FIXME: are the old os-es, disk sizes, etc. useful?
6605
          export_name = export_info.get(constants.INISECT_INS, option)
6606
          image = utils.PathJoin(self.op.src_path, export_name)
6607
          disk_images.append(image)
6608
        else:
6609
          disk_images.append(False)
6610

    
6611
      self.src_images = disk_images
6612

    
6613
      old_name = export_info.get(constants.INISECT_INS, 'name')
6614
      try:
6615
        exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
6616
      except (TypeError, ValueError), err:
6617
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
6618
                                   " an integer: %s" % str(err),
6619
                                   errors.ECODE_STATE)
6620
      if self.op.instance_name == old_name:
6621
        for idx, nic in enumerate(self.nics):
6622
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
6623
            nic_mac_ini = 'nic%d_mac' % idx
6624
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
6625

    
6626
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
6627

    
6628
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
6629
    if self.op.ip_check:
6630
      if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
6631
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
6632
                                   (self.check_ip, self.op.instance_name),
6633
                                   errors.ECODE_NOTUNIQUE)
6634

    
6635
    #### mac address generation
6636
    # By generating here the mac address both the allocator and the hooks get
6637
    # the real final mac address rather than the 'auto' or 'generate' value.
6638
    # There is a race condition between the generation and the instance object
6639
    # creation, which means that we know the mac is valid now, but we're not
6640
    # sure it will be when we actually add the instance. If things go bad
6641
    # adding the instance will abort because of a duplicate mac, and the
6642
    # creation job will fail.
6643
    for nic in self.nics:
6644
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6645
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
6646

    
6647
    #### allocator run
6648

    
6649
    if self.op.iallocator is not None:
6650
      self._RunAllocator()
6651

    
6652
    #### node related checks
6653

    
6654
    # check primary node
6655
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
6656
    assert self.pnode is not None, \
6657
      "Cannot retrieve locked node %s" % self.op.pnode
6658
    if pnode.offline:
6659
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
6660
                                 pnode.name, errors.ECODE_STATE)
6661
    if pnode.drained:
6662
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
6663
                                 pnode.name, errors.ECODE_STATE)
6664

    
6665
    self.secondaries = []
6666

    
6667
    # mirror node verification
6668
    if self.op.disk_template in constants.DTS_NET_MIRROR:
6669
      if self.op.snode is None:
6670
        raise errors.OpPrereqError("The networked disk templates need"
6671
                                   " a mirror node", errors.ECODE_INVAL)
6672
      if self.op.snode == pnode.name:
6673
        raise errors.OpPrereqError("The secondary node cannot be the"
6674
                                   " primary node.", errors.ECODE_INVAL)
6675
      _CheckNodeOnline(self, self.op.snode)
6676
      _CheckNodeNotDrained(self, self.op.snode)
6677
      self.secondaries.append(self.op.snode)
6678

    
6679
    nodenames = [pnode.name] + self.secondaries
6680

    
6681
    req_size = _ComputeDiskSize(self.op.disk_template,
6682
                                self.disks)
6683

    
6684
    # Check lv size requirements, if not adopting
6685
    if req_size is not None and not self.adopt_disks:
6686
      _CheckNodesFreeDisk(self, nodenames, req_size)
6687

    
6688
    if self.adopt_disks: # instead, we must check the adoption data
6689
      all_lvs = set([i["adopt"] for i in self.disks])
6690
      if len(all_lvs) != len(self.disks):
6691
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
6692
                                   errors.ECODE_INVAL)
6693
      for lv_name in all_lvs:
6694
        try:
6695
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
6696
        except errors.ReservationError:
6697
          raise errors.OpPrereqError("LV named %s used by another instance" %
6698
                                     lv_name, errors.ECODE_NOTUNIQUE)
6699

    
6700
      node_lvs = self.rpc.call_lv_list([pnode.name],
6701
                                       self.cfg.GetVGName())[pnode.name]
6702
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
6703
      node_lvs = node_lvs.payload
6704
      delta = all_lvs.difference(node_lvs.keys())
6705
      if delta:
6706
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
6707
                                   utils.CommaJoin(delta),
6708
                                   errors.ECODE_INVAL)
6709
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
6710
      if online_lvs:
6711
        raise errors.OpPrereqError("Online logical volumes found, cannot"
6712
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
6713
                                   errors.ECODE_STATE)
6714
      # update the size of disk based on what is found
6715
      for dsk in self.disks:
6716
        dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
6717

    
6718
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
6719

    
6720
    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
6721

    
6722
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
6723

    
6724
    # memory check on primary node
6725
    if self.op.start:
6726
      _CheckNodeFreeMemory(self, self.pnode.name,
6727
                           "creating instance %s" % self.op.instance_name,
6728
                           self.be_full[constants.BE_MEMORY],
6729
                           self.op.hypervisor)
6730

    
6731
    self.dry_run_result = list(nodenames)
6732

    
6733
  def Exec(self, feedback_fn):
6734
    """Create and add the instance to the cluster.
6735

6736
    """
6737
    instance = self.op.instance_name
6738
    pnode_name = self.pnode.name
6739

    
6740
    ht_kind = self.op.hypervisor
6741
    if ht_kind in constants.HTS_REQ_PORT:
6742
      network_port = self.cfg.AllocatePort()
6743
    else:
6744
      network_port = None
6745

    
6746
    if constants.ENABLE_FILE_STORAGE:
6747
      # this is needed because os.path.join does not accept None arguments
6748
      if self.op.file_storage_dir is None:
6749
        string_file_storage_dir = ""
6750
      else:
6751
        string_file_storage_dir = self.op.file_storage_dir
6752

    
6753
      # build the full file storage dir path
6754
      file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
6755
                                        string_file_storage_dir, instance)
6756
    else:
6757
      file_storage_dir = ""
6758

    
6759

    
6760
    disks = _GenerateDiskTemplate(self,
6761
                                  self.op.disk_template,
6762
                                  instance, pnode_name,
6763
                                  self.secondaries,
6764
                                  self.disks,
6765
                                  file_storage_dir,
6766
                                  self.op.file_driver,
6767
                                  0)
6768

    
6769
    iobj = objects.Instance(name=instance, os=self.op.os_type,
6770
                            primary_node=pnode_name,
6771
                            nics=self.nics, disks=disks,
6772
                            disk_template=self.op.disk_template,
6773
                            admin_up=False,
6774
                            network_port=network_port,
6775
                            beparams=self.op.beparams,
6776
                            hvparams=self.op.hvparams,
6777
                            hypervisor=self.op.hypervisor,
6778
                            )
6779

    
6780
    if self.adopt_disks:
6781
      # rename LVs to the newly-generated names; we need to construct
6782
      # 'fake' LV disks with the old data, plus the new unique_id
6783
      tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
6784
      rename_to = []
6785
      for t_dsk, a_dsk in zip (tmp_disks, self.disks):
6786
        rename_to.append(t_dsk.logical_id)
6787
        t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
6788
        self.cfg.SetDiskID(t_dsk, pnode_name)
6789
      result = self.rpc.call_blockdev_rename(pnode_name,
6790
                                             zip(tmp_disks, rename_to))
6791
      result.Raise("Failed to rename adoped LVs")
6792
    else:
6793
      feedback_fn("* creating instance disks...")
6794
      try:
6795
        _CreateDisks(self, iobj)
6796
      except errors.OpExecError:
6797
        self.LogWarning("Device creation failed, reverting...")
6798
        try:
6799
          _RemoveDisks(self, iobj)
6800
        finally:
6801
          self.cfg.ReleaseDRBDMinors(instance)
6802
          raise
6803

    
6804
    feedback_fn("adding instance %s to cluster config" % instance)
6805

    
6806
    self.cfg.AddInstance(iobj, self.proc.GetECId())
6807

    
6808
    # Declare that we don't want to remove the instance lock anymore, as we've
6809
    # added the instance to the config
6810
    del self.remove_locks[locking.LEVEL_INSTANCE]
6811
    # Unlock all the nodes
6812
    if self.op.mode == constants.INSTANCE_IMPORT:
6813
      nodes_keep = [self.op.src_node]
6814
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
6815
                       if node != self.op.src_node]
6816
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
6817
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
6818
    else:
6819
      self.context.glm.release(locking.LEVEL_NODE)
6820
      del self.acquired_locks[locking.LEVEL_NODE]
6821

    
6822
    if self.op.wait_for_sync:
6823
      disk_abort = not _WaitForSync(self, iobj)
6824
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
6825
      # make sure the disks are not degraded (still sync-ing is ok)
6826
      time.sleep(15)
6827
      feedback_fn("* checking mirrors status")
6828
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
6829
    else:
6830
      disk_abort = False
6831

    
6832
    if disk_abort:
6833
      _RemoveDisks(self, iobj)
6834
      self.cfg.RemoveInstance(iobj.name)
6835
      # Make sure the instance lock gets removed
6836
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
6837
      raise errors.OpExecError("There are some degraded disks for"
6838
                               " this instance")
6839

    
6840
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
6841
      if self.op.mode == constants.INSTANCE_CREATE:
6842
        if not self.op.no_install:
6843
          feedback_fn("* running the instance OS create scripts...")
6844
          # FIXME: pass debug option from opcode to backend
6845
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
6846
                                                 self.op.debug_level)
6847
          result.Raise("Could not add os for instance %s"
6848
                       " on node %s" % (instance, pnode_name))
6849

    
6850
      elif self.op.mode == constants.INSTANCE_IMPORT:
6851
        feedback_fn("* running the instance OS import scripts...")
6852
        src_node = self.op.src_node
6853
        src_images = self.src_images
6854
        cluster_name = self.cfg.GetClusterName()
6855
        # FIXME: pass debug option from opcode to backend
6856
        import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
6857
                                                         src_node, src_images,
6858
                                                         cluster_name,
6859
                                                         self.op.debug_level)
6860
        msg = import_result.fail_msg
6861
        if msg:
6862
          self.LogWarning("Error while importing the disk images for instance"
6863
                          " %s on node %s: %s" % (instance, pnode_name, msg))
6864
      else:
6865
        # also checked in the prereq part
6866
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
6867
                                     % self.op.mode)
6868

    
6869
    if self.op.start:
6870
      iobj.admin_up = True
6871
      self.cfg.Update(iobj, feedback_fn)
6872
      logging.info("Starting instance %s on node %s", instance, pnode_name)
6873
      feedback_fn("* starting instance...")
6874
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
6875
      result.Raise("Could not start instance")
6876

    
6877
    return list(iobj.all_nodes)
6878

    
6879

    
6880
class LUConnectConsole(NoHooksLU):
6881
  """Connect to an instance's console.
6882

6883
  This is somewhat special in that it returns the command line that
6884
  you need to run on the master node in order to connect to the
6885
  console.
6886

6887
  """
6888
  _OP_REQP = ["instance_name"]
6889
  REQ_BGL = False
6890

    
6891
  def ExpandNames(self):
6892
    self._ExpandAndLockInstance()
6893

    
6894
  def CheckPrereq(self):
6895
    """Check prerequisites.
6896

6897
    This checks that the instance is in the cluster.
6898

6899
    """
6900
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6901
    assert self.instance is not None, \
6902
      "Cannot retrieve locked instance %s" % self.op.instance_name
6903
    _CheckNodeOnline(self, self.instance.primary_node)
6904

    
6905
  def Exec(self, feedback_fn):
6906
    """Connect to the console of an instance
6907

6908
    """
6909
    instance = self.instance
6910
    node = instance.primary_node
6911

    
6912
    node_insts = self.rpc.call_instance_list([node],
6913
                                             [instance.hypervisor])[node]
6914
    node_insts.Raise("Can't get node information from %s" % node)
6915

    
6916
    if instance.name not in node_insts.payload:
6917
      raise errors.OpExecError("Instance %s is not running." % instance.name)
6918

    
6919
    logging.debug("Connecting to console of %s on %s", instance.name, node)
6920

    
6921
    hyper = hypervisor.GetHypervisor(instance.hypervisor)
6922
    cluster = self.cfg.GetClusterInfo()
6923
    # beparams and hvparams are passed separately, to avoid editing the
6924
    # instance and then saving the defaults in the instance itself.
6925
    hvparams = cluster.FillHV(instance)
6926
    beparams = cluster.FillBE(instance)
6927
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
6928

    
6929
    # build ssh cmdline
6930
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
6931

    
6932

    
6933
class LUReplaceDisks(LogicalUnit):
6934
  """Replace the disks of an instance.
6935

6936
  """
6937
  HPATH = "mirrors-replace"
6938
  HTYPE = constants.HTYPE_INSTANCE
6939
  _OP_REQP = ["instance_name", "mode", "disks"]
6940
  REQ_BGL = False
6941

    
6942
  def CheckArguments(self):
6943
    if not hasattr(self.op, "remote_node"):
6944
      self.op.remote_node = None
6945
    if not hasattr(self.op, "iallocator"):
6946
      self.op.iallocator = None
6947
    if not hasattr(self.op, "early_release"):
6948
      self.op.early_release = False
6949

    
6950
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
6951
                                  self.op.iallocator)
6952

    
6953
  def ExpandNames(self):
6954
    self._ExpandAndLockInstance()
6955

    
6956
    if self.op.iallocator is not None:
6957
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6958

    
6959
    elif self.op.remote_node is not None:
6960
      remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
6961
      self.op.remote_node = remote_node
6962

    
6963
      # Warning: do not remove the locking of the new secondary here
6964
      # unless DRBD8.AddChildren is changed to work in parallel;
6965
      # currently it doesn't since parallel invocations of
6966
      # FindUnusedMinor will conflict
6967
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
6968
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6969

    
6970
    else:
6971
      self.needed_locks[locking.LEVEL_NODE] = []
6972
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6973

    
6974
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
6975
                                   self.op.iallocator, self.op.remote_node,
6976
                                   self.op.disks, False, self.op.early_release)
6977

    
6978
    self.tasklets = [self.replacer]
6979

    
6980
  def DeclareLocks(self, level):
6981
    # If we're not already locking all nodes in the set we have to declare the
6982
    # instance's primary/secondary nodes.
6983
    if (level == locking.LEVEL_NODE and
6984
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
6985
      self._LockInstancesNodes()
6986

    
6987
  def BuildHooksEnv(self):
6988
    """Build hooks env.
6989

6990
    This runs on the master, the primary and all the secondaries.
6991

6992
    """
6993
    instance = self.replacer.instance
6994
    env = {
6995
      "MODE": self.op.mode,
6996
      "NEW_SECONDARY": self.op.remote_node,
6997
      "OLD_SECONDARY": instance.secondary_nodes[0],
6998
      }
6999
    env.update(_BuildInstanceHookEnvByObject(self, instance))
7000
    nl = [
7001
      self.cfg.GetMasterNode(),
7002
      instance.primary_node,
7003
      ]
7004
    if self.op.remote_node is not None:
7005
      nl.append(self.op.remote_node)
7006
    return env, nl, nl
7007

    
7008

    
7009
class LUEvacuateNode(LogicalUnit):
7010
  """Relocate the secondary instances from a node.
7011

7012
  """
7013
  HPATH = "node-evacuate"
7014
  HTYPE = constants.HTYPE_NODE
7015
  _OP_REQP = ["node_name"]
7016
  REQ_BGL = False
7017

    
7018
  def CheckArguments(self):
7019
    if not hasattr(self.op, "remote_node"):
7020
      self.op.remote_node = None
7021
    if not hasattr(self.op, "iallocator"):
7022
      self.op.iallocator = None
7023
    if not hasattr(self.op, "early_release"):
7024
      self.op.early_release = False
7025

    
7026
    TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
7027
                                  self.op.remote_node,
7028
                                  self.op.iallocator)
7029

    
7030
  def ExpandNames(self):
7031
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7032

    
7033
    self.needed_locks = {}
7034

    
7035
    # Declare node locks
7036
    if self.op.iallocator is not None:
7037
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7038

    
7039
    elif self.op.remote_node is not None:
7040
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7041

    
7042
      # Warning: do not remove the locking of the new secondary here
7043
      # unless DRBD8.AddChildren is changed to work in parallel;
7044
      # currently it doesn't since parallel invocations of
7045
      # FindUnusedMinor will conflict
7046
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
7047
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7048

    
7049
    else:
7050
      raise errors.OpPrereqError("Invalid parameters", errors.ECODE_INVAL)
7051

    
7052
    # Create tasklets for replacing disks for all secondary instances on this
7053
    # node
7054
    names = []
7055
    tasklets = []
7056

    
7057
    for inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
7058
      logging.debug("Replacing disks for instance %s", inst.name)
7059
      names.append(inst.name)
7060

    
7061
      replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
7062
                                self.op.iallocator, self.op.remote_node, [],
7063
                                True, self.op.early_release)
7064
      tasklets.append(replacer)
7065

    
7066
    self.tasklets = tasklets
7067
    self.instance_names = names
7068

    
7069
    # Declare instance locks
7070
    self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names
7071

    
7072
  def DeclareLocks(self, level):
7073
    # If we're not already locking all nodes in the set we have to declare the
7074
    # instance's primary/secondary nodes.
7075
    if (level == locking.LEVEL_NODE and
7076
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7077
      self._LockInstancesNodes()
7078

    
7079
  def BuildHooksEnv(self):
7080
    """Build hooks env.
7081

7082
    This runs on the master, the primary and all the secondaries.
7083

7084
    """
7085
    env = {
7086
      "NODE_NAME": self.op.node_name,
7087
      }
7088

    
7089
    nl = [self.cfg.GetMasterNode()]
7090

    
7091
    if self.op.remote_node is not None:
7092
      env["NEW_SECONDARY"] = self.op.remote_node
7093
      nl.append(self.op.remote_node)
7094

    
7095
    return (env, nl, nl)
7096

    
7097

    
7098
class TLReplaceDisks(Tasklet):
7099
  """Replaces disks for an instance.
7100

7101
  Note: Locking is not within the scope of this class.
7102

7103
  """
7104
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7105
               disks, delay_iallocator, early_release):
7106
    """Initializes this class.
7107

7108
    """
7109
    Tasklet.__init__(self, lu)
7110

    
7111
    # Parameters
7112
    self.instance_name = instance_name
7113
    self.mode = mode
7114
    self.iallocator_name = iallocator_name
7115
    self.remote_node = remote_node
7116
    self.disks = disks
7117
    self.delay_iallocator = delay_iallocator
7118
    self.early_release = early_release
7119

    
7120
    # Runtime data
7121
    self.instance = None
7122
    self.new_node = None
7123
    self.target_node = None
7124
    self.other_node = None
7125
    self.remote_node_info = None
7126
    self.node_secondary_ip = None
7127

    
7128
  @staticmethod
7129
  def CheckArguments(mode, remote_node, iallocator):
7130
    """Helper function for users of this class.
7131

7132
    """
7133
    # check for valid parameter combination
7134
    if mode == constants.REPLACE_DISK_CHG:
7135
      if remote_node is None and iallocator is None:
7136
        raise errors.OpPrereqError("When changing the secondary either an"
7137
                                   " iallocator script must be used or the"
7138
                                   " new node given", errors.ECODE_INVAL)
7139

    
7140
      if remote_node is not None and iallocator is not None:
7141
        raise errors.OpPrereqError("Give either the iallocator or the new"
7142
                                   " secondary, not both", errors.ECODE_INVAL)
7143

    
7144
    elif remote_node is not None or iallocator is not None:
7145
      # Not replacing the secondary
7146
      raise errors.OpPrereqError("The iallocator and new node options can"
7147
                                 " only be used when changing the"
7148
                                 " secondary node", errors.ECODE_INVAL)
7149

    
7150
  @staticmethod
7151
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7152
    """Compute a new secondary node using an IAllocator.
7153

7154
    """
7155
    ial = IAllocator(lu.cfg, lu.rpc,
7156
                     mode=constants.IALLOCATOR_MODE_RELOC,
7157
                     name=instance_name,
7158
                     relocate_from=relocate_from)
7159

    
7160
    ial.Run(iallocator_name)
7161

    
7162
    if not ial.success:
7163
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7164
                                 " %s" % (iallocator_name, ial.info),
7165
                                 errors.ECODE_NORES)
7166

    
7167
    if len(ial.result) != ial.required_nodes:
7168
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7169
                                 " of nodes (%s), required %s" %
7170
                                 (iallocator_name,
7171
                                  len(ial.result), ial.required_nodes),
7172
                                 errors.ECODE_FAULT)
7173

    
7174
    remote_node_name = ial.result[0]
7175

    
7176
    lu.LogInfo("Selected new secondary for instance '%s': %s",
7177
               instance_name, remote_node_name)
7178

    
7179
    return remote_node_name
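    # For IALLOCATOR_MODE_RELOC the allocator is expected to return a list
    # with exactly one node name (checked against ial.required_nodes above),
    # e.g. ["node3.example.com"] (the hostname is purely illustrative).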
7180

    
7181
  def _FindFaultyDisks(self, node_name):
7182
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7183
                                    node_name, True)
7184

    
7185
  def CheckPrereq(self):
7186
    """Check prerequisites.
7187

7188
    This checks that the instance is in the cluster.
7189

7190
    """
7191
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7192
    assert instance is not None, \
7193
      "Cannot retrieve locked instance %s" % self.instance_name
7194

    
7195
    if instance.disk_template != constants.DT_DRBD8:
7196
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7197
                                 " instances", errors.ECODE_INVAL)
7198

    
7199
    if len(instance.secondary_nodes) != 1:
7200
      raise errors.OpPrereqError("The instance has a strange layout,"
7201
                                 " expected one secondary but found %d" %
7202
                                 len(instance.secondary_nodes),
7203
                                 errors.ECODE_FAULT)
7204

    
7205
    if not self.delay_iallocator:
7206
      self._CheckPrereq2()
7207

    
7208
  def _CheckPrereq2(self):
7209
    """Check prerequisites, second part.
7210

7211
    This function should always be part of CheckPrereq. It was separated and is
7212
    now called from Exec because during node evacuation the iallocator would
7213
    otherwise be called with an unmodified cluster model, not taking planned
7214
    changes into account.
7215

7216
    """
7217
    instance = self.instance
7218
    secondary_node = instance.secondary_nodes[0]
7219

    
7220
    if self.iallocator_name is None:
7221
      remote_node = self.remote_node
7222
    else:
7223
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7224
                                       instance.name, instance.secondary_nodes)
7225

    
7226
    if remote_node is not None:
7227
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7228
      assert self.remote_node_info is not None, \
7229
        "Cannot retrieve locked node %s" % remote_node
7230
    else:
7231
      self.remote_node_info = None
7232

    
7233
    if remote_node == self.instance.primary_node:
7234
      raise errors.OpPrereqError("The specified node is the primary node of"
7235
                                 " the instance.", errors.ECODE_INVAL)
7236

    
7237
    if remote_node == secondary_node:
7238
      raise errors.OpPrereqError("The specified node is already the"
7239
                                 " secondary node of the instance.",
7240
                                 errors.ECODE_INVAL)
7241

    
7242
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7243
                                    constants.REPLACE_DISK_CHG):
7244
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
7245
                                 errors.ECODE_INVAL)
7246

    
7247
    if self.mode == constants.REPLACE_DISK_AUTO:
7248
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
7249
      faulty_secondary = self._FindFaultyDisks(secondary_node)
7250

    
7251
      if faulty_primary and faulty_secondary:
7252
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7253
                                   " one node and can not be repaired"
7254
                                   " automatically" % self.instance_name,
7255
                                   errors.ECODE_STATE)
7256

    
7257
      if faulty_primary:
7258
        self.disks = faulty_primary
7259
        self.target_node = instance.primary_node
7260
        self.other_node = secondary_node
7261
        check_nodes = [self.target_node, self.other_node]
7262
      elif faulty_secondary:
7263
        self.disks = faulty_secondary
7264
        self.target_node = secondary_node
7265
        self.other_node = instance.primary_node
7266
        check_nodes = [self.target_node, self.other_node]
7267
      else:
7268
        self.disks = []
7269
        check_nodes = []
7270

    
7271
    else:
7272
      # Non-automatic modes
7273
      if self.mode == constants.REPLACE_DISK_PRI:
7274
        self.target_node = instance.primary_node
7275
        self.other_node = secondary_node
7276
        check_nodes = [self.target_node, self.other_node]
7277

    
7278
      elif self.mode == constants.REPLACE_DISK_SEC:
7279
        self.target_node = secondary_node
7280
        self.other_node = instance.primary_node
7281
        check_nodes = [self.target_node, self.other_node]
7282

    
7283
      elif self.mode == constants.REPLACE_DISK_CHG:
7284
        self.new_node = remote_node
7285
        self.other_node = instance.primary_node
7286
        self.target_node = secondary_node
7287
        check_nodes = [self.new_node, self.other_node]
7288

    
7289
        _CheckNodeNotDrained(self.lu, remote_node)
7290

    
7291
        old_node_info = self.cfg.GetNodeInfo(secondary_node)
7292
        assert old_node_info is not None
7293
        if old_node_info.offline and not self.early_release:
7294
          # doesn't make sense to delay the release
7295
          self.early_release = True
7296
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
7297
                          " early-release mode", secondary_node)
7298

    
7299
      else:
7300
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
7301
                                     self.mode)
7302

    
7303
      # If not specified all disks should be replaced
7304
      if not self.disks:
7305
        self.disks = range(len(self.instance.disks))
7306

    
7307
    for node in check_nodes:
7308
      _CheckNodeOnline(self.lu, node)
7309

    
7310
    # Check whether disks are valid
7311
    for disk_idx in self.disks:
7312
      instance.FindDisk(disk_idx)
7313

    
7314
    # Get secondary node IP addresses
7315
    node_2nd_ip = {}
7316

    
7317
    for node_name in [self.target_node, self.other_node, self.new_node]:
7318
      if node_name is not None:
7319
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
7320

    
7321
    self.node_secondary_ip = node_2nd_ip
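    # Illustrative shape of the mapping built above (names/addresses made up):
    #   {"node1.example.com": "192.0.2.1", "node2.example.com": "192.0.2.2"}
    # i.e. node name -> secondary (replication) IP, used later for the DRBD
    # disconnect/attach RPCs.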
7322

    
7323
  def Exec(self, feedback_fn):
7324
    """Execute disk replacement.
7325

7326
    This dispatches the disk replacement to the appropriate handler.
7327

7328
    """
7329
    if self.delay_iallocator:
7330
      self._CheckPrereq2()
7331

    
7332
    if not self.disks:
7333
      feedback_fn("No disks need replacement")
7334
      return
7335

    
7336
    feedback_fn("Replacing disk(s) %s for %s" %
7337
                (utils.CommaJoin(self.disks), self.instance.name))
7338

    
7339
    activate_disks = (not self.instance.admin_up)
7340

    
7341
    # Activate the instance disks if we're replacing them on a down instance
7342
    if activate_disks:
7343
      _StartInstanceDisks(self.lu, self.instance, True)
7344

    
7345
    try:
7346
      # Should we replace the secondary node?
7347
      if self.new_node is not None:
7348
        fn = self._ExecDrbd8Secondary
7349
      else:
7350
        fn = self._ExecDrbd8DiskOnly
7351

    
7352
      return fn(feedback_fn)
7353

    
7354
    finally:
7355
      # Deactivate the instance disks if we're replacing them on a
7356
      # down instance
7357
      if activate_disks:
7358
        _SafeShutdownInstanceDisks(self.lu, self.instance)
7359

    
7360
  def _CheckVolumeGroup(self, nodes):
7361
    self.lu.LogInfo("Checking volume groups")
7362

    
7363
    vgname = self.cfg.GetVGName()
7364

    
7365
    # Make sure volume group exists on all involved nodes
7366
    results = self.rpc.call_vg_list(nodes)
7367
    if not results:
7368
      raise errors.OpExecError("Can't list volume groups on the nodes")
7369

    
7370
    for node in nodes:
7371
      res = results[node]
7372
      res.Raise("Error checking node %s" % node)
7373
      if vgname not in res.payload:
7374
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
7375
                                 (vgname, node))
7376

    
7377
  def _CheckDisksExistence(self, nodes):
7378
    # Check disk existence
7379
    for idx, dev in enumerate(self.instance.disks):
7380
      if idx not in self.disks:
7381
        continue
7382

    
7383
      for node in nodes:
7384
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
7385
        self.cfg.SetDiskID(dev, node)
7386

    
7387
        result = self.rpc.call_blockdev_find(node, dev)
7388

    
7389
        msg = result.fail_msg
7390
        if msg or not result.payload:
7391
          if not msg:
7392
            msg = "disk not found"
7393
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7394
                                   (idx, node, msg))
7395

    
7396
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7397
    for idx, dev in enumerate(self.instance.disks):
7398
      if idx not in self.disks:
7399
        continue
7400

    
7401
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7402
                      (idx, node_name))
7403

    
7404
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
7405
                                   ldisk=ldisk):
7406
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7407
                                 " replace disks for instance %s" %
7408
                                 (node_name, self.instance.name))
7409

    
7410
  def _CreateNewStorage(self, node_name):
7411
    vgname = self.cfg.GetVGName()
7412
    iv_names = {}
7413

    
7414
    for idx, dev in enumerate(self.instance.disks):
7415
      if idx not in self.disks:
7416
        continue
7417

    
7418
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7419

    
7420
      self.cfg.SetDiskID(dev, node_name)
7421

    
7422
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7423
      names = _GenerateUniqueNames(self.lu, lv_names)
7424

    
7425
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7426
                             logical_id=(vgname, names[0]))
7427
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7428
                             logical_id=(vgname, names[1]))
7429

    
7430
      new_lvs = [lv_data, lv_meta]
7431
      old_lvs = dev.children
7432
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
7433

    
7434
      # we pass force_create=True to force the LVM creation
7435
      for new_lv in new_lvs:
7436
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7437
                        _GetInstanceInfoText(self.instance), False)
7438

    
7439
    return iv_names
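    # Illustrative shape of the returned dict (device objects elided):
    #   {"disk/0": (<drbd Disk>, [<old data LV>, <old meta LV>],
    #               [<new data LV>, <new meta LV>]), ...}
    # keyed by the DRBD device's iv_name; consumed by _CheckDevices and
    # _RemoveOldStorage below.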
7440

    
7441
  def _CheckDevices(self, node_name, iv_names):
7442
    for name, (dev, _, _) in iv_names.iteritems():
7443
      self.cfg.SetDiskID(dev, node_name)
7444

    
7445
      result = self.rpc.call_blockdev_find(node_name, dev)
7446

    
7447
      msg = result.fail_msg
7448
      if msg or not result.payload:
7449
        if not msg:
7450
          msg = "disk not found"
7451
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
7452
                                 (name, msg))
7453

    
7454
      if result.payload.is_degraded:
7455
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
7456

    
7457
  def _RemoveOldStorage(self, node_name, iv_names):
7458
    for name, (_, old_lvs, _) in iv_names.iteritems():
7459
      self.lu.LogInfo("Remove logical volumes for %s" % name)
7460

    
7461
      for lv in old_lvs:
7462
        self.cfg.SetDiskID(lv, node_name)
7463

    
7464
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7465
        if msg:
7466
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
7467
                             hint="remove unused LVs manually")
7468

    
7469
  def _ReleaseNodeLock(self, node_name):
7470
    """Releases the lock for a given node."""
7471
    self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7472

    
7473
  def _ExecDrbd8DiskOnly(self, feedback_fn):
7474
    """Replace a disk on the primary or secondary for DRBD 8.
7475

7476
    The algorithm for replace is quite complicated:
7477

7478
      1. for each disk to be replaced:
7479

7480
        1. create new LVs on the target node with unique names
7481
        1. detach old LVs from the drbd device
7482
        1. rename old LVs to name_replaced.<time_t>
7483
        1. rename new LVs to old LVs
7484
        1. attach the new LVs (with the old names now) to the drbd device
7485

7486
      1. wait for sync across all devices
7487

7488
      1. for each modified disk:
7489

7490
        1. remove old LVs (which have the name name_replaced.<time_t>)
7491

7492
    Failures are not very well handled.
7493

7494
    """
7495
    steps_total = 6
7496

    
7497
    # Step: check device activation
7498
    self.lu.LogStep(1, steps_total, "Check device existence")
7499
    self._CheckDisksExistence([self.other_node, self.target_node])
7500
    self._CheckVolumeGroup([self.target_node, self.other_node])
7501

    
7502
    # Step: check other node consistency
7503
    self.lu.LogStep(2, steps_total, "Check peer consistency")
7504
    self._CheckDisksConsistency(self.other_node,
7505
                                self.other_node == self.instance.primary_node,
7506
                                False)
7507

    
7508
    # Step: create new storage
7509
    self.lu.LogStep(3, steps_total, "Allocate new storage")
7510
    iv_names = self._CreateNewStorage(self.target_node)
7511

    
7512
    # Step: for each lv, detach+rename*2+attach
7513
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7514
    for dev, old_lvs, new_lvs in iv_names.itervalues():
7515
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
7516

    
7517
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
7518
                                                     old_lvs)
7519
      result.Raise("Can't detach drbd from local storage on node"
7520
                   " %s for device %s" % (self.target_node, dev.iv_name))
7521
      #dev.children = []
7522
      #cfg.Update(instance)
7523

    
7524
      # ok, we created the new LVs, so now we know we have the needed
7525
      # storage; as such, we proceed on the target node to rename
7526
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
7527
      # using the assumption that logical_id == physical_id (which in
7528
      # turn is the unique_id on that node)
7529

    
7530
      # FIXME(iustin): use a better name for the replaced LVs
7531
      temp_suffix = int(time.time())
7532
      ren_fn = lambda d, suff: (d.physical_id[0],
7533
                                d.physical_id[1] + "_replaced-%s" % suff)
7534

    
7535
      # Build the rename list based on what LVs exist on the node
7536
      rename_old_to_new = []
7537
      for to_ren in old_lvs:
7538
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
7539
        if not result.fail_msg and result.payload:
7540
          # device exists
7541
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
7542

    
7543
      self.lu.LogInfo("Renaming the old LVs on the target node")
7544
      result = self.rpc.call_blockdev_rename(self.target_node,
7545
                                             rename_old_to_new)
7546
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
7547

    
7548
      # Now we rename the new LVs to the old LVs
7549
      self.lu.LogInfo("Renaming the new LVs on the target node")
7550
      rename_new_to_old = [(new, old.physical_id)
7551
                           for old, new in zip(old_lvs, new_lvs)]
7552
      result = self.rpc.call_blockdev_rename(self.target_node,
7553
                                             rename_new_to_old)
7554
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
7555

    
7556
      for old, new in zip(old_lvs, new_lvs):
7557
        new.logical_id = old.logical_id
7558
        self.cfg.SetDiskID(new, self.target_node)
7559

    
7560
      for disk in old_lvs:
7561
        disk.logical_id = ren_fn(disk, temp_suffix)
7562
        self.cfg.SetDiskID(disk, self.target_node)
7563

    
7564
      # Now that the new lvs have the old name, we can add them to the device
7565
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
7566
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
7567
                                                  new_lvs)
7568
      msg = result.fail_msg
7569
      if msg:
7570
        for new_lv in new_lvs:
7571
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
7572
                                               new_lv).fail_msg
7573
          if msg2:
7574
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
7575
                               hint=("cleanup manually the unused logical"
7576
                                     "volumes"))
7577
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
7578

    
7579
      dev.children = new_lvs
7580

    
7581
      self.cfg.Update(self.instance, feedback_fn)
7582

    
7583
    cstep = 5
7584
    if self.early_release:
7585
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
7586
      cstep += 1
7587
      self._RemoveOldStorage(self.target_node, iv_names)
7588
      # WARNING: we release both node locks here, do not do other RPCs
7589
      # than WaitForSync to the primary node
7590
      self._ReleaseNodeLock([self.target_node, self.other_node])
7591

    
7592
    # Wait for sync
7593
    # This can fail as the old devices are degraded and _WaitForSync
7594
    # does a combined result over all disks, so we don't check its return value
7595
    self.lu.LogStep(cstep, steps_total, "Sync devices")
7596
    cstep += 1
7597
    _WaitForSync(self.lu, self.instance)
7598

    
7599
    # Check all devices manually
7600
    self._CheckDevices(self.instance.primary_node, iv_names)
7601

    
7602
    # Step: remove old storage
7603
    if not self.early_release:
7604
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
7605
      cstep += 1
7606
      self._RemoveOldStorage(self.target_node, iv_names)
7607

    
7608
  def _ExecDrbd8Secondary(self, feedback_fn):
7609
    """Replace the secondary node for DRBD 8.
7610

7611
    The algorithm for replace is quite complicated:
7612
      - for all disks of the instance:
7613
        - create new LVs on the new node with same names
7614
        - shutdown the drbd device on the old secondary
7615
        - disconnect the drbd network on the primary
7616
        - create the drbd device on the new secondary
7617
        - network attach the drbd on the primary, using an artifice:
7618
          the drbd code for Attach() will connect to the network if it
7619
          finds a device which is connected to the good local disks but
7620
          not network enabled
7621
      - wait for sync across all devices
7622
      - remove all disks from the old secondary
7623

7624
    Failures are not very well handled.
7625

7626
    """
7627
    steps_total = 6
7628

    
7629
    # Step: check device activation
7630
    self.lu.LogStep(1, steps_total, "Check device existence")
7631
    self._CheckDisksExistence([self.instance.primary_node])
7632
    self._CheckVolumeGroup([self.instance.primary_node])
7633

    
7634
    # Step: check other node consistency
7635
    self.lu.LogStep(2, steps_total, "Check peer consistency")
7636
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
7637

    
7638
    # Step: create new storage
7639
    self.lu.LogStep(3, steps_total, "Allocate new storage")
7640
    for idx, dev in enumerate(self.instance.disks):
7641
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
7642
                      (self.new_node, idx))
7643
      # we pass force_create=True to force LVM creation
7644
      for new_lv in dev.children:
7645
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
7646
                        _GetInstanceInfoText(self.instance), False)
7647

    
7648
    # Step 4: drbd minors and drbd setup changes
7649
    # after this, we must manually remove the drbd minors on both the
7650
    # error and the success paths
7651
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7652
    minors = self.cfg.AllocateDRBDMinor([self.new_node
7653
                                         for dev in self.instance.disks],
7654
                                        self.instance.name)
7655
    logging.debug("Allocated minors %r", minors)
7656

    
7657
    iv_names = {}
7658
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
7659
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
7660
                      (self.new_node, idx))
7661
      # create new devices on new_node; note that we create two IDs:
7662
      # one without port, so the drbd will be activated without
7663
      # networking information on the new node at this stage, and one
7664
      # with network, for the latter activation in step 4
7665
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
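      # The DRBD8 logical_id unpacked above is, schematically (values made up):
      #   ("node1.example.com", "node2.example.com", 11000, 0, 0, "<secret>")
      # i.e. (nodeA, nodeB, port, minorA, minorB, shared secret).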
7666
      if self.instance.primary_node == o_node1:
7667
        p_minor = o_minor1
7668
      else:
7669
        assert self.instance.primary_node == o_node2, "Three-node instance?"
7670
        p_minor = o_minor2
7671

    
7672
      new_alone_id = (self.instance.primary_node, self.new_node, None,
7673
                      p_minor, new_minor, o_secret)
7674
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
7675
                    p_minor, new_minor, o_secret)
7676

    
7677
      iv_names[idx] = (dev, dev.children, new_net_id)
7678
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
7679
                    new_net_id)
7680
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
7681
                              logical_id=new_alone_id,
7682
                              children=dev.children,
7683
                              size=dev.size)
7684
      try:
7685
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
7686
                              _GetInstanceInfoText(self.instance), False)
7687
      except errors.GenericError:
7688
        self.cfg.ReleaseDRBDMinors(self.instance.name)
7689
        raise
7690

    
7691
    # We have new devices, shutdown the drbd on the old secondary
7692
    for idx, dev in enumerate(self.instance.disks):
7693
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
7694
      self.cfg.SetDiskID(dev, self.target_node)
7695
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
7696
      if msg:
7697
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
7698
                           "node: %s" % (idx, msg),
7699
                           hint=("Please cleanup this device manually as"
7700
                                 " soon as possible"))
7701

    
7702
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
7703
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
7704
                                               self.node_secondary_ip,
7705
                                               self.instance.disks)\
7706
                                              [self.instance.primary_node]
7707

    
7708
    msg = result.fail_msg
7709
    if msg:
7710
      # detaches didn't succeed (unlikely)
7711
      self.cfg.ReleaseDRBDMinors(self.instance.name)
7712
      raise errors.OpExecError("Can't detach the disks from the network on"
7713
                               " old node: %s" % (msg,))
7714

    
7715
    # if we managed to detach at least one, we update all the disks of
7716
    # the instance to point to the new secondary
7717
    self.lu.LogInfo("Updating instance configuration")
7718
    for dev, _, new_logical_id in iv_names.itervalues():
7719
      dev.logical_id = new_logical_id
7720
      self.cfg.SetDiskID(dev, self.instance.primary_node)
7721

    
7722
    self.cfg.Update(self.instance, feedback_fn)
7723

    
7724
    # and now perform the drbd attach
7725
    self.lu.LogInfo("Attaching primary drbds to new secondary"
7726
                    " (standalone => connected)")
7727
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
7728
                                            self.new_node],
7729
                                           self.node_secondary_ip,
7730
                                           self.instance.disks,
7731
                                           self.instance.name,
7732
                                           False)
7733
    for to_node, to_result in result.items():
7734
      msg = to_result.fail_msg
7735
      if msg:
7736
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
7737
                           to_node, msg,
7738
                           hint=("please do a gnt-instance info to see the"
7739
                                 " status of disks"))
7740
    cstep = 5
7741
    if self.early_release:
7742
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
7743
      cstep += 1
7744
      self._RemoveOldStorage(self.target_node, iv_names)
7745
      # WARNING: we release all node locks here, do not do other RPCs
7746
      # than WaitForSync to the primary node
7747
      self._ReleaseNodeLock([self.instance.primary_node,
7748
                             self.target_node,
7749
                             self.new_node])
7750

    
7751
    # Wait for sync
7752
    # This can fail as the old devices are degraded and _WaitForSync
7753
    # does a combined result over all disks, so we don't check its return value
7754
    self.lu.LogStep(cstep, steps_total, "Sync devices")
7755
    cstep += 1
7756
    _WaitForSync(self.lu, self.instance)
7757

    
7758
    # Check all devices manually
7759
    self._CheckDevices(self.instance.primary_node, iv_names)
7760

    
7761
    # Step: remove old storage
7762
    if not self.early_release:
7763
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
7764
      self._RemoveOldStorage(self.target_node, iv_names)
7765

    
7766

    
7767
class LURepairNodeStorage(NoHooksLU):
7768
  """Repairs the volume group on a node.
7769

7770
  """
7771
  _OP_REQP = ["node_name"]
7772
  REQ_BGL = False
7773

    
7774
  def CheckArguments(self):
7775
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7776

    
7777
    _CheckStorageType(self.op.storage_type)
7778

    
7779
  def ExpandNames(self):
7780
    self.needed_locks = {
7781
      locking.LEVEL_NODE: [self.op.node_name],
7782
      }
7783

    
7784
  def _CheckFaultyDisks(self, instance, node_name):
7785
    """Ensure faulty disks abort the opcode or at least warn."""
7786
    try:
7787
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
7788
                                  node_name, True):
7789
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
7790
                                   " node '%s'" % (instance.name, node_name),
7791
                                   errors.ECODE_STATE)
7792
    except errors.OpPrereqError, err:
7793
      if self.op.ignore_consistency:
7794
        self.proc.LogWarning(str(err.args[0]))
7795
      else:
7796
        raise
7797

    
7798
  def CheckPrereq(self):
7799
    """Check prerequisites.
7800

7801
    """
7802
    storage_type = self.op.storage_type
7803

    
7804
    if (constants.SO_FIX_CONSISTENCY not in
7805
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
7806
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
7807
                                 " repaired" % storage_type,
7808
                                 errors.ECODE_INVAL)
7809

    
7810
    # Check whether any instance on this node has faulty disks
7811
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
7812
      if not inst.admin_up:
7813
        continue
7814
      check_nodes = set(inst.all_nodes)
7815
      check_nodes.discard(self.op.node_name)
7816
      for inst_node_name in check_nodes:
7817
        self._CheckFaultyDisks(inst, inst_node_name)
7818

    
7819
  def Exec(self, feedback_fn):
7820
    feedback_fn("Repairing storage unit '%s' on %s ..." %
7821
                (self.op.name, self.op.node_name))
7822

    
7823
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
7824
    result = self.rpc.call_storage_execute(self.op.node_name,
7825
                                           self.op.storage_type, st_args,
7826
                                           self.op.name,
7827
                                           constants.SO_FIX_CONSISTENCY)
7828
    result.Raise("Failed to repair storage unit '%s' on %s" %
7829
                 (self.op.name, self.op.node_name))
7830

    
7831

    
7832
class LUNodeEvacuationStrategy(NoHooksLU):
7833
  """Computes the node evacuation strategy.
7834

7835
  """
7836
  _OP_REQP = ["nodes"]
7837
  REQ_BGL = False
7838

    
7839
  def CheckArguments(self):
7840
    if not hasattr(self.op, "remote_node"):
7841
      self.op.remote_node = None
7842
    if not hasattr(self.op, "iallocator"):
7843
      self.op.iallocator = None
7844
    if self.op.remote_node is not None and self.op.iallocator is not None:
7845
      raise errors.OpPrereqError("Give either the iallocator or the new"
7846
                                 " secondary, not both", errors.ECODE_INVAL)
7847

    
7848
  def ExpandNames(self):
7849
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
7850
    self.needed_locks = locks = {}
7851
    if self.op.remote_node is None:
7852
      locks[locking.LEVEL_NODE] = locking.ALL_SET
7853
    else:
7854
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7855
      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
7856

    
7857
  def CheckPrereq(self):
7858
    pass
7859

    
7860
  def Exec(self, feedback_fn):
7861
    if self.op.remote_node is not None:
7862
      instances = []
7863
      for node in self.op.nodes:
7864
        instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
7865
      result = []
7866
      for i in instances:
7867
        if i.primary_node == self.op.remote_node:
7868
          raise errors.OpPrereqError("Node %s is the primary node of"
7869
                                     " instance %s, cannot use it as"
7870
                                     " secondary" %
7871
                                     (self.op.remote_node, i.name),
7872
                                     errors.ECODE_INVAL)
7873
        result.append([i.name, self.op.remote_node])
7874
    else:
7875
      ial = IAllocator(self.cfg, self.rpc,
7876
                       mode=constants.IALLOCATOR_MODE_MEVAC,
7877
                       evac_nodes=self.op.nodes)
7878
      ial.Run(self.op.iallocator, validate=True)
7879
      if not ial.success:
7880
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
7881
                                 errors.ECODE_NORES)
7882
      result = ial.result
7883
    return result
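    # Sketch of the returned structure for the remote_node path (names made
    # up): [["instance1.example.com", "node4.example.com"], ...], i.e. one
    # [instance, new secondary] pair per affected instance. For the
    # iallocator path the allocator's MEVAC result is returned unchanged.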
7884

    
7885

    
7886
class LUGrowDisk(LogicalUnit):
7887
  """Grow a disk of an instance.
7888

7889
  """
7890
  HPATH = "disk-grow"
7891
  HTYPE = constants.HTYPE_INSTANCE
7892
  _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
7893
  REQ_BGL = False
7894

    
7895
  def ExpandNames(self):
7896
    self._ExpandAndLockInstance()
7897
    self.needed_locks[locking.LEVEL_NODE] = []
7898
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7899

    
7900
  def DeclareLocks(self, level):
7901
    if level == locking.LEVEL_NODE:
7902
      self._LockInstancesNodes()
7903

    
7904
  def BuildHooksEnv(self):
7905
    """Build hooks env.
7906

7907
    This runs on the master, the primary and all the secondaries.
7908

7909
    """
7910
    env = {
7911
      "DISK": self.op.disk,
7912
      "AMOUNT": self.op.amount,
7913
      }
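    # Illustrative example: a request to grow disk 0 by 1024 would export
    # DISK=0 and AMOUNT=1024 to the hooks, on top of the generic instance
    # environment added below.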
7914
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7915
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7916
    return env, nl, nl
7917

    
7918
  def CheckPrereq(self):
7919
    """Check prerequisites.
7920

7921
    This checks that the instance is in the cluster.
7922

7923
    """
7924
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7925
    assert instance is not None, \
7926
      "Cannot retrieve locked instance %s" % self.op.instance_name
7927
    nodenames = list(instance.all_nodes)
7928
    for node in nodenames:
7929
      _CheckNodeOnline(self, node)
7930

    
7931

    
7932
    self.instance = instance
7933

    
7934
    if instance.disk_template not in constants.DTS_GROWABLE:
7935
      raise errors.OpPrereqError("Instance's disk layout does not support"
7936
                                 " growing.", errors.ECODE_INVAL)
7937

    
7938
    self.disk = instance.FindDisk(self.op.disk)
7939

    
7940
    if instance.disk_template != constants.DT_FILE:
7941
      # TODO: check the free disk space for file-based disks, when that
7942
      # feature is supported
7943
      _CheckNodesFreeDisk(self, nodenames, self.op.amount)
7944

    
7945
  def Exec(self, feedback_fn):
7946
    """Execute disk grow.
7947

7948
    """
7949
    instance = self.instance
7950
    disk = self.disk
7951

    
7952
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
7953
    if not disks_ok:
7954
      raise errors.OpExecError("Cannot activate block device to grow")
7955

    
7956
    for node in instance.all_nodes:
7957
      self.cfg.SetDiskID(disk, node)
7958
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
7959
      result.Raise("Grow request failed to node %s" % node)
7960

    
7961
      # TODO: Rewrite code to work properly
7962
      # DRBD goes into sync mode for a short amount of time after executing the
7963
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
7964
      # calling "resize" in sync mode fails. Sleeping for a short amount of
7965
      # time is a work-around.
7966
      time.sleep(5)
7967

    
7968
    disk.RecordGrow(self.op.amount)
7969
    self.cfg.Update(instance, feedback_fn)
7970
    if self.op.wait_for_sync:
7971
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
7972
      if disk_abort:
7973
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
7974
                             " status.\nPlease check the instance.")
7975
      if not instance.admin_up:
7976
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
7977
    elif not instance.admin_up:
7978
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
7979
                           " not supposed to be running because no wait for"
7980
                           " sync mode was requested.")
7981

    
7982

    
7983
class LUQueryInstanceData(NoHooksLU):
7984
  """Query runtime instance data.
7985

7986
  """
7987
  _OP_REQP = ["instances", "static"]
7988
  REQ_BGL = False
7989

    
7990
  def ExpandNames(self):
7991
    self.needed_locks = {}
7992
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
7993

    
7994
    if not isinstance(self.op.instances, list):
7995
      raise errors.OpPrereqError("Invalid argument type 'instances'",
7996
                                 errors.ECODE_INVAL)
7997

    
7998
    if self.op.instances:
7999
      self.wanted_names = []
8000
      for name in self.op.instances:
8001
        full_name = _ExpandInstanceName(self.cfg, name)
8002
        self.wanted_names.append(full_name)
8003
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8004
    else:
8005
      self.wanted_names = None
8006
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8007

    
8008
    self.needed_locks[locking.LEVEL_NODE] = []
8009
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8010

    
8011
  def DeclareLocks(self, level):
8012
    if level == locking.LEVEL_NODE:
8013
      self._LockInstancesNodes()
8014

    
8015
  def CheckPrereq(self):
8016
    """Check prerequisites.
8017

8018
    This only checks the optional instance list against the existing names.
8019

8020
    """
8021
    if self.wanted_names is None:
8022
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8023

    
8024
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
8025
                             in self.wanted_names]
8026
    return
8027

    
8028
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
8029
    """Returns the status of a block device
8030

8031
    """
8032
    if self.op.static or not node:
8033
      return None
8034

    
8035
    self.cfg.SetDiskID(dev, node)
8036

    
8037
    result = self.rpc.call_blockdev_find(node, dev)
8038
    if result.offline:
8039
      return None
8040

    
8041
    result.Raise("Can't compute disk status for %s" % instance_name)
8042

    
8043
    status = result.payload
8044
    if status is None:
8045
      return None
8046

    
8047
    return (status.dev_path, status.major, status.minor,
8048
            status.sync_percent, status.estimated_time,
8049
            status.is_degraded, status.ldisk_status)
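    # Illustrative example of the tuple returned above (values made up):
    #   ("/dev/drbd0", 147, 0, 95.2, 33, False, None)
    # i.e. (dev_path, major, minor, sync_percent, estimated_time, is_degraded,
    # ldisk_status), with the last field describing the local-disk status as
    # reported by the storage layer.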
8050

    
8051
  def _ComputeDiskStatus(self, instance, snode, dev):
8052
    """Compute block device status.
8053

8054
    """
8055
    if dev.dev_type in constants.LDS_DRBD:
8056
      # we change the snode then (otherwise we use the one passed in)
8057
      if dev.logical_id[0] == instance.primary_node:
8058
        snode = dev.logical_id[1]
8059
      else:
8060
        snode = dev.logical_id[0]
8061

    
8062
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8063
                                              instance.name, dev)
8064
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8065

    
8066
    if dev.children:
8067
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
8068
                      for child in dev.children]
8069
    else:
8070
      dev_children = []
8071

    
8072
    data = {
8073
      "iv_name": dev.iv_name,
8074
      "dev_type": dev.dev_type,
8075
      "logical_id": dev.logical_id,
8076
      "physical_id": dev.physical_id,
8077
      "pstatus": dev_pstatus,
8078
      "sstatus": dev_sstatus,
8079
      "children": dev_children,
8080
      "mode": dev.mode,
8081
      "size": dev.size,
8082
      }
8083

    
8084
    return data
8085

    
8086
  def Exec(self, feedback_fn):
8087
    """Gather and return data"""
8088
    result = {}
8089

    
8090
    cluster = self.cfg.GetClusterInfo()
8091

    
8092
    for instance in self.wanted_instances:
8093
      if not self.op.static:
8094
        remote_info = self.rpc.call_instance_info(instance.primary_node,
8095
                                                  instance.name,
8096
                                                  instance.hypervisor)
8097
        remote_info.Raise("Error checking node %s" % instance.primary_node)
8098
        remote_info = remote_info.payload
8099
        if remote_info and "state" in remote_info:
8100
          remote_state = "up"
8101
        else:
8102
          remote_state = "down"
8103
      else:
8104
        remote_state = None
8105
      if instance.admin_up:
8106
        config_state = "up"
8107
      else:
8108
        config_state = "down"
8109

    
8110
      disks = [self._ComputeDiskStatus(instance, None, device)
8111
               for device in instance.disks]
8112

    
8113
      idict = {
8114
        "name": instance.name,
8115
        "config_state": config_state,
8116
        "run_state": remote_state,
8117
        "pnode": instance.primary_node,
8118
        "snodes": instance.secondary_nodes,
8119
        "os": instance.os,
8120
        # this happens to be the same format used for hooks
8121
        "nics": _NICListToTuple(self, instance.nics),
8122
        "disk_template": instance.disk_template,
8123
        "disks": disks,
8124
        "hypervisor": instance.hypervisor,
8125
        "network_port": instance.network_port,
8126
        "hv_instance": instance.hvparams,
8127
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
8128
        "be_instance": instance.beparams,
8129
        "be_actual": cluster.FillBE(instance),
8130
        "serial_no": instance.serial_no,
8131
        "mtime": instance.mtime,
8132
        "ctime": instance.ctime,
8133
        "uuid": instance.uuid,
8134
        }
8135

    
8136
      result[instance.name] = idict
8137

    
8138
    return result
8139

    
8140

    
8141
class LUSetInstanceParams(LogicalUnit):
8142
  """Modifies an instances's parameters.
8143

8144
  """
8145
  HPATH = "instance-modify"
8146
  HTYPE = constants.HTYPE_INSTANCE
8147
  _OP_REQP = ["instance_name"]
8148
  REQ_BGL = False
8149

    
8150
  def CheckArguments(self):
8151
    if not hasattr(self.op, 'nics'):
8152
      self.op.nics = []
8153
    if not hasattr(self.op, 'disks'):
8154
      self.op.disks = []
8155
    if not hasattr(self.op, 'beparams'):
8156
      self.op.beparams = {}
8157
    if not hasattr(self.op, 'hvparams'):
8158
      self.op.hvparams = {}
8159
    if not hasattr(self.op, "disk_template"):
8160
      self.op.disk_template = None
8161
    if not hasattr(self.op, "remote_node"):
8162
      self.op.remote_node = None
8163
    if not hasattr(self.op, "os_name"):
8164
      self.op.os_name = None
8165
    if not hasattr(self.op, "force_variant"):
8166
      self.op.force_variant = False
8167
    self.op.force = getattr(self.op, "force", False)
8168
    if not (self.op.nics or self.op.disks or self.op.disk_template or
8169
            self.op.hvparams or self.op.beparams or self.op.os_name):
8170
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8171

    
8172
    if self.op.hvparams:
8173
      _CheckGlobalHvParams(self.op.hvparams)
8174

    
8175
    # Disk validation
8176
    disk_addremove = 0
8177
    for disk_op, disk_dict in self.op.disks:
8178
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
8179
      if disk_op == constants.DDM_REMOVE:
8180
        disk_addremove += 1
8181
        continue
8182
      elif disk_op == constants.DDM_ADD:
8183
        disk_addremove += 1
8184
      else:
8185
        if not isinstance(disk_op, int):
8186
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8187
        if not isinstance(disk_dict, dict):
8188
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8189
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8190

    
8191
      if disk_op == constants.DDM_ADD:
8192
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8193
        if mode not in constants.DISK_ACCESS_SET:
8194
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8195
                                     errors.ECODE_INVAL)
8196
        size = disk_dict.get('size', None)
8197
        if size is None:
8198
          raise errors.OpPrereqError("Required disk parameter size missing",
8199
                                     errors.ECODE_INVAL)
8200
        try:
8201
          size = int(size)
8202
        except (TypeError, ValueError), err:
8203
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8204
                                     str(err), errors.ECODE_INVAL)
8205
        disk_dict['size'] = size
8206
      else:
8207
        # modification of disk
8208
        if 'size' in disk_dict:
8209
          raise errors.OpPrereqError("Disk size change not possible, use"
8210
                                     " grow-disk", errors.ECODE_INVAL)
8211

    
8212
    if disk_addremove > 1:
8213
      raise errors.OpPrereqError("Only one disk add or remove operation"
8214
                                 " supported at a time", errors.ECODE_INVAL)
8215

    
8216
    if self.op.disks and self.op.disk_template is not None:
8217
      raise errors.OpPrereqError("Disk template conversion and other disk"
8218
                                 " changes not supported at the same time",
8219
                                 errors.ECODE_INVAL)
8220

    
8221
    if self.op.disk_template:
8222
      _CheckDiskTemplate(self.op.disk_template)
8223
      if (self.op.disk_template in constants.DTS_NET_MIRROR and
8224
          self.op.remote_node is None):
8225
        raise errors.OpPrereqError("Changing the disk template to a mirrored"
8226
                                   " one requires specifying a secondary node",
8227
                                   errors.ECODE_INVAL)
8228

    
8229
    # NIC validation
8230
    nic_addremove = 0
8231
    for nic_op, nic_dict in self.op.nics:
8232
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
8233
      if nic_op == constants.DDM_REMOVE:
8234
        nic_addremove += 1
8235
        continue
8236
      elif nic_op == constants.DDM_ADD:
8237
        nic_addremove += 1
8238
      else:
8239
        if not isinstance(nic_op, int):
8240
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8241
        if not isinstance(nic_dict, dict):
8242
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8243
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8244

    
8245
      # nic_dict should be a dict
8246
      nic_ip = nic_dict.get('ip', None)
8247
      if nic_ip is not None:
8248
        if nic_ip.lower() == constants.VALUE_NONE:
8249
          nic_dict['ip'] = None
8250
        else:
8251
          if not utils.IsValidIP(nic_ip):
8252
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8253
                                       errors.ECODE_INVAL)
8254

    
8255
      nic_bridge = nic_dict.get('bridge', None)
8256
      nic_link = nic_dict.get('link', None)
8257
      if nic_bridge and nic_link:
8258
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8259
                                   " at the same time", errors.ECODE_INVAL)
8260
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8261
        nic_dict['bridge'] = None
8262
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8263
        nic_dict['link'] = None
8264

    
8265
      if nic_op == constants.DDM_ADD:
8266
        nic_mac = nic_dict.get('mac', None)
8267
        if nic_mac is None:
8268
          nic_dict['mac'] = constants.VALUE_AUTO
8269

    
8270
      if 'mac' in nic_dict:
8271
        nic_mac = nic_dict['mac']
8272
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8273
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8274

    
8275
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8276
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8277
                                     " modifying an existing nic",
8278
                                     errors.ECODE_INVAL)
8279

    
8280
    if nic_addremove > 1:
8281
      raise errors.OpPrereqError("Only one NIC add or remove operation"
8282
                                 " supported at a time", errors.ECODE_INVAL)
8283

    
8284
  def ExpandNames(self):
8285
    self._ExpandAndLockInstance()
8286
    self.needed_locks[locking.LEVEL_NODE] = []
8287
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8288

    
8289
  def DeclareLocks(self, level):
8290
    if level == locking.LEVEL_NODE:
8291
      self._LockInstancesNodes()
8292
      if self.op.disk_template and self.op.remote_node:
8293
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8294
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
8295

    
8296
  def BuildHooksEnv(self):
8297
    """Build hooks env.
8298

8299
    This runs on the master, primary and secondaries.
8300

8301
    """
8302
    args = dict()
8303
    if constants.BE_MEMORY in self.be_new:
8304
      args['memory'] = self.be_new[constants.BE_MEMORY]
8305
    if constants.BE_VCPUS in self.be_new:
8306
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
8307
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
8308
    # information at all.
8309
    if self.op.nics:
8310
      args['nics'] = []
8311
      nic_override = dict(self.op.nics)
8312
      c_nicparams = self.cluster.nicparams[constants.PP_DEFAULT]
8313
      for idx, nic in enumerate(self.instance.nics):
8314
        if idx in nic_override:
8315
          this_nic_override = nic_override[idx]
8316
        else:
8317
          this_nic_override = {}
8318
        if 'ip' in this_nic_override:
8319
          ip = this_nic_override['ip']
8320
        else:
8321
          ip = nic.ip
8322
        if 'mac' in this_nic_override:
8323
          mac = this_nic_override['mac']
8324
        else:
8325
          mac = nic.mac
8326
        if idx in self.nic_pnew:
8327
          nicparams = self.nic_pnew[idx]
8328
        else:
8329
          nicparams = objects.FillDict(c_nicparams, nic.nicparams)
8330
        mode = nicparams[constants.NIC_MODE]
8331
        link = nicparams[constants.NIC_LINK]
8332
        args['nics'].append((ip, mac, mode, link))
8333
      if constants.DDM_ADD in nic_override:
8334
        ip = nic_override[constants.DDM_ADD].get('ip', None)
8335
        mac = nic_override[constants.DDM_ADD]['mac']
8336
        nicparams = self.nic_pnew[constants.DDM_ADD]
8337
        mode = nicparams[constants.NIC_MODE]
8338
        link = nicparams[constants.NIC_LINK]
8339
        args['nics'].append((ip, mac, mode, link))
8340
      elif constants.DDM_REMOVE in nic_override:
8341
        del args['nics'][-1]
8342

    
8343
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
8344
    if self.op.disk_template:
8345
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
8346
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8347
    return env, nl, nl
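    # Illustrative note (not in the original source): with hypothetical
    # values, the hook environment override assembled above would look
    # roughly like
    #
    #   args = {
    #     "memory": 512,   # only present if BE_MEMORY is being changed
    #     "vcpus": 2,      # only present if BE_VCPUS is being changed
    #     "nics": [("198.51.100.10", "aa:00:00:35:c9:01", "bridged", "xen-br0")],
    #   }
    #
    # i.e. one (ip, mac, mode, link) tuple per NIC after the requested
    # changes have been applied.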
8348

    
8349
  @staticmethod
8350
  def _GetUpdatedParams(old_params, update_dict,
8351
                        default_values, parameter_types):
8352
    """Return the new params dict for the given params.
8353

8354
    @type old_params: dict
8355
    @param old_params: old parameters
8356
    @type update_dict: dict
8357
    @param update_dict: dict containing new parameter values,
8358
                        or constants.VALUE_DEFAULT to reset the
8359
                        parameter to its default value
8360
    @type default_values: dict
8361
    @param default_values: default values for the filled parameters
8362
    @type parameter_types: dict
8363
    @param parameter_types: dict mapping target dict keys to types
8364
                            in constants.ENFORCEABLE_TYPES
8365
    @rtype: (dict, dict)
8366
    @return: (new_parameters, filled_parameters)
8367

8368
    """
8369
    params_copy = copy.deepcopy(old_params)
8370
    for key, val in update_dict.iteritems():
8371
      if val == constants.VALUE_DEFAULT:
8372
        try:
8373
          del params_copy[key]
8374
        except KeyError:
8375
          pass
8376
      else:
8377
        params_copy[key] = val
8378
    utils.ForceDictType(params_copy, parameter_types)
8379
    params_filled = objects.FillDict(default_values, params_copy)
8380
    return (params_copy, params_filled)
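    # Illustrative sketch (not part of the original code, parameter names are
    # hypothetical): given
    #
    #   old_params = {"kernel_path": "/boot/vmlinuz", "acpi": True}
    #   update     = {"acpi": constants.VALUE_DEFAULT, "boot_order": "cd"}
    #   new, filled = self._GetUpdatedParams(
    #       old_params, update, cluster.hvparams[instance.hypervisor],
    #       constants.HVS_PARAMETER_TYPES)
    #
    # new would be {"kernel_path": "/boot/vmlinuz", "boot_order": "cd"}: a key
    # set to VALUE_DEFAULT is dropped so the cluster default shows through in
    # filled, while any other value overrides it.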
8381

    
8382
  def CheckPrereq(self):
8383
    """Check prerequisites.
8384

8385
    This checks the requested parameter changes against the current
    instance and cluster configuration.
8386

8387
    """
8388
    self.force = self.op.force
8389

    
8390
    # checking the new params on the primary/secondary nodes
8391

    
8392
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8393
    cluster = self.cluster = self.cfg.GetClusterInfo()
8394
    assert self.instance is not None, \
8395
      "Cannot retrieve locked instance %s" % self.op.instance_name
8396
    pnode = instance.primary_node
8397
    nodelist = list(instance.all_nodes)
8398

    
8399
    if self.op.disk_template:
8400
      if instance.disk_template == self.op.disk_template:
8401
        raise errors.OpPrereqError("Instance already has disk template %s" %
8402
                                   instance.disk_template, errors.ECODE_INVAL)
8403

    
8404
      if (instance.disk_template,
8405
          self.op.disk_template) not in self._DISK_CONVERSIONS:
8406
        raise errors.OpPrereqError("Unsupported disk template conversion from"
8407
                                   " %s to %s" % (instance.disk_template,
8408
                                                  self.op.disk_template),
8409
                                   errors.ECODE_INVAL)
8410
      _CheckInstanceDown(self, instance, "cannot change disk template")
8411
      if self.op.disk_template in constants.DTS_NET_MIRROR:
8412
        if self.op.remote_node == pnode:
8413
          raise errors.OpPrereqError("Given new secondary node %s is the same"
8414
                                     " as the primary node of the instance" %
8415
                                     self.op.remote_node, errors.ECODE_STATE)
8416
        _CheckNodeOnline(self, self.op.remote_node)
8417
        _CheckNodeNotDrained(self, self.op.remote_node)
8418
        disks = [{"size": d.size} for d in instance.disks]
8419
        required = _ComputeDiskSize(self.op.disk_template, disks)
8420
        _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8421

    
8422
    # hvparams processing
8423
    if self.op.hvparams:
8424
      i_hvdict, hv_new = self._GetUpdatedParams(
8425
                             instance.hvparams, self.op.hvparams,
8426
                             cluster.hvparams[instance.hypervisor],
8427
                             constants.HVS_PARAMETER_TYPES)
8428
      # local check
8429
      hypervisor.GetHypervisor(
8430
        instance.hypervisor).CheckParameterSyntax(hv_new)
8431
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8432
      self.hv_new = hv_new # the new actual values
8433
      self.hv_inst = i_hvdict # the new dict (without defaults)
8434
    else:
8435
      self.hv_new = self.hv_inst = {}
8436

    
8437
    # beparams processing
8438
    if self.op.beparams:
8439
      i_bedict, be_new = self._GetUpdatedParams(
8440
                             instance.beparams, self.op.beparams,
8441
                             cluster.beparams[constants.PP_DEFAULT],
8442
                             constants.BES_PARAMETER_TYPES)
8443
      self.be_new = be_new # the new actual values
8444
      self.be_inst = i_bedict # the new dict (without defaults)
8445
    else:
8446
      self.be_new = self.be_inst = {}
8447

    
8448
    self.warn = []
8449

    
8450
    if constants.BE_MEMORY in self.op.beparams and not self.force:
8451
      mem_check_list = [pnode]
8452
      if be_new[constants.BE_AUTO_BALANCE]:
8453
        # either we changed auto_balance to yes or it was already set before
8454
        mem_check_list.extend(instance.secondary_nodes)
8455
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
8456
                                                  instance.hypervisor)
8457
      nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8458
                                         instance.hypervisor)
8459
      pninfo = nodeinfo[pnode]
8460
      msg = pninfo.fail_msg
8461
      if msg:
8462
        # Assume the primary node is unreachable and go ahead
8463
        self.warn.append("Can't get info from primary node %s: %s" %
8464
                         (pnode,  msg))
8465
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
8466
        self.warn.append("Node data from primary node %s doesn't contain"
8467
                         " free memory information" % pnode)
8468
      elif instance_info.fail_msg:
8469
        self.warn.append("Can't get instance runtime information: %s" %
8470
                        instance_info.fail_msg)
8471
      else:
8472
        if instance_info.payload:
8473
          current_mem = int(instance_info.payload['memory'])
8474
        else:
8475
          # Assume instance not running
8476
          # (there is a slight race condition here, but it's not very probable,
8477
          # and we have no other way to check)
8478
          current_mem = 0
8479
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8480
                    pninfo.payload['memory_free'])
8481
        if miss_mem > 0:
8482
          raise errors.OpPrereqError("This change will prevent the instance"
8483
                                     " from starting, due to %d MB of memory"
8484
                                     " missing on its primary node" % miss_mem,
8485
                                     errors.ECODE_NORES)
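        # Worked example (illustrative numbers only): with a requested
        # BE_MEMORY of 2048 MB, a running instance currently using 512 MB and
        # 1024 MB free on the primary node, miss_mem = 2048 - 512 - 1024 =
        # 512 MB, so the change is refused; the whole check is skipped when
        # force is set.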
8486

    
8487
      if be_new[constants.BE_AUTO_BALANCE]:
8488
        for node, nres in nodeinfo.items():
8489
          if node not in instance.secondary_nodes:
8490
            continue
8491
          msg = nres.fail_msg
8492
          if msg:
8493
            self.warn.append("Can't get info from secondary node %s: %s" %
8494
                             (node, msg))
8495
          elif not isinstance(nres.payload.get('memory_free', None), int):
8496
            self.warn.append("Secondary node %s didn't return free"
8497
                             " memory information" % node)
8498
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8499
            self.warn.append("Not enough memory to failover instance to"
8500
                             " secondary node %s" % node)
8501

    
8502
    # NIC processing
8503
    self.nic_pnew = {}
8504
    self.nic_pinst = {}
8505
    for nic_op, nic_dict in self.op.nics:
8506
      if nic_op == constants.DDM_REMOVE:
8507
        if not instance.nics:
8508
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
8509
                                     errors.ECODE_INVAL)
8510
        continue
8511
      if nic_op != constants.DDM_ADD:
8512
        # an existing nic
8513
        if not instance.nics:
8514
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
8515
                                     " no NICs" % nic_op,
8516
                                     errors.ECODE_INVAL)
8517
        if nic_op < 0 or nic_op >= len(instance.nics):
8518
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
8519
                                     " are 0 to %d" %
8520
                                     (nic_op, len(instance.nics) - 1),
8521
                                     errors.ECODE_INVAL)
8522
        old_nic_params = instance.nics[nic_op].nicparams
8523
        old_nic_ip = instance.nics[nic_op].ip
8524
      else:
8525
        old_nic_params = {}
8526
        old_nic_ip = None
8527

    
8528
      update_params_dict = dict([(key, nic_dict[key])
8529
                                 for key in constants.NICS_PARAMETERS
8530
                                 if key in nic_dict])
8531

    
8532
      if 'bridge' in nic_dict:
8533
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
8534

    
8535
      new_nic_params, new_filled_nic_params = \
8536
          self._GetUpdatedParams(old_nic_params, update_params_dict,
8537
                                 cluster.nicparams[constants.PP_DEFAULT],
8538
                                 constants.NICS_PARAMETER_TYPES)
8539
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
8540
      self.nic_pinst[nic_op] = new_nic_params
8541
      self.nic_pnew[nic_op] = new_filled_nic_params
8542
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
8543

    
8544
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
8545
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
8546
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
8547
        if msg:
8548
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
8549
          if self.force:
8550
            self.warn.append(msg)
8551
          else:
8552
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
8553
      if new_nic_mode == constants.NIC_MODE_ROUTED:
8554
        if 'ip' in nic_dict:
8555
          nic_ip = nic_dict['ip']
8556
        else:
8557
          nic_ip = old_nic_ip
8558
        if nic_ip is None:
8559
          raise errors.OpPrereqError('Cannot set the nic ip to None'
8560
                                     ' on a routed nic', errors.ECODE_INVAL)
8561
      if 'mac' in nic_dict:
8562
        nic_mac = nic_dict['mac']
8563
        if nic_mac is None:
8564
          raise errors.OpPrereqError('Cannot set the nic mac to None',
8565
                                     errors.ECODE_INVAL)
8566
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8567
          # otherwise generate the mac
8568
          nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
8569
        else:
8570
          # or validate/reserve the current one
8571
          try:
8572
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
8573
          except errors.ReservationError:
8574
            raise errors.OpPrereqError("MAC address %s already in use"
8575
                                       " in cluster" % nic_mac,
8576
                                       errors.ECODE_NOTUNIQUE)
8577

    
8578
    # DISK processing
8579
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
8580
      raise errors.OpPrereqError("Disk operations not supported for"
8581
                                 " diskless instances",
8582
                                 errors.ECODE_INVAL)
8583
    for disk_op, _ in self.op.disks:
8584
      if disk_op == constants.DDM_REMOVE:
8585
        if len(instance.disks) == 1:
8586
          raise errors.OpPrereqError("Cannot remove the last disk of"
8587
                                     " an instance", errors.ECODE_INVAL)
8588
        _CheckInstanceDown(self, instance, "cannot remove disks")
8589

    
8590
      if (disk_op == constants.DDM_ADD and
8591
          len(instance.disks) >= constants.MAX_DISKS):
8592
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
8593
                                   " add more" % constants.MAX_DISKS,
8594
                                   errors.ECODE_STATE)
8595
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
8596
        # an existing disk
8597
        if disk_op < 0 or disk_op >= len(instance.disks):
8598
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
8599
                                     " are 0 to %d" %
8600
                                     (disk_op, len(instance.disks) - 1),
8601
                                     errors.ECODE_INVAL)
8602

    
8603
    # OS change
8604
    if self.op.os_name and not self.op.force:
8605
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8606
                      self.op.force_variant)
8607

    
8608
    return
8609

    
8610
  def _ConvertPlainToDrbd(self, feedback_fn):
8611
    """Converts an instance from plain to drbd.
8612

8613
    """
8614
    feedback_fn("Converting template to drbd")
8615
    instance = self.instance
8616
    pnode = instance.primary_node
8617
    snode = self.op.remote_node
8618

    
8619
    # create a fake disk info for _GenerateDiskTemplate
8620
    disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
8621
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
8622
                                      instance.name, pnode, [snode],
8623
                                      disk_info, None, None, 0)
8624
    info = _GetInstanceInfoText(instance)
8625
    feedback_fn("Creating aditional volumes...")
8626
    # first, create the missing data and meta devices
8627
    for disk in new_disks:
8628
      # unfortunately this is... not too nice
8629
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
8630
                            info, True)
8631
      for child in disk.children:
8632
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
8633
    # at this stage, all new LVs have been created, we can rename the
8634
    # old ones
8635
    feedback_fn("Renaming original volumes...")
8636
    rename_list = [(o, n.children[0].logical_id)
8637
                   for (o, n) in zip(instance.disks, new_disks)]
8638
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
8639
    result.Raise("Failed to rename original LVs")
8640

    
8641
    feedback_fn("Initializing DRBD devices...")
8642
    # all child devices are in place, we can now create the DRBD devices
8643
    for disk in new_disks:
8644
      for node in [pnode, snode]:
8645
        f_create = node == pnode
8646
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
8647

    
8648
    # at this point, the instance has been modified
8649
    instance.disk_template = constants.DT_DRBD8
8650
    instance.disks = new_disks
8651
    self.cfg.Update(instance, feedback_fn)
8652

    
8653
    # disks are created, waiting for sync
8654
    disk_abort = not _WaitForSync(self, instance)
8655
    if disk_abort:
8656
      raise errors.OpExecError("There are some degraded disks for"
8657
                               " this instance, please cleanup manually")
8658

    
8659
  def _ConvertDrbdToPlain(self, feedback_fn):
8660
    """Converts an instance from drbd to plain.
8661

8662
    """
8663
    instance = self.instance
8664
    assert len(instance.secondary_nodes) == 1
8665
    pnode = instance.primary_node
8666
    snode = instance.secondary_nodes[0]
8667
    feedback_fn("Converting template to plain")
8668

    
8669
    old_disks = instance.disks
8670
    new_disks = [d.children[0] for d in old_disks]
8671

    
8672
    # copy over size and mode
8673
    for parent, child in zip(old_disks, new_disks):
8674
      child.size = parent.size
8675
      child.mode = parent.mode
8676

    
8677
    # update instance structure
8678
    instance.disks = new_disks
8679
    instance.disk_template = constants.DT_PLAIN
8680
    self.cfg.Update(instance, feedback_fn)
8681

    
8682
    feedback_fn("Removing volumes on the secondary node...")
8683
    for disk in old_disks:
8684
      self.cfg.SetDiskID(disk, snode)
8685
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
8686
      if msg:
8687
        self.LogWarning("Could not remove block device %s on node %s,"
8688
                        " continuing anyway: %s", disk.iv_name, snode, msg)
8689

    
8690
    feedback_fn("Removing unneeded volumes on the primary node...")
8691
    for idx, disk in enumerate(old_disks):
8692
      meta = disk.children[1]
8693
      self.cfg.SetDiskID(meta, pnode)
8694
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
8695
      if msg:
8696
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
8697
                        " continuing anyway: %s", idx, pnode, msg)
8698

    
8699

    
8700
  def Exec(self, feedback_fn):
8701
    """Modifies an instance.
8702

8703
    All parameters take effect only at the next restart of the instance.
8704

8705
    """
8706
    # Process here the warnings from CheckPrereq, as we don't have a
8707
    # feedback_fn there.
8708
    for warn in self.warn:
8709
      feedback_fn("WARNING: %s" % warn)
8710

    
8711
    result = []
8712
    instance = self.instance
8713
    # disk changes
8714
    for disk_op, disk_dict in self.op.disks:
8715
      if disk_op == constants.DDM_REMOVE:
8716
        # remove the last disk
8717
        device = instance.disks.pop()
8718
        device_idx = len(instance.disks)
8719
        for node, disk in device.ComputeNodeTree(instance.primary_node):
8720
          self.cfg.SetDiskID(disk, node)
8721
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
8722
          if msg:
8723
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
8724
                            " continuing anyway", device_idx, node, msg)
8725
        result.append(("disk/%d" % device_idx, "remove"))
8726
      elif disk_op == constants.DDM_ADD:
8727
        # add a new disk
8728
        if instance.disk_template == constants.DT_FILE:
8729
          file_driver, file_path = instance.disks[0].logical_id
8730
          file_path = os.path.dirname(file_path)
8731
        else:
8732
          file_driver = file_path = None
8733
        disk_idx_base = len(instance.disks)
8734
        new_disk = _GenerateDiskTemplate(self,
8735
                                         instance.disk_template,
8736
                                         instance.name, instance.primary_node,
8737
                                         instance.secondary_nodes,
8738
                                         [disk_dict],
8739
                                         file_path,
8740
                                         file_driver,
8741
                                         disk_idx_base)[0]
8742
        instance.disks.append(new_disk)
8743
        info = _GetInstanceInfoText(instance)
8744

    
8745
        logging.info("Creating volume %s for instance %s",
8746
                     new_disk.iv_name, instance.name)
8747
        # Note: this needs to be kept in sync with _CreateDisks
8748
        #HARDCODE
8749
        for node in instance.all_nodes:
8750
          f_create = node == instance.primary_node
8751
          try:
8752
            _CreateBlockDev(self, node, instance, new_disk,
8753
                            f_create, info, f_create)
8754
          except errors.OpExecError, err:
8755
            self.LogWarning("Failed to create volume %s (%s) on"
8756
                            " node %s: %s",
8757
                            new_disk.iv_name, new_disk, node, err)
8758
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
8759
                       (new_disk.size, new_disk.mode)))
8760
      else:
8761
        # change a given disk
8762
        instance.disks[disk_op].mode = disk_dict['mode']
8763
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
8764

    
8765
    if self.op.disk_template:
8766
      r_shut = _ShutdownInstanceDisks(self, instance)
8767
      if not r_shut:
8768
        raise errors.OpExecError("Cannot shutdow instance disks, unable to"
8769
                                 " proceed with disk template conversion")
8770
      mode = (instance.disk_template, self.op.disk_template)
8771
      try:
8772
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
8773
      except:
8774
        self.cfg.ReleaseDRBDMinors(instance.name)
8775
        raise
8776
      result.append(("disk_template", self.op.disk_template))
8777

    
8778
    # NIC changes
8779
    for nic_op, nic_dict in self.op.nics:
8780
      if nic_op == constants.DDM_REMOVE:
8781
        # remove the last nic
8782
        del instance.nics[-1]
8783
        result.append(("nic.%d" % len(instance.nics), "remove"))
8784
      elif nic_op == constants.DDM_ADD:
8785
        # mac and bridge should be set by now
8786
        mac = nic_dict['mac']
8787
        ip = nic_dict.get('ip', None)
8788
        nicparams = self.nic_pinst[constants.DDM_ADD]
8789
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
8790
        instance.nics.append(new_nic)
8791
        result.append(("nic.%d" % (len(instance.nics) - 1),
8792
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
8793
                       (new_nic.mac, new_nic.ip,
8794
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
8795
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
8796
                       )))
8797
      else:
8798
        for key in 'mac', 'ip':
8799
          if key in nic_dict:
8800
            setattr(instance.nics[nic_op], key, nic_dict[key])
8801
        if nic_op in self.nic_pinst:
8802
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
8803
        for key, val in nic_dict.iteritems():
8804
          result.append(("nic.%s/%d" % (key, nic_op), val))
8805

    
8806
    # hvparams changes
8807
    if self.op.hvparams:
8808
      instance.hvparams = self.hv_inst
8809
      for key, val in self.op.hvparams.iteritems():
8810
        result.append(("hv/%s" % key, val))
8811

    
8812
    # beparams changes
8813
    if self.op.beparams:
8814
      instance.beparams = self.be_inst
8815
      for key, val in self.op.beparams.iteritems():
8816
        result.append(("be/%s" % key, val))
8817

    
8818
    # OS change
8819
    if self.op.os_name:
8820
      instance.os = self.op.os_name
8821

    
8822
    self.cfg.Update(instance, feedback_fn)
8823

    
8824
    return result
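    # Illustrative note (hypothetical values): each entry of the returned
    # list pairs the changed item with its new value, e.g.
    #   [("disk/1", "remove"), ("nic.ip/0", "198.51.100.10"), ("be/memory", 512)]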
8825

    
8826
  _DISK_CONVERSIONS = {
8827
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
8828
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
8829
    }
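  # Note (added for clarity): the keys above are (old_template, new_template)
  # pairs and the values are the plain functions captured while the class
  # body was being executed, not bound methods, which is why Exec() invokes
  # them as self._DISK_CONVERSIONS[mode](self, feedback_fn).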
8830

    
8831
class LUQueryExports(NoHooksLU):
8832
  """Query the exports list
8833

8834
  """
8835
  _OP_REQP = ['nodes']
8836
  REQ_BGL = False
8837

    
8838
  def ExpandNames(self):
8839
    self.needed_locks = {}
8840
    self.share_locks[locking.LEVEL_NODE] = 1
8841
    if not self.op.nodes:
8842
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8843
    else:
8844
      self.needed_locks[locking.LEVEL_NODE] = \
8845
        _GetWantedNodes(self, self.op.nodes)
8846

    
8847
  def CheckPrereq(self):
8848
    """Check prerequisites.
8849

8850
    """
8851
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
8852

    
8853
  def Exec(self, feedback_fn):
8854
    """Compute the list of all the exported system images.
8855

8856
    @rtype: dict
8857
    @return: a dictionary with the structure node->(export-list)
8858
        where export-list is a list of the instances exported on
8859
        that node.
8860

8861
    """
8862
    rpcresult = self.rpc.call_export_list(self.nodes)
8863
    result = {}
8864
    for node in rpcresult:
8865
      if rpcresult[node].fail_msg:
8866
        result[node] = False
8867
      else:
8868
        result[node] = rpcresult[node].payload
8869

    
8870
    return result
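    # Illustrative example of the return value (hypothetical names): nodes
    # whose RPC call failed map to False instead of a list, e.g.
    #   {"node1.example.com": ["instance1.example.com"],
    #    "node2.example.com": False}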
8871

    
8872

    
8873
class LUExportInstance(LogicalUnit):
8874
  """Export an instance to an image in the cluster.
8875

8876
  """
8877
  HPATH = "instance-export"
8878
  HTYPE = constants.HTYPE_INSTANCE
8879
  _OP_REQP = ["instance_name", "target_node", "shutdown"]
8880
  REQ_BGL = False
8881

    
8882
  def CheckArguments(self):
8883
    """Check the arguments.
8884

8885
    """
8886
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
8887
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
8888

    
8889
  def ExpandNames(self):
8890
    self._ExpandAndLockInstance()
8891
    # FIXME: lock only instance primary and destination node
8892
    #
8893
    # Sad but true, for now we have to lock all nodes, as we don't know where
8894
    # the previous export might be, and in this LU we search for it and
8895
    # remove it from its current node. In the future we could fix this by:
8896
    #  - making a tasklet to search (share-lock all), then create the new one,
8897
    #    then one to remove it afterwards
8898
    #  - removing the removal operation altogether
8899
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8900

    
8901
  def DeclareLocks(self, level):
8902
    """Last minute lock declaration."""
8903
    # All nodes are locked anyway, so nothing to do here.
8904

    
8905
  def BuildHooksEnv(self):
8906
    """Build hooks env.
8907

8908
    This will run on the master, primary node and target node.
8909

8910
    """
8911
    env = {
8912
      "EXPORT_NODE": self.op.target_node,
8913
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
8914
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
8915
      }
8916
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8917
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node,
8918
          self.op.target_node]
8919
    return env, nl, nl
8920

    
8921
  def CheckPrereq(self):
8922
    """Check prerequisites.
8923

8924
    This checks that the instance and node names are valid.
8925

8926
    """
8927
    instance_name = self.op.instance_name
8928
    self.instance = self.cfg.GetInstanceInfo(instance_name)
8929
    assert self.instance is not None, \
8930
          "Cannot retrieve locked instance %s" % self.op.instance_name
8931
    _CheckNodeOnline(self, self.instance.primary_node)
8932

    
8933
    self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
8934
    self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
8935
    assert self.dst_node is not None
8936

    
8937
    _CheckNodeOnline(self, self.dst_node.name)
8938
    _CheckNodeNotDrained(self, self.dst_node.name)
8939

    
8940
    # instance disk type verification
8941
    for disk in self.instance.disks:
8942
      if disk.dev_type == constants.LD_FILE:
8943
        raise errors.OpPrereqError("Export not supported for instances with"
8944
                                   " file-based disks", errors.ECODE_INVAL)
8945

    
8946
  def _CreateSnapshots(self, feedback_fn):
8947
    """Creates an LVM snapshot for every disk of the instance.
8948

8949
    @return: List of snapshots as L{objects.Disk} instances; a failed
        snapshot is represented by False in the list
8950

8951
    """
8952
    instance = self.instance
8953
    src_node = instance.primary_node
8954

    
8955
    vgname = self.cfg.GetVGName()
8956

    
8957
    snap_disks = []
8958

    
8959
    for idx, disk in enumerate(instance.disks):
8960
      feedback_fn("Creating a snapshot of disk/%s on node %s" %
8961
                  (idx, src_node))
8962

    
8963
      # result.payload will be a snapshot of an lvm leaf of the one we
8964
      # passed
8965
      result = self.rpc.call_blockdev_snapshot(src_node, disk)
8966
      msg = result.fail_msg
8967
      if msg:
8968
        self.LogWarning("Could not snapshot disk/%s on node %s: %s",
8969
                        idx, src_node, msg)
8970
        snap_disks.append(False)
8971
      else:
8972
        disk_id = (vgname, result.payload)
8973
        new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
8974
                               logical_id=disk_id, physical_id=disk_id,
8975
                               iv_name=disk.iv_name)
8976
        snap_disks.append(new_dev)
8977

    
8978
    return snap_disks
8979

    
8980
  def _RemoveSnapshot(self, feedback_fn, snap_disks, disk_index):
8981
    """Removes an LVM snapshot.
8982

8983
    @type snap_disks: list
8984
    @param snap_disks: The list of all snapshots as returned by
8985
                       L{_CreateSnapshots}
8986
    @type disk_index: number
8987
    @param disk_index: Index of the snapshot to be removed
8988
    @rtype: bool
8989
    @return: Whether removal was successful or not
8990

8991
    """
8992
    disk = snap_disks[disk_index]
8993
    if disk:
8994
      src_node = self.instance.primary_node
8995

    
8996
      feedback_fn("Removing snapshot of disk/%s on node %s" %
8997
                  (disk_index, src_node))
8998

    
8999
      result = self.rpc.call_blockdev_remove(src_node, disk)
9000
      if not result.fail_msg:
9001
        return True
9002

    
9003
      self.LogWarning("Could not remove snapshot for disk/%d from node"
9004
                      " %s: %s", disk_index, src_node, result.fail_msg)
9005

    
9006
    return False
9007

    
9008
  def _CleanupExports(self, feedback_fn):
9009
    """Removes exports of current instance from all other nodes.
9010

9011
    If an instance in a cluster with nodes A..D was exported to node C, its
9012
    exports will be removed from the nodes A, B and D.
9013

9014
    """
9015
    nodelist = self.cfg.GetNodeList()
9016
    nodelist.remove(self.dst_node.name)
9017

    
9018
    # on one-node clusters nodelist will be empty after the removal;
9019
    # if we proceeded, the backup would be removed because OpQueryExports
9020
    # substitutes an empty list with the full cluster node list.
9021
    iname = self.instance.name
9022
    if nodelist:
9023
      feedback_fn("Removing old exports for instance %s" % iname)
9024
      exportlist = self.rpc.call_export_list(nodelist)
9025
      for node in exportlist:
9026
        if exportlist[node].fail_msg:
9027
          continue
9028
        if iname in exportlist[node].payload:
9029
          msg = self.rpc.call_export_remove(node, iname).fail_msg
9030
          if msg:
9031
            self.LogWarning("Could not remove older export for instance %s"
9032
                            " on node %s: %s", iname, node, msg)
9033

    
9034
  def Exec(self, feedback_fn):
9035
    """Export an instance to an image in the cluster.
9036

9037
    """
9038
    instance = self.instance
9039
    dst_node = self.dst_node
9040
    src_node = instance.primary_node
9041

    
9042
    if self.op.shutdown:
9043
      # shutdown the instance, but not the disks
9044
      feedback_fn("Shutting down instance %s" % instance.name)
9045
      result = self.rpc.call_instance_shutdown(src_node, instance,
9046
                                               self.shutdown_timeout)
9047
      result.Raise("Could not shutdown instance %s on"
9048
                   " node %s" % (instance.name, src_node))
9049

    
9050
    # set the disks ID correctly since call_instance_start needs the
9051
    # correct drbd minor to create the symlinks
9052
    for disk in instance.disks:
9053
      self.cfg.SetDiskID(disk, src_node)
9054

    
9055
    activate_disks = (not instance.admin_up)
9056

    
9057
    if activate_disks:
9058
      # Activate the instance disks if we're exporting a stopped instance
9059
      feedback_fn("Activating disks for %s" % instance.name)
9060
      _StartInstanceDisks(self, instance, None)
9061

    
9062
    try:
9063
      # per-disk results
9064
      dresults = []
9065
      removed_snaps = [False] * len(instance.disks)
9066

    
9067
      snap_disks = None
9068
      try:
9069
        try:
9070
          snap_disks = self._CreateSnapshots(feedback_fn)
9071
        finally:
9072
          if self.op.shutdown and instance.admin_up:
9073
            feedback_fn("Starting instance %s" % instance.name)
9074
            result = self.rpc.call_instance_start(src_node, instance,
9075
                                                  None, None)
9076
            msg = result.fail_msg
9077
            if msg:
9078
              _ShutdownInstanceDisks(self, instance)
9079
              raise errors.OpExecError("Could not start instance: %s" % msg)
9080

    
9081
        assert len(snap_disks) == len(instance.disks)
9082
        assert len(removed_snaps) == len(instance.disks)
9083

    
9084
        # TODO: check for size
9085

    
9086
        cluster_name = self.cfg.GetClusterName()
9087
        for idx, dev in enumerate(snap_disks):
9088
          feedback_fn("Exporting snapshot %s from %s to %s" %
9089
                      (idx, src_node, dst_node.name))
9090
          if dev:
9091
            # FIXME: pass debug from opcode to backend
9092
            result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
9093
                                                   instance, cluster_name,
9094
                                                   idx, self.op.debug_level)
9095
            msg = result.fail_msg
9096
            if msg:
9097
              self.LogWarning("Could not export disk/%s from node %s to"
9098
                              " node %s: %s", idx, src_node, dst_node.name, msg)
9099
              dresults.append(False)
9100
            else:
9101
              dresults.append(True)
9102

    
9103
            # Remove snapshot
9104
            if self._RemoveSnapshot(feedback_fn, snap_disks, idx):
9105
              removed_snaps[idx] = True
9106
          else:
9107
            dresults.append(False)
9108

    
9109
        assert len(dresults) == len(instance.disks)
9110

    
9111
        # Check for backwards compatibility
9112
        assert compat.all(isinstance(i, bool) for i in dresults), \
9113
               "Not all results are boolean: %r" % dresults
9114

    
9115
        feedback_fn("Finalizing export on %s" % dst_node.name)
9116
        result = self.rpc.call_finalize_export(dst_node.name, instance,
9117
                                               snap_disks)
9118
        msg = result.fail_msg
9119
        fin_resu = not msg
9120
        if msg:
9121
          self.LogWarning("Could not finalize export for instance %s"
9122
                          " on node %s: %s", instance.name, dst_node.name, msg)
9123

    
9124
      finally:
9125
        # Remove all snapshots
9126
        assert len(removed_snaps) == len(instance.disks)
9127
        for idx, removed in enumerate(removed_snaps):
9128
          if not removed:
9129
            self._RemoveSnapshot(feedback_fn, snap_disks, idx)
9130

    
9131
    finally:
9132
      if activate_disks:
9133
        feedback_fn("Deactivating disks for %s" % instance.name)
9134
        _ShutdownInstanceDisks(self, instance)
9135

    
9136
    self._CleanupExports(feedback_fn)
9137

    
9138
    return fin_resu, dresults
9139

    
9140

    
9141
class LURemoveExport(NoHooksLU):
9142
  """Remove exports related to the named instance.
9143

9144
  """
9145
  _OP_REQP = ["instance_name"]
9146
  REQ_BGL = False
9147

    
9148
  def ExpandNames(self):
9149
    self.needed_locks = {}
9150
    # We need all nodes to be locked in order for RemoveExport to work, but we
9151
    # don't need to lock the instance itself, as nothing will happen to it (and
9152
    # we can remove exports also for a removed instance)
9153
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9154

    
9155
  def CheckPrereq(self):
9156
    """Check prerequisites.
9157
    """
9158
    pass
9159

    
9160
  def Exec(self, feedback_fn):
9161
    """Remove any export.
9162

9163
    """
9164
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
9165
    # If the instance was not found we'll try with the name that was passed in.
9166
    # This will only work if it was an FQDN, though.
9167
    fqdn_warn = False
9168
    if not instance_name:
9169
      fqdn_warn = True
9170
      instance_name = self.op.instance_name
9171

    
9172
    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
9173
    exportlist = self.rpc.call_export_list(locked_nodes)
9174
    found = False
9175
    for node in exportlist:
9176
      msg = exportlist[node].fail_msg
9177
      if msg:
9178
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
9179
        continue
9180
      if instance_name in exportlist[node].payload:
9181
        found = True
9182
        result = self.rpc.call_export_remove(node, instance_name)
9183
        msg = result.fail_msg
9184
        if msg:
9185
          logging.error("Could not remove export for instance %s"
9186
                        " on node %s: %s", instance_name, node, msg)
9187

    
9188
    if fqdn_warn and not found:
9189
      feedback_fn("Export not found. If trying to remove an export belonging"
9190
                  " to a deleted instance please use its Fully Qualified"
9191
                  " Domain Name.")
9192

    
9193

    
9194
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
9195
  """Generic tags LU.
9196

9197
  This is an abstract class which is the parent of all the other tags LUs.
9198

9199
  """
9200

    
9201
  def ExpandNames(self):
9202
    self.needed_locks = {}
9203
    if self.op.kind == constants.TAG_NODE:
9204
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
9205
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
9206
    elif self.op.kind == constants.TAG_INSTANCE:
9207
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
9208
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
9209

    
9210
  def CheckPrereq(self):
9211
    """Check prerequisites.
9212

9213
    """
9214
    if self.op.kind == constants.TAG_CLUSTER:
9215
      self.target = self.cfg.GetClusterInfo()
9216
    elif self.op.kind == constants.TAG_NODE:
9217
      self.target = self.cfg.GetNodeInfo(self.op.name)
9218
    elif self.op.kind == constants.TAG_INSTANCE:
9219
      self.target = self.cfg.GetInstanceInfo(self.op.name)
9220
    else:
9221
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
9222
                                 str(self.op.kind), errors.ECODE_INVAL)
9223

    
9224

    
9225
class LUGetTags(TagsLU):
9226
  """Returns the tags of a given object.
9227

9228
  """
9229
  _OP_REQP = ["kind", "name"]
9230
  REQ_BGL = False
9231

    
9232
  def Exec(self, feedback_fn):
9233
    """Returns the tag list.
9234

9235
    """
9236
    return list(self.target.GetTags())
9237

    
9238

    
9239
class LUSearchTags(NoHooksLU):
9240
  """Searches the tags for a given pattern.
9241

9242
  """
9243
  _OP_REQP = ["pattern"]
9244
  REQ_BGL = False
9245

    
9246
  def ExpandNames(self):
9247
    self.needed_locks = {}
9248

    
9249
  def CheckPrereq(self):
9250
    """Check prerequisites.
9251

9252
    This checks the pattern passed for validity by compiling it.
9253

9254
    """
9255
    try:
9256
      self.re = re.compile(self.op.pattern)
9257
    except re.error, err:
9258
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
9259
                                 (self.op.pattern, err), errors.ECODE_INVAL)
9260

    
9261
  def Exec(self, feedback_fn):
9262
    """Returns the tag list.
9263

9264
    """
9265
    cfg = self.cfg
9266
    tgts = [("/cluster", cfg.GetClusterInfo())]
9267
    ilist = cfg.GetAllInstancesInfo().values()
9268
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
9269
    nlist = cfg.GetAllNodesInfo().values()
9270
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
9271
    results = []
9272
    for path, target in tgts:
9273
      for tag in target.GetTags():
9274
        if self.re.search(tag):
9275
          results.append((path, tag))
9276
    return results
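    # Illustrative example (hypothetical names and tags): searching for "web"
    # could return
    #   [("/instances/instance1.example.com", "webserver"),
    #    ("/nodes/node1.example.com", "web-zone")]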
9277

    
9278

    
9279
class LUAddTags(TagsLU):
9280
  """Sets a tag on a given object.
9281

9282
  """
9283
  _OP_REQP = ["kind", "name", "tags"]
9284
  REQ_BGL = False
9285

    
9286
  def CheckPrereq(self):
9287
    """Check prerequisites.
9288

9289
    This checks the type and length of the tag name and value.
9290

9291
    """
9292
    TagsLU.CheckPrereq(self)
9293
    for tag in self.op.tags:
9294
      objects.TaggableObject.ValidateTag(tag)
9295

    
9296
  def Exec(self, feedback_fn):
9297
    """Sets the tag.
9298

9299
    """
9300
    try:
9301
      for tag in self.op.tags:
9302
        self.target.AddTag(tag)
9303
    except errors.TagError, err:
9304
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
9305
    self.cfg.Update(self.target, feedback_fn)
9306

    
9307

    
9308
class LUDelTags(TagsLU):
9309
  """Delete a list of tags from a given object.
9310

9311
  """
9312
  _OP_REQP = ["kind", "name", "tags"]
9313
  REQ_BGL = False
9314

    
9315
  def CheckPrereq(self):
9316
    """Check prerequisites.
9317

9318
    This checks that we have the given tag.
9319

9320
    """
9321
    TagsLU.CheckPrereq(self)
9322
    for tag in self.op.tags:
9323
      objects.TaggableObject.ValidateTag(tag)
9324
    del_tags = frozenset(self.op.tags)
9325
    cur_tags = self.target.GetTags()
9326
    if not del_tags <= cur_tags:
9327
      diff_tags = del_tags - cur_tags
9328
      diff_names = ["'%s'" % tag for tag in diff_tags]
9329
      diff_names.sort()
9330
      raise errors.OpPrereqError("Tag(s) %s not found" %
9331
                                 (",".join(diff_names)), errors.ECODE_NOENT)
9332

    
9333
  def Exec(self, feedback_fn):
9334
    """Remove the tag from the object.
9335

9336
    """
9337
    for tag in self.op.tags:
9338
      self.target.RemoveTag(tag)
9339
    self.cfg.Update(self.target, feedback_fn)
9340

    
9341

    
9342
class LUTestDelay(NoHooksLU):
9343
  """Sleep for a specified amount of time.
9344

9345
  This LU sleeps on the master and/or nodes for a specified amount of
9346
  time.
9347

9348
  """
9349
  _OP_REQP = ["duration", "on_master", "on_nodes"]
9350
  REQ_BGL = False
9351

    
9352
  def ExpandNames(self):
9353
    """Expand names and set required locks.
9354

9355
    This expands the node list, if any.
9356

9357
    """
9358
    self.needed_locks = {}
9359
    if self.op.on_nodes:
9360
      # _GetWantedNodes can be used here, but is not always appropriate to use
9361
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
9362
      # more information.
9363
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
9364
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
9365

    
9366
  def CheckPrereq(self):
9367
    """Check prerequisites.
9368

9369
    """
9370

    
9371
  def Exec(self, feedback_fn):
9372
    """Do the actual sleep.
9373

9374
    """
9375
    if self.op.on_master:
9376
      if not utils.TestDelay(self.op.duration):
9377
        raise errors.OpExecError("Error during master delay test")
9378
    if self.op.on_nodes:
9379
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
9380
      for node, node_result in result.items():
9381
        node_result.Raise("Failure during rpc call to node %s" % node)
9382

    
9383

    
9384
class IAllocator(object):
9385
  """IAllocator framework.
9386

9387
  An IAllocator instance has four sets of attributes:
9388
    - cfg that is needed to query the cluster
9389
    - input data (all members of the _KEYS class attribute are required)
9390
    - four buffer attributes (in|out_data|text), that represent the
9391
      input (to the external script) in text and data structure format,
9392
      and the output from it, again in two formats
9393
    - the result variables from the script (success, info, result) for
9394
      easy usage
9395

9396
  """
9397
  # pylint: disable-msg=R0902
9398
  # lots of instance attributes
9399
  _ALLO_KEYS = [
9400
    "name", "mem_size", "disks", "disk_template",
9401
    "os", "tags", "nics", "vcpus", "hypervisor",
9402
    ]
9403
  _RELO_KEYS = [
9404
    "name", "relocate_from",
9405
    ]
9406
  _EVAC_KEYS = [
9407
    "evac_nodes",
9408
    ]
9409

    
9410
  def __init__(self, cfg, rpc, mode, **kwargs):
9411
    self.cfg = cfg
9412
    self.rpc = rpc
9413
    # init buffer variables
9414
    self.in_text = self.out_text = self.in_data = self.out_data = None
9415
    # init all input fields so that pylint is happy
9416
    self.mode = mode
9417
    self.mem_size = self.disks = self.disk_template = None
9418
    self.os = self.tags = self.nics = self.vcpus = None
9419
    self.hypervisor = None
9420
    self.relocate_from = None
9421
    self.name = None
9422
    self.evac_nodes = None
9423
    # computed fields
9424
    self.required_nodes = None
9425
    # init result fields
9426
    self.success = self.info = self.result = None
9427
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
9428
      keyset = self._ALLO_KEYS
9429
      fn = self._AddNewInstance
9430
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
9431
      keyset = self._RELO_KEYS
9432
      fn = self._AddRelocateInstance
9433
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
9434
      keyset = self._EVAC_KEYS
9435
      fn = self._AddEvacuateNodes
9436
    else:
9437
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
9438
                                   " IAllocator" % self.mode)
9439
    for key in kwargs:
9440
      if key not in keyset:
9441
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
9442
                                     " IAllocator" % key)
9443
      setattr(self, key, kwargs[key])
9444

    
9445
    for key in keyset:
9446
      if key not in kwargs:
9447
        raise errors.ProgrammerError("Missing input parameter '%s' to"
9448
                                     " IAllocator" % key)
9449
    self._BuildInputData(fn)
9450

    
9451
  def _ComputeClusterData(self):
9452
    """Compute the generic allocator input data.
9453

9454
    This is the data that is independent of the actual operation.
9455

9456
    """
9457
    cfg = self.cfg
9458
    cluster_info = cfg.GetClusterInfo()
9459
    # cluster data
9460
    data = {
9461
      "version": constants.IALLOCATOR_VERSION,
9462
      "cluster_name": cfg.GetClusterName(),
9463
      "cluster_tags": list(cluster_info.GetTags()),
9464
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
9465
      # we don't have job IDs
9466
      }
9467
    iinfo = cfg.GetAllInstancesInfo().values()
9468
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
9469

    
9470
    # node data
9471
    node_results = {}
9472
    node_list = cfg.GetNodeList()
9473

    
9474
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
9475
      hypervisor_name = self.hypervisor
9476
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
9477
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
9478
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
9479
      hypervisor_name = cluster_info.enabled_hypervisors[0]
9480

    
9481
    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
9482
                                        hypervisor_name)
9483
    node_iinfo = \
9484
      self.rpc.call_all_instances_info(node_list,
9485
                                       cluster_info.enabled_hypervisors)
9486
    for nname, nresult in node_data.items():
9487
      # first fill in static (config-based) values
9488
      ninfo = cfg.GetNodeInfo(nname)
9489
      pnr = {
9490
        "tags": list(ninfo.GetTags()),
9491
        "primary_ip": ninfo.primary_ip,
9492
        "secondary_ip": ninfo.secondary_ip,
9493
        "offline": ninfo.offline,
9494
        "drained": ninfo.drained,
9495
        "master_candidate": ninfo.master_candidate,
9496
        }
9497

    
9498
      if not (ninfo.offline or ninfo.drained):
9499
        nresult.Raise("Can't get data for node %s" % nname)
9500
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
9501
                                nname)
9502
        remote_info = nresult.payload
9503

    
9504
        for attr in ['memory_total', 'memory_free', 'memory_dom0',
9505
                     'vg_size', 'vg_free', 'cpu_total']:
9506
          if attr not in remote_info:
9507
            raise errors.OpExecError("Node '%s' didn't return attribute"
9508
                                     " '%s'" % (nname, attr))
9509
          if not isinstance(remote_info[attr], int):
9510
            raise errors.OpExecError("Node '%s' returned invalid value"
9511
                                     " for '%s': %s" %
9512
                                     (nname, attr, remote_info[attr]))
9513
        # compute memory used by primary instances
9514
        i_p_mem = i_p_up_mem = 0
9515
        for iinfo, beinfo in i_list:
9516
          if iinfo.primary_node == nname:
9517
            i_p_mem += beinfo[constants.BE_MEMORY]
9518
            if iinfo.name not in node_iinfo[nname].payload:
9519
              i_used_mem = 0
9520
            else:
9521
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
9522
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
9523
            remote_info['memory_free'] -= max(0, i_mem_diff)
9524

    
9525
            if iinfo.admin_up:
9526
              i_p_up_mem += beinfo[constants.BE_MEMORY]
9527

    
9528
        # compute memory used by instances
9529
        pnr_dyn = {
9530
          "total_memory": remote_info['memory_total'],
9531
          "reserved_memory": remote_info['memory_dom0'],
9532
          "free_memory": remote_info['memory_free'],
9533
          "total_disk": remote_info['vg_size'],
9534
          "free_disk": remote_info['vg_free'],
9535
          "total_cpus": remote_info['cpu_total'],
9536
          "i_pri_memory": i_p_mem,
9537
          "i_pri_up_memory": i_p_up_mem,
9538
          }
9539
        pnr.update(pnr_dyn)
9540

    
9541
      node_results[nname] = pnr
9542
    data["nodes"] = node_results
9543

    
9544
    # instance data
9545
    instance_data = {}
9546
    for iinfo, beinfo in i_list:
9547
      nic_data = []
9548
      for nic in iinfo.nics:
9549
        filled_params = objects.FillDict(
9550
            cluster_info.nicparams[constants.PP_DEFAULT],
9551
            nic.nicparams)
9552
        nic_dict = {"mac": nic.mac,
9553
                    "ip": nic.ip,
9554
                    "mode": filled_params[constants.NIC_MODE],
9555
                    "link": filled_params[constants.NIC_LINK],
9556
                   }
9557
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
9558
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
9559
        nic_data.append(nic_dict)
9560
      pir = {
9561
        "tags": list(iinfo.GetTags()),
9562
        "admin_up": iinfo.admin_up,
9563
        "vcpus": beinfo[constants.BE_VCPUS],
9564
        "memory": beinfo[constants.BE_MEMORY],
9565
        "os": iinfo.os,
9566
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
9567
        "nics": nic_data,
9568
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
9569
        "disk_template": iinfo.disk_template,
9570
        "hypervisor": iinfo.hypervisor,
9571
        }
9572
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
9573
                                                 pir["disks"])
9574
      instance_data[iinfo.name] = pir
9575

    
9576
    data["instances"] = instance_data
9577

    
9578
    self.in_data = data
9579

    
9580
  def _AddNewInstance(self):
9581
    """Add new instance data to allocator structure.
9582

9583
    This in combination with _ComputeClusterData will create the
9584
    correct structure needed as input for the allocator.
9585

9586
    The checks for the completeness of the opcode must have already been
9587
    done.
9588

9589
    """
9590
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)
9591

    
9592
    if self.disk_template in constants.DTS_NET_MIRROR:
9593
      self.required_nodes = 2
9594
    else:
9595
      self.required_nodes = 1
9596
    request = {
9597
      "name": self.name,
9598
      "disk_template": self.disk_template,
9599
      "tags": self.tags,
9600
      "os": self.os,
9601
      "vcpus": self.vcpus,
9602
      "memory": self.mem_size,
9603
      "disks": self.disks,
9604
      "disk_space_total": disk_space,
9605
      "nics": self.nics,
9606
      "required_nodes": self.required_nodes,
9607
      }
9608
    return request
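    # Illustrative sketch (hypothetical values, not in the original source):
    # for a DRBD allocation the request built above would look roughly like
    #
    #   {"name": "instance1.example.com", "disk_template": "drbd",
    #    "tags": [], "os": "debian-image", "vcpus": 1, "memory": 512,
    #    "disks": [{"size": 1024, "mode": "rw"}],
    #    "disk_space_total": 1152,   # as computed by _ComputeDiskSize
    #    "nics": [...],              # as passed in by the caller
    #    "required_nodes": 2}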
9609

    
9610
  def _AddRelocateInstance(self):
9611
    """Add relocate instance data to allocator structure.
9612

9613
    This in combination with _ComputeClusterData will create the
9614
    correct structure needed as input for the allocator.
9615

9616
    The checks for the completeness of the opcode must have already been
9617
    done.
9618

9619
    """
9620
    instance = self.cfg.GetInstanceInfo(self.name)
9621
    if instance is None:
9622
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
9623
                                   " IAllocator" % self.name)
9624

    
9625
    if instance.disk_template not in constants.DTS_NET_MIRROR:
9626
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
9627
                                 errors.ECODE_INVAL)
9628

    
9629
    if len(instance.secondary_nodes) != 1:
9630
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
9631
                                 errors.ECODE_STATE)
9632

    
9633
    self.required_nodes = 1
9634
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
9635
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
9636

    
9637
    request = {
9638
      "name": self.name,
9639
      "disk_space_total": disk_space,
9640
      "required_nodes": self.required_nodes,
9641
      "relocate_from": self.relocate_from,
9642
      }
9643
    return request
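
  # Sketch of a relocation request, with hypothetical names; the allocator
  # is asked for one new node to replace the node(s) listed in
  # "relocate_from":
  #
  #   {
  #     "name": "instance1.example.com",
  #     "disk_space_total": 1152,
  #     "required_nodes": 1,
  #     "relocate_from": ["node2.example.com"],
  #   }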

  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {
      "evac_nodes": self.evac_nodes
      }
    return request
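
  # Sketch of a multi-evacuation request (hypothetical node name); the
  # allocator is expected to propose new placements for the instances
  # currently using the listed nodes:
  #
  #   {
  #     "evac_nodes": ["node3.example.com"],
  #   }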

  def _BuildInputData(self, fn):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)
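
  # After this, self.in_text holds the serialized allocator input: the
  # cluster data gathered by _ComputeClusterData plus the mode-specific
  # request. Roughly (most cluster-level keys omitted, values hypothetical):
  #
  #   {
  #     ...                          # cluster and node data
  #     "instances": {...},          # as built in _ComputeClusterData
  #     "request": {
  #       "type": "allocate",        # self.mode
  #       "name": "instance1.example.com",
  #       ...
  #     },
  #   }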

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()
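
  # Typical usage from a logical unit, sketched with a hypothetical
  # allocator name ("hail"). The constructor builds the input data for the
  # requested mode (LUTestAllocator.Exec below relies on this), Run()
  # executes the script on the master node and, unless validate=False,
  # _ValidateResult() fills in self.success, self.info and self.result:
  #
  #   ial = IAllocator(self.cfg, self.rpc,
  #                    mode=constants.IALLOCATOR_MODE_RELOC,
  #                    name=instance.name,
  #                    relocate_from=list(instance.secondary_nodes))
  #   ial.Run("hail")
  #   if not ial.success:
  #     raise errors.OpPrereqError("Can't compute nodes: %s" % ial.info,
  #                                errors.ECODE_NORES)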

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and, if successful, save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["result"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
                               " is not a list")
    self.out_data = rdict
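
  # For reference, a minimal well-formed allocator reply that passes the
  # checks above (values hypothetical); "success", "info" and "result" are
  # copied onto the IAllocator object as attributes:
  #
  #   {
  #     "success": true,
  #     "info": "allocation successful",
  #     "result": ["node2.example.com"],
  #   }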


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  _OP_REQP = ["direction", "mode", "name"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    of the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["name", "mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      for row in self.op.nics:
        if (not isinstance(row, dict) or
            "mac" not in row or
            "ip" not in row or
            "bridge" not in row):
          raise errors.OpPrereqError("Invalid contents of the 'nics'"
                                     " parameter", errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      if not hasattr(self.op, "name"):
        raise errors.OpPrereqError("Missing attribute 'name' on opcode input",
                                   errors.ECODE_INVAL)
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if not hasattr(self.op, "allocator") or self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)
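
  # For the allocation test mode, each entry of op.nics and op.disks must
  # satisfy the checks above; e.g. (hypothetical values):
  #
  #   nics  = [{"mac": "auto", "ip": None, "bridge": "xen-br0"}]
  #   disks = [{"size": 1024, "mode": "w"}]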

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      raise errors.ProgrammerError("Uncaught mode %s in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result
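
# A hypothetical way to exercise this LU, assuming the corresponding opcode
# is opcodes.OpTestAllocator (the parameters mirror the attributes checked
# in CheckPrereq above); with IALLOCATOR_DIR_IN this only returns the
# generated allocator input text:
#
#   op = opcodes.OpTestAllocator(direction=constants.IALLOCATOR_DIR_IN,
#                                mode=constants.IALLOCATOR_MODE_ALLOC,
#                                name="instance1.example.com",
#                                mem_size=512, vcpus=1, tags=[],
#                                os="debootstrap",
#                                disk_template=constants.DT_PLAIN,
#                                disks=[{"size": 1024, "mode": "w"}],
#                                nics=[{"mac": "auto", "ip": None,
#                                       "bridge": None}],
#                                )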