#
#

# Copyright (C) 2006, 2007, 2008 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable-msg=W0201

# W0201 since most LU attributes are defined in CheckPrereq or similar
# functions

import os
import os.path
import time
import re
import platform
import logging
import copy
import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  _OP_REQP = []
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    self.__ssh = None
    # logging
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    for attr_name in self._OP_REQP:
      attr_val = getattr(op, attr_name, None)
      if attr_val is None:
        raise errors.OpPrereqError("Required parameter '%s' missing" %
                                   attr_name, errors.ECODE_INVAL)

    self.CheckArguments()

  def __GetSSH(self):
    """Returns the SshRunner object

    """
    if not self.__ssh:
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
    return self.__ssh

  ssh = property(fget=__GetSSH)

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods no longer need to worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.tld'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    This method should return a three-element tuple consisting of: a dict
    containing the environment that will be used for running the
    specific hook for this LU, a list of node names on which the hook
    should run before the execution, and a list of node names on which
    the hook should run after the execution.

    The keys of the dict must not be prefixed with 'GANETI_', as this
    prefix is added by the hooks runner. Also note that additional keys
    will be added by the hooks runner. If the LU doesn't define any
    environment, an empty dict (and not None) should be returned.

    If no nodes are to be returned, an empty list (and not None) should
    be used.

    Note that if the HPATH for a LU class is None, this function will
    not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged, but any LU can override it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the unused-argument and
    # could-be-a-function warnings
    # pylint: disable-msg=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primary or secondary nodes, if needed.

    It should be called from DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]


class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This just raises an error.

    """
    assert False, "BuildHooksEnv called for NoHooksLUs"


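# Illustrative sketch only, not part of the original module: a minimal
# LogicalUnit wired together from the pieces documented above (ExpandNames,
# CheckPrereq and Exec, no hooks, no Big Ganeti Lock). The class name and
# its behaviour are hypothetical.
class _ExampleNoopLU(NoHooksLU):
  """Example LU that acquires no locks and performs no work."""
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    # No locks are needed; an empty dict is mandatory (None is not allowed)
    self.needed_locks = {}

  def CheckPrereq(self):
    # Nothing to verify; this must not change cluster or system state
    pass

  def Exec(self, feedback_fn):
    # The actual work would go here
    feedback_fn("example LU executed")
    return True

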
class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    self.lu = lu

    # Shortcuts
    self.cfg = lu.cfg
    self.rpc = lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError


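# Illustrative sketch only, not part of the original module: a minimal
# tasklet showing the required interface. A real LU would create such
# objects in ExpandNames and store them in self.tasklets; the class name
# and its behaviour are hypothetical.
class _ExampleNoopTasklet(Tasklet):
  """Example tasklet that verifies nothing and performs no work."""
  def CheckPrereq(self):
    # Nothing to verify; must not change cluster or system state
    pass

  def Exec(self, feedback_fn):
    # The actual work would go here; feedback_fn reports progress
    feedback_fn("example tasklet executed")

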
def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names to expand (must not be empty)
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.OpPrereqError: if the nodes parameter is not a list
  @raise errors.ProgrammerError: if the nodes parameter is empty

  """
  if not isinstance(nodes, list):
    raise errors.OpPrereqError("Invalid argument type 'nodes'",
                               errors.ECODE_INVAL)

  if not nodes:
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
      " non-empty list of nodes whose name is to be expanded.")

  wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
  return utils.NiceSort(wanted)


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if not isinstance(instances, list):
    raise errors.OpPrereqError("Invalid argument type 'instances'",
                               errors.ECODE_INVAL)

  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set
  @type selected: list
  @param selected: the list of fields selected by the user

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


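# Illustrative example only, not part of the original module: with
# static=utils.FieldSet("name", "pnode") and dynamic=utils.FieldSet("oper_ram")
# (hypothetical field sets), a call such as
#
#   _CheckOutputFields(static, dynamic, ["name", "oper_ram"])
#
# passes, while selecting an unknown field such as "foo" raises
# errors.OpPrereqError("Unknown output fields selected: foo").

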
def _CheckBooleanOpField(op, name):
  """Validates boolean opcode parameters.

  This will ensure that an opcode parameter is either a boolean value,
  or None (but that it always exists).

  """
  val = getattr(op, name, None)
  if not (val is None or isinstance(val, bool)):
    raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
                               (name, str(val)), errors.ECODE_INVAL)
  setattr(op, name, val)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is offline

  """
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("Can't use offline node %s" % node,
                               errors.ECODE_INVAL)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_INVAL)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node does not support the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _RequireFileStorage():
  """Checks that file storage is enabled.

  @raise errors.OpPrereqError: when file storage is disabled

  """
  if not constants.ENABLE_FILE_STORAGE:
    raise errors.OpPrereqError("File storage disabled at configure time",
                               errors.ECODE_INVAL)


def _CheckDiskTemplate(template):
  """Ensure a given disk template is valid.

  """
  if template not in constants.DISK_TEMPLATES:
    msg = ("Invalid disk template name '%s', valid templates are: %s" %
           (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
  if template == constants.DT_FILE:
    _RequireFileStorage()


def _CheckStorageType(storage_type):
  """Ensure a given storage type is valid.

  """
  if storage_type not in constants.VALID_STORAGE_TYPES:
    raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
                               errors.ECODE_INVAL)
  if storage_type == constants.ST_FILE:
    _RequireFileStorage()



def _CheckInstanceDown(lu, instance, reason):
  """Ensure that an instance is not running."""
  if instance.admin_up:
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
                               (instance.name, reason), errors.ECODE_STATE)

  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
              prereq=True, ecode=errors.ECODE_ENVIRON)

  if instance.name in ins_l.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, reason), errors.ECODE_STATE)


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instances."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name):
  """Builds instance related env variables for hooks

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env


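# Illustrative example only, not part of the original module: for a
# hypothetical single-NIC, single-disk instance, a call such as
#
#   _BuildInstanceHookEnv("inst1.example.com", "node1.example.com", [],
#                         "debootstrap", True, 512, 1,
#                         [("198.51.100.10", "aa:00:00:12:34:56",
#                           constants.NIC_MODE_BRIDGED, "xen-br0")],
#                         constants.DT_PLAIN, [(10240, "rw")], {}, {},
#                         constants.HT_XEN_PVM)
#
# returns, among other keys, INSTANCE_NAME=inst1.example.com,
# INSTANCE_PRIMARY=node1.example.com, INSTANCE_STATUS=up,
# INSTANCE_NIC_COUNT=1, INSTANCE_NIC0_BRIDGE=xen-br0, INSTANCE_DISK_COUNT=1
# and INSTANCE_DISK0_SIZE=10240; the hooks runner later adds the GANETI_
# prefix to all of them.

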
def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUQueryInstanceData.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = objects.FillDict(c_nicparams, nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.admin_up,
    'memory': bep[constants.BE_MEMORY],
    'vcpus': bep[constants.BE_VCPUS],
    'nics': _NICListToTuple(lu, instance.nics),
    'disk_template': instance.disk_template,
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
    'bep': bep,
    'hvp': hvp,
    'hypervisor_name': instance.hypervisor,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max with one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should


def _CheckNicsBridgesExist(lu, target_nics, target_node,
                               profile=constants.PP_DEFAULT):
  """Check that the bridges needed by a list of nics exist.

  """
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[profile]
  paramslist = [objects.FillDict(c_nicparams, nic.nicparams)
                for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  if not os_obj.supported_variants:
    return
  try:
    variant = name.split("+", 1)[1]
  except IndexError:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


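# Illustrative example only, not part of the original module: for an OS
# object whose supported_variants is ["squeeze", "lenny"] (hypothetical
# values), a user-supplied name of "debootstrap+squeeze" passes the check
# above, "debootstrap+etch" fails with "Unsupported OS variant", and a bare
# "debootstrap" fails with "OS name must include a variant".

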
def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """

  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _FormatTimestamp(secs):
  """Formats a Unix timestamp with the local timezone.

  """
  return time.strftime("%F %T %Z", time.localtime(secs))


class LUPostInitCluster(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    mn = self.cfg.GetMasterNode()
    return env, [], [mn]

  def CheckPrereq(self):
    """No prerequisites to check.

    """
    return True

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUDestroyCluster(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    return env, [], []

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Run post hooks on master node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % master)

    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    if modify_ssh_setup:
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
      utils.CreateBackup(priv_key)
      utils.CreateBackup(pub_key)

    return master


def _VerifyCertificateInner(filename, expired, not_before, not_after, now,
                            warn_days=constants.SSL_CERT_EXPIRATION_WARN,
                            error_days=constants.SSL_CERT_EXPIRATION_ERROR):
  """Verifies certificate details for LUVerifyCluster.

  """
  if expired:
    msg = "Certificate %s is expired" % filename

    if not_before is not None and not_after is not None:
      msg += (" (valid from %s to %s)" %
              (_FormatTimestamp(not_before),
               _FormatTimestamp(not_after)))
    elif not_before is not None:
      msg += " (valid from %s)" % _FormatTimestamp(not_before)
    elif not_after is not None:
      msg += " (valid until %s)" % _FormatTimestamp(not_after)

    return (LUVerifyCluster.ETYPE_ERROR, msg)

  elif not_before is not None and not_before > now:
    return (LUVerifyCluster.ETYPE_WARNING,
            "Certificate %s not yet valid (valid from %s)" %
            (filename, _FormatTimestamp(not_before)))

  elif not_after is not None:
    remaining_days = int((not_after - now) / (24 * 3600))

    msg = ("Certificate %s expires in %d days" % (filename, remaining_days))

    if remaining_days <= error_days:
      return (LUVerifyCluster.ETYPE_ERROR, msg)

    if remaining_days <= warn_days:
      return (LUVerifyCluster.ETYPE_WARNING, msg)

  return (None, None)


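# Illustrative example only, not part of the original module: assuming
# hypothetical thresholds of warn_days=30 and error_days=7 (the real values
# come from constants.SSL_CERT_EXPIRATION_WARN/_ERROR), a certificate
# expiring in 10 days yields an ETYPE_WARNING, one expiring in 5 days an
# ETYPE_ERROR, an expired certificate is always an ETYPE_ERROR, a
# not-yet-valid one an ETYPE_WARNING, and one with more than warn_days left
# yields (None, None).

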
def _VerifyCertificate(filename):
  """Verifies a certificate for LUVerifyCluster.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable-msg=W0703
    return (LUVerifyCluster.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  # Depending on the pyOpenSSL version, this can just return (None, None)
  (not_before, not_after) = utils.GetX509CertValidity(cert)

  return _VerifyCertificateInner(filename, cert.has_expired(),
                                 not_before, not_after, time.time())


class LUVerifyCluster(LogicalUnit):
  """Verifies the cluster status.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["skip_checks", "verbose", "error_codes", "debug_simulate_errors"]
  REQ_BGL = False

  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dictionary of {secondary-node: list of instances} of all peers
        of this node (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)

    """
    def __init__(self, offline=False):
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes:
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % msg)

  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = bool(cond) or self.op.debug_simulate_errors
    if cond:
      self._Error(*args, **kwargs)
    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond

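  # Illustrative example only, not part of the original module: with the
  # opcode's error_codes option enabled, the line reported via feedback_fn
  # looks like
  #   - ERROR:ENODESSH:node:node2.example.com:ssh communication with ...
  # while the default, human-oriented format is
  #   - ERROR: node node2.example.com: ssh communication with ...
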
  def _VerifyNode(self, ninfo, nresult):
    """Run multiple tests against a node.

    Test list:

      - compares ganeti version
      - checks vg existence and size > 20G
      - checks config file checksum
      - checks ssh to other nodes

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @rtype: boolean
    @return: whether overall this call was successful (and we can expect
         reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, self.ENODERPC, node,
                  "unable to verify node: no data returned")
    if test:
      return False

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return False

    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return False

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)


    test = nresult.get(constants.NV_NODESETUP,
                           ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True

  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
      return

    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)

  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM data.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    # check pv names
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                 " '%s' of VG '%s'", pvname, owner_vg)

  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])

    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node master IP reachability data")
    if not test:
      if not nresult[constants.NV_MASTERIP]:
        if node == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
        else:
          msg = "cannot reach the master IP"
        _ErrorIf(True, self.ENODENET, node, msg)


  def _VerifyInstance(self, instance, instanceconfig, node_image):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      pri_img = node_image[node_current]
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    for node, n_img in node_image.items():
      if node != node_current:
        test = instance in n_img.instances
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
                 "instance should not run on node %s", node)

  def _VerifyOrphanVolumes(self, node_vol_should, node_image):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    """
    for node, n_img in node_image.items():
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # skip non-healthy nodes
        continue
      for volume in n_img.volumes:
        test = (node not in node_vol_should or
                volume not in node_vol_should[node])
        self._ErrorIf(test, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

  def _VerifyOrphanInstances(self, instancelist, node_image):
    """Verify the list of running instances.

    This checks what instances are running but unknown to the cluster.

    """
    for node, n_img in node_image.items():
      for o_inst in n_img.instances:
        test = o_inst not in instancelist
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
                      "instance %s on node %s should not exist", o_inst, node)

  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    """
    for node, n_img in node_image.items():
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
      for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate"
                      " failovers should peer node %s fail", prinode)

1478
                       master_files):
1479
    """Verifies and computes the node required file checksums.
1480

1481
    @type ninfo: L{objects.Node}
1482
    @param ninfo: the node to check
1483
    @param nresult: the remote results for the node
1484
    @param file_list: required list of files
1485
    @param local_cksum: dictionary of local files and their checksums
1486
    @param master_files: list of files that only masters should have
1487

1488
    """
1489
    node = ninfo.name
1490
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1491

    
1492
    remote_cksum = nresult.get(constants.NV_FILELIST, None)
1493
    test = not isinstance(remote_cksum, dict)
1494
    _ErrorIf(test, self.ENODEFILECHECK, node,
1495
             "node hasn't returned file checksum data")
1496
    if test:
1497
      return
1498

    
1499
    for file_name in file_list:
1500
      node_is_mc = ninfo.master_candidate
1501
      must_have = (file_name not in master_files) or node_is_mc
1502
      # missing
1503
      test1 = file_name not in remote_cksum
1504
      # invalid checksum
1505
      test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1506
      # existing and good
1507
      test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1508
      _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1509
               "file '%s' missing", file_name)
1510
      _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1511
               "file '%s' has wrong checksum", file_name)
1512
      # not candidate and this is not a must-have file
1513
      _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1514
               "file '%s' should not exist on non master"
1515
               " candidates (and the file is outdated)", file_name)
1516
      # all good, except non-master/non-must have combination
1517
      _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1518
               "file '%s' should not exist"
1519
               " on non master candidates", file_name)
1520

    
1521
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_map):
1522
    """Verifies and the node DRBD status.
1523

1524
    @type ninfo: L{objects.Node}
1525
    @param ninfo: the node to check
1526
    @param nresult: the remote results for the node
1527
    @param instanceinfo: the dict of instances
1528
    @param drbd_map: the DRBD map as returned by
1529
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1530

1531
    """
1532
    node = ninfo.name
1533
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1534

    
1535
    # compute the DRBD minors
1536
    node_drbd = {}
1537
    for minor, instance in drbd_map[node].items():
1538
      test = instance not in instanceinfo
1539
      _ErrorIf(test, self.ECLUSTERCFG, None,
1540
               "ghost instance '%s' in temporary DRBD map", instance)
1541
      # ghost instance should not be running, but otherwise we
1542
      # don't give double warnings (both ghost instance and
1543
      # unallocated minor in use)
1544
      if test:
1545
        node_drbd[minor] = (instance, False)
1546
      else:
1547
        instance = instanceinfo[instance]
1548
        node_drbd[minor] = (instance.name, instance.admin_up)
1549

    
1550
    # and now check them
1551
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
1552
    test = not isinstance(used_minors, (tuple, list))
1553
    _ErrorIf(test, self.ENODEDRBD, node,
1554
             "cannot parse drbd status file: %s", str(used_minors))
1555
    if test:
1556
      # we cannot check drbd status
1557
      return
1558

    
1559
    for minor, (iname, must_exist) in node_drbd.items():
1560
      test = minor not in used_minors and must_exist
1561
      _ErrorIf(test, self.ENODEDRBD, node,
1562
               "drbd minor %d of instance %s is not active", minor, iname)
1563
    for minor in used_minors:
1564
      test = minor not in node_drbd
1565
      _ErrorIf(test, self.ENODEDRBD, node,
1566
               "unallocated drbd minor %d is in use", minor)
1567

    
1568
  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1569
    """Verifies and updates the node volume data.
1570

1571
    This function will update a L{NodeImage}'s internal structures
1572
    with data from the remote call.
1573

1574
    @type ninfo: L{objects.Node}
1575
    @param ninfo: the node to check
1576
    @param nresult: the remote results for the node
1577
    @param nimg: the node image object
1578
    @param vg_name: the configured VG name
1579

1580
    """
1581
    node = ninfo.name
1582
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1583

    
1584
    nimg.lvm_fail = True
1585
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1586
    if vg_name is None:
1587
      pass
1588
    elif isinstance(lvdata, basestring):
1589
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1590
               utils.SafeEncode(lvdata))
1591
    elif not isinstance(lvdata, dict):
1592
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1593
    else:
1594
      nimg.volumes = lvdata
1595
      nimg.lvm_fail = False
1596

    
1597
  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1598
    """Verifies and updates the node instance list.
1599

1600
    If the listing was successful, then updates this node's instance
1601
    list. Otherwise, it marks the RPC call as failed for the instance
1602
    list key.
1603

1604
    @type ninfo: L{objects.Node}
1605
    @param ninfo: the node to check
1606
    @param nresult: the remote results for the node
1607
    @param nimg: the node image object
1608

1609
    """
1610
    idata = nresult.get(constants.NV_INSTANCELIST, None)
1611
    test = not isinstance(idata, list)
1612
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1613
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
1614
    if test:
1615
      nimg.hyp_fail = True
1616
    else:
1617
      nimg.instances = idata
1618

    
1619
  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1620
    """Verifies and computes a node information map
1621

1622
    @type ninfo: L{objects.Node}
1623
    @param ninfo: the node to check
1624
    @param nresult: the remote results for the node
1625
    @param nimg: the node image object
1626
    @param vg_name: the configured VG name
1627

1628
    """
1629
    node = ninfo.name
1630
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1631

    
1632
    # try to read free memory (from the hypervisor)
1633
    hv_info = nresult.get(constants.NV_HVINFO, None)
1634
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1635
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1636
    if not test:
1637
      try:
1638
        nimg.mfree = int(hv_info["memory_free"])
1639
      except (ValueError, TypeError):
1640
        _ErrorIf(True, self.ENODERPC, node,
1641
                 "node returned invalid nodeinfo, check hypervisor")
1642

    
1643
    # FIXME: devise a free space model for file based instances as well
1644
    if vg_name is not None:
1645
      test = (constants.NV_VGLIST not in nresult or
1646
              vg_name not in nresult[constants.NV_VGLIST])
1647
      _ErrorIf(test, self.ENODELVM, node,
1648
               "node didn't return data for the volume group '%s'"
1649
               " - it is either missing or broken", vg_name)
1650
      if not test:
1651
        try:
1652
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
1653
        except (ValueError, TypeError):
1654
          _ErrorIf(True, self.ENODERPC, node,
1655
                   "node returned invalid LVM info, check LVM status")
1656

    
1657
  def CheckPrereq(self):
1658
    """Check prerequisites.
1659

1660
    Transform the list of checks we're going to skip into a set and check that
1661
    all its members are valid.
1662

1663
    """
1664
    self.skip_set = frozenset(self.op.skip_checks)
1665
    if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
1666
      raise errors.OpPrereqError("Invalid checks to be skipped specified",
1667
                                 errors.ECODE_INVAL)
1668

    
1669
  def BuildHooksEnv(self):
1670
    """Build hooks env.
1671

1672
    Cluster-Verify hooks run only in the post phase; a hook failure is logged
1673
    in the verify output and makes the verification fail.
1674

1675
    """
1676
    all_nodes = self.cfg.GetNodeList()
1677
    env = {
1678
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
1679
      }
1680
    for node in self.cfg.GetAllNodesInfo().values():
1681
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
1682

    
1683
    return env, [], all_nodes
1684

    
1685
  def Exec(self, feedback_fn):
1686
    """Verify integrity of cluster, performing various test on nodes.
1687

1688
    """
1689
    self.bad = False
1690
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1691
    verbose = self.op.verbose
1692
    self._feedback_fn = feedback_fn
1693
    feedback_fn("* Verifying global settings")
1694
    for msg in self.cfg.VerifyConfig():
1695
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)
1696

    
1697
    # Check the cluster certificates
1698
    for cert_filename in constants.ALL_CERT_FILES:
1699
      (errcode, msg) = _VerifyCertificate(cert_filename)
1700
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1701

    
1702
    vg_name = self.cfg.GetVGName()
1703
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
1704
    cluster = self.cfg.GetClusterInfo()
1705
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
1706
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
1707
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
1708
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
1709
                        for iname in instancelist)
1710
    i_non_redundant = [] # Non redundant instances
1711
    i_non_a_balanced = [] # Non auto-balanced instances
1712
    n_offline = 0 # Count of offline nodes
1713
    n_drained = 0 # Count of nodes being drained
1714
    node_vol_should = {}
1715

    
1716
    # FIXME: verify OS list
1717
    # do local checksums
1718
    master_files = [constants.CLUSTER_CONF_FILE]
1719
    master_node = self.master_node = self.cfg.GetMasterNode()
1720
    master_ip = self.cfg.GetMasterIP()
1721

    
1722
    file_names = ssconf.SimpleStore().GetFileList()
1723
    file_names.extend(constants.ALL_CERT_FILES)
1724
    file_names.extend(master_files)
1725
    if cluster.modify_etc_hosts:
1726
      file_names.append(constants.ETC_HOSTS)
1727

    
1728
    local_checksums = utils.FingerprintFiles(file_names)
1729

    
1730
    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
1731
    node_verify_param = {
1732
      constants.NV_FILELIST: file_names,
1733
      constants.NV_NODELIST: [node.name for node in nodeinfo
1734
                              if not node.offline],
1735
      constants.NV_HYPERVISOR: hypervisors,
1736
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
1737
                                  node.secondary_ip) for node in nodeinfo
1738
                                 if not node.offline],
1739
      constants.NV_INSTANCELIST: hypervisors,
1740
      constants.NV_VERSION: None,
1741
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
1742
      constants.NV_NODESETUP: None,
1743
      constants.NV_TIME: None,
1744
      constants.NV_MASTERIP: (master_node, master_ip),
1745
      }
1746

    
1747
    if vg_name is not None:
1748
      node_verify_param[constants.NV_VGLIST] = None
1749
      node_verify_param[constants.NV_LVLIST] = vg_name
1750
      node_verify_param[constants.NV_PVLIST] = [vg_name]
1751
      node_verify_param[constants.NV_DRBDLIST] = None
1752

    
1753
    # Build our expected cluster state
1754
    node_image = dict((node.name, self.NodeImage(offline=node.offline))
1755
                      for node in nodeinfo)
1756

    
1757
    for instance in instancelist:
1758
      inst_config = instanceinfo[instance]
1759

    
1760
      for nname in inst_config.all_nodes:
1761
        if nname not in node_image:
1762
          # ghost node
1763
          gnode = self.NodeImage()
1764
          gnode.ghost = True
1765
          node_image[nname] = gnode
1766

    
1767
      inst_config.MapLVsByNode(node_vol_should)
1768

    
1769
      pnode = inst_config.primary_node
1770
      node_image[pnode].pinst.append(instance)
1771

    
1772
      for snode in inst_config.secondary_nodes:
1773
        nimg = node_image[snode]
1774
        nimg.sinst.append(instance)
1775
        if pnode not in nimg.sbp:
1776
          nimg.sbp[pnode] = []
1777
        nimg.sbp[pnode].append(instance)
1778

    
1779
    # At this point, we have the in-memory data structures complete,
1780
    # except for the runtime information, which we'll gather next
1781

    
1782
    # Due to the way our RPC system works, exact response times cannot be
1783
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
1784
    # time before and after executing the request, we can at least have a time
1785
    # window.
1786
    nvinfo_starttime = time.time()
1787
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
1788
                                           self.cfg.GetClusterName())
1789
    nvinfo_endtime = time.time()
1790

    
1791
    all_drbd_map = self.cfg.ComputeDRBDMap()
1792

    
1793
    feedback_fn("* Verifying node status")
1794
    for node_i in nodeinfo:
1795
      node = node_i.name
1796
      nimg = node_image[node]
1797

    
1798
      if node_i.offline:
1799
        if verbose:
1800
          feedback_fn("* Skipping offline node %s" % (node,))
1801
        n_offline += 1
1802
        continue
1803

    
1804
      if node == master_node:
1805
        ntype = "master"
1806
      elif node_i.master_candidate:
1807
        ntype = "master candidate"
1808
      elif node_i.drained:
1809
        ntype = "drained"
1810
        n_drained += 1
1811
      else:
1812
        ntype = "regular"
1813
      if verbose:
1814
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
1815

    
1816
      msg = all_nvinfo[node].fail_msg
1817
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
1818
      if msg:
1819
        nimg.rpc_fail = True
1820
        continue
1821

    
1822
      nresult = all_nvinfo[node].payload
1823

    
1824
      nimg.call_ok = self._VerifyNode(node_i, nresult)
1825
      self._VerifyNodeNetwork(node_i, nresult)
1826
      self._VerifyNodeLVM(node_i, nresult, vg_name)
1827
      self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
1828
                            master_files)
1829
      self._VerifyNodeDrbd(node_i, nresult, instanceinfo, all_drbd_map)
1830
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
1831

    
1832
      self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
1833
      self._UpdateNodeInstances(node_i, nresult, nimg)
1834
      self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
1835

    
1836
    feedback_fn("* Verifying instance status")
1837
    for instance in instancelist:
1838
      if verbose:
1839
        feedback_fn("* Verifying instance %s" % instance)
1840
      inst_config = instanceinfo[instance]
1841
      self._VerifyInstance(instance, inst_config, node_image)
1842
      inst_nodes_offline = []
1843

    
1844
      pnode = inst_config.primary_node
1845
      pnode_img = node_image[pnode]
1846
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
1847
               self.ENODERPC, pnode, "instance %s, connection to"
1848
               " primary node failed", instance)
1849

    
1850
      if pnode_img.offline:
1851
        inst_nodes_offline.append(pnode)
1852

    
1853
      # If the instance is non-redundant we cannot survive losing its primary
1854
      # node, so we are not N+1 compliant. On the other hand we have no disk
1855
      # templates with more than one secondary so that situation is not well
1856
      # supported either.
1857
      # FIXME: does not support file-backed instances
1858
      if not inst_config.secondary_nodes:
1859
        i_non_redundant.append(instance)
1860
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
1861
               instance, "instance has multiple secondary nodes: %s",
1862
               utils.CommaJoin(inst_config.secondary_nodes),
1863
               code=self.ETYPE_WARNING)
1864

    
1865
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
1866
        i_non_a_balanced.append(instance)
1867

    
1868
      for snode in inst_config.secondary_nodes:
1869
        s_img = node_image[snode]
1870
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
1871
                 "instance %s, connection to secondary node failed", instance)
1872

    
1873
        if s_img.offline:
1874
          inst_nodes_offline.append(snode)
1875

    
1876
      # warn that the instance lives on offline nodes
1877
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
1878
               "instance lives on offline node(s) %s",
1879
               utils.CommaJoin(inst_nodes_offline))
1880
      # ... or ghost nodes
1881
      for node in inst_config.all_nodes:
1882
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
1883
                 "instance lives on ghost node %s", node)
1884

    
1885
    feedback_fn("* Verifying orphan volumes")
1886
    self._VerifyOrphanVolumes(node_vol_should, node_image)
1887

    
1888
    feedback_fn("* Verifying orphan instances")
1889
    self._VerifyOrphanInstances(instancelist, node_image)
1890

    
1891
    if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
1892
      feedback_fn("* Verifying N+1 Memory redundancy")
1893
      self._VerifyNPlusOneMemory(node_image, instanceinfo)
1894

    
1895
    feedback_fn("* Other Notes")
1896
    if i_non_redundant:
1897
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
1898
                  % len(i_non_redundant))
1899

    
1900
    if i_non_a_balanced:
1901
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
1902
                  % len(i_non_a_balanced))
1903

    
1904
    if n_offline:
1905
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
1906

    
1907
    if n_drained:
1908
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
1909

    
1910
    return not self.bad
1911

    
1912
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
1913
    """Analyze the post-hooks' result
1914

1915
    This method analyses the hook result, handles it, and sends some
1916
    nicely-formatted feedback back to the user.
1917

1918
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
1919
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
1920
    @param hooks_results: the results of the multi-node hooks rpc call
1921
    @param feedback_fn: function used to send feedback back to the caller
1922
    @param lu_result: previous Exec result
1923
    @return: the new Exec result, based on the previous result
1924
        and hook results
1925

1926
    """
1927
    # We only really run POST phase hooks, and are only interested in
1928
    # their results
1929
    if phase == constants.HOOKS_PHASE_POST:
1930
      # Used to change hooks' output to proper indentation
1931
      indent_re = re.compile('^', re.M)
1932
      feedback_fn("* Hooks Results")
1933
      assert hooks_results, "invalid result from hooks"
1934

    
1935
      for node_name in hooks_results:
1936
        res = hooks_results[node_name]
1937
        msg = res.fail_msg
1938
        test = msg and not res.offline
1939
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
1940
                      "Communication failure in hooks execution: %s", msg)
1941
        if res.offline or msg:
1942
          # No need to investigate payload if node is offline or gave an error.
1943
          # override manually lu_result here as _ErrorIf only
1944
          # overrides self.bad
1945
          lu_result = 1
1946
          continue
1947
        for script, hkr, output in res.payload:
1948
          test = hkr == constants.HKR_FAIL
1949
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
1950
                        "Script %s failed, output:", script)
1951
          if test:
1952
            output = indent_re.sub('      ', output)
1953
            feedback_fn("%s" % output)
1954
            lu_result = 0
1955

    
1956
      return lu_result
1957

    
1958

    
1959
class LUVerifyDisks(NoHooksLU):
1960
  """Verifies the cluster disks status.
1961

1962
  """
1963
  _OP_REQP = []
1964
  REQ_BGL = False
1965

    
1966
  def ExpandNames(self):
1967
    self.needed_locks = {
1968
      locking.LEVEL_NODE: locking.ALL_SET,
1969
      locking.LEVEL_INSTANCE: locking.ALL_SET,
1970
    }
1971
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1972

    
1973
  def CheckPrereq(self):
1974
    """Check prerequisites.
1975

1976
    This has no prerequisites.
1977

1978
    """
1979
    pass
1980

    
1981
  def Exec(self, feedback_fn):
1982
    """Verify integrity of cluster disks.
1983

1984
    @rtype: tuple of three items
1985
    @return: a tuple of (dict of node-to-node_error, list of instances
1986
        which need activate-disks, dict of instance: (node, volume) for
1987
        missing volumes)
1988

1989
    """
1990
    result = res_nodes, res_instances, res_missing = {}, [], {}
1991

    
1992
    vg_name = self.cfg.GetVGName()
1993
    nodes = utils.NiceSort(self.cfg.GetNodeList())
1994
    instances = [self.cfg.GetInstanceInfo(name)
1995
                 for name in self.cfg.GetInstanceList()]
1996

    
1997
    nv_dict = {}
1998
    for inst in instances:
1999
      inst_lvs = {}
2000
      if (not inst.admin_up or
2001
          inst.disk_template not in constants.DTS_NET_MIRROR):
2002
        continue
2003
      inst.MapLVsByNode(inst_lvs)
2004
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2005
      for node, vol_list in inst_lvs.iteritems():
2006
        for vol in vol_list:
2007
          nv_dict[(node, vol)] = inst
2008

    
2009
    if not nv_dict:
2010
      return result
2011

    
2012
    node_lvs = self.rpc.call_lv_list(nodes, vg_name)
2013

    
2014
    for node in nodes:
2015
      # node_volume
2016
      node_res = node_lvs[node]
2017
      if node_res.offline:
2018
        continue
2019
      msg = node_res.fail_msg
2020
      if msg:
2021
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2022
        res_nodes[node] = msg
2023
        continue
2024

    
2025
      lvs = node_res.payload
2026
      for lv_name, (_, _, lv_online) in lvs.items():
2027
        inst = nv_dict.pop((node, lv_name), None)
2028
        if (not lv_online and inst is not None
2029
            and inst.name not in res_instances):
2030
          res_instances.append(inst.name)
2031

    
2032
    # any leftover items in nv_dict are missing LVs, let's arrange the
2033
    # data better
2034
    for key, inst in nv_dict.iteritems():
2035
      if inst.name not in res_missing:
2036
        res_missing[inst.name] = []
2037
      res_missing[inst.name].append(key)
2038

    
2039
    return result
2040

    
2041

    
2042
class LURepairDiskSizes(NoHooksLU):
2043
  """Verifies the cluster disks sizes.
2044

2045
  """
2046
  _OP_REQP = ["instances"]
2047
  REQ_BGL = False
2048

    
2049
  def ExpandNames(self):
2050
    if not isinstance(self.op.instances, list):
2051
      raise errors.OpPrereqError("Invalid argument type 'instances'",
2052
                                 errors.ECODE_INVAL)
2053

    
2054
    if self.op.instances:
2055
      self.wanted_names = []
2056
      for name in self.op.instances:
2057
        full_name = _ExpandInstanceName(self.cfg, name)
2058
        self.wanted_names.append(full_name)
2059
      self.needed_locks = {
2060
        locking.LEVEL_NODE: [],
2061
        locking.LEVEL_INSTANCE: self.wanted_names,
2062
        }
2063
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2064
    else:
2065
      self.wanted_names = None
2066
      self.needed_locks = {
2067
        locking.LEVEL_NODE: locking.ALL_SET,
2068
        locking.LEVEL_INSTANCE: locking.ALL_SET,
2069
        }
2070
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2071

    
2072
  def DeclareLocks(self, level):
2073
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
2074
      self._LockInstancesNodes(primary_only=True)
2075

    
2076
  def CheckPrereq(self):
2077
    """Check prerequisites.
2078

2079
    This only checks the optional instance list against the existing names.
2080

2081
    """
2082
    if self.wanted_names is None:
2083
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2084

    
2085
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2086
                             in self.wanted_names]
2087

    
2088
  def _EnsureChildSizes(self, disk):
2089
    """Ensure children of the disk have the needed disk size.
2090

2091
    This is valid mainly for DRBD8 and fixes an issue where the
2092
    children have smaller disk size.
2093

2094
    @param disk: an L{ganeti.objects.Disk} object
2095

2096
    """
2097
    if disk.dev_type == constants.LD_DRBD8:
2098
      assert disk.children, "Empty children for DRBD8?"
2099
      fchild = disk.children[0]
2100
      mismatch = fchild.size < disk.size
2101
      if mismatch:
2102
        self.LogInfo("Child disk has size %d, parent %d, fixing",
2103
                     fchild.size, disk.size)
2104
        fchild.size = disk.size
2105

    
2106
      # and we recurse on this child only, not on the metadev
2107
      return self._EnsureChildSizes(fchild) or mismatch
2108
    else:
2109
      return False
2110

    
2111
  def Exec(self, feedback_fn):
2112
    """Verify the size of cluster disks.
2113

2114
    """
2115
    # TODO: check child disks too
2116
    # TODO: check differences in size between primary/secondary nodes
2117
    per_node_disks = {}
2118
    for instance in self.wanted_instances:
2119
      pnode = instance.primary_node
2120
      if pnode not in per_node_disks:
2121
        per_node_disks[pnode] = []
2122
      for idx, disk in enumerate(instance.disks):
2123
        per_node_disks[pnode].append((instance, idx, disk))
2124

    
2125
    changed = []
2126
    for node, dskl in per_node_disks.items():
2127
      newl = [v[2].Copy() for v in dskl]
2128
      for dsk in newl:
2129
        self.cfg.SetDiskID(dsk, node)
2130
      result = self.rpc.call_blockdev_getsizes(node, newl)
2131
      if result.fail_msg:
2132
        self.LogWarning("Failure in blockdev_getsizes call to node"
2133
                        " %s, ignoring", node)
2134
        continue
2135
      if len(result.data) != len(dskl):
2136
        self.LogWarning("Invalid result from node %s, ignoring node results",
2137
                        node)
2138
        continue
2139
      for ((instance, idx, disk), size) in zip(dskl, result.data):
2140
        if size is None:
2141
          self.LogWarning("Disk %d of instance %s did not return size"
2142
                          " information, ignoring", idx, instance.name)
2143
          continue
2144
        if not isinstance(size, (int, long)):
2145
          self.LogWarning("Disk %d of instance %s did not return valid"
2146
                          " size information, ignoring", idx, instance.name)
2147
          continue
2148
        size = size >> 20
2149
        if size != disk.size:
2150
          self.LogInfo("Disk %d of instance %s has mismatched size,"
2151
                       " correcting: recorded %d, actual %d", idx,
2152
                       instance.name, disk.size, size)
2153
          disk.size = size
2154
          self.cfg.Update(instance, feedback_fn)
2155
          changed.append((instance.name, idx, size))
2156
        if self._EnsureChildSizes(disk):
2157
          self.cfg.Update(instance, feedback_fn)
2158
          changed.append((instance.name, idx, disk.size))
2159
    return changed
2160

    
2161

    
2162
class LURenameCluster(LogicalUnit):
2163
  """Rename the cluster.
2164

2165
  """
2166
  HPATH = "cluster-rename"
2167
  HTYPE = constants.HTYPE_CLUSTER
2168
  _OP_REQP = ["name"]
2169

    
2170
  def BuildHooksEnv(self):
2171
    """Build hooks env.
2172

2173
    """
2174
    env = {
2175
      "OP_TARGET": self.cfg.GetClusterName(),
2176
      "NEW_NAME": self.op.name,
2177
      }
2178
    mn = self.cfg.GetMasterNode()
2179
    all_nodes = self.cfg.GetNodeList()
2180
    return env, [mn], all_nodes
2181

    
2182
  def CheckPrereq(self):
2183
    """Verify that the passed name is a valid one.
2184

2185
    """
2186
    hostname = utils.GetHostInfo(self.op.name)
2187

    
2188
    new_name = hostname.name
2189
    self.ip = new_ip = hostname.ip
2190
    old_name = self.cfg.GetClusterName()
2191
    old_ip = self.cfg.GetMasterIP()
2192
    if new_name == old_name and new_ip == old_ip:
2193
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
2194
                                 " cluster has changed",
2195
                                 errors.ECODE_INVAL)
2196
    if new_ip != old_ip:
2197
      if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2198
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
2199
                                   " reachable on the network. Aborting." %
2200
                                   new_ip, errors.ECODE_NOTUNIQUE)
2201

    
2202
    self.op.name = new_name
2203

    
2204
  def Exec(self, feedback_fn):
2205
    """Rename the cluster.
2206

2207
    """
2208
    clustername = self.op.name
2209
    ip = self.ip
2210

    
2211
    # shutdown the master IP
2212
    master = self.cfg.GetMasterNode()
2213
    result = self.rpc.call_node_stop_master(master, False)
2214
    result.Raise("Could not disable the master role")
2215

    
2216
    try:
2217
      cluster = self.cfg.GetClusterInfo()
2218
      cluster.cluster_name = clustername
2219
      cluster.master_ip = ip
2220
      self.cfg.Update(cluster, feedback_fn)
2221

    
2222
      # update the known hosts file
2223
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2224
      node_list = self.cfg.GetNodeList()
2225
      try:
2226
        node_list.remove(master)
2227
      except ValueError:
2228
        pass
2229
      result = self.rpc.call_upload_file(node_list,
2230
                                         constants.SSH_KNOWN_HOSTS_FILE)
2231
      for to_node, to_result in result.iteritems():
2232
        msg = to_result.fail_msg
2233
        if msg:
2234
          msg = ("Copy of file %s to node %s failed: %s" %
2235
                 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
2236
          self.proc.LogWarning(msg)
2237

    
2238
    finally:
2239
      result = self.rpc.call_node_start_master(master, False, False)
2240
      msg = result.fail_msg
2241
      if msg:
2242
        self.LogWarning("Could not re-enable the master role on"
2243
                        " the master, please restart manually: %s", msg)
2244

    
2245

    
2246
def _RecursiveCheckIfLVMBased(disk):
2247
  """Check if the given disk or its children are lvm-based.
2248

2249
  @type disk: L{objects.Disk}
2250
  @param disk: the disk to check
2251
  @rtype: boolean
2252
  @return: boolean indicating whether an LD_LV dev_type was found or not
2253

2254
  """
2255
  if disk.children:
2256
    for chdisk in disk.children:
2257
      if _RecursiveCheckIfLVMBased(chdisk):
2258
        return True
2259
  return disk.dev_type == constants.LD_LV
2260

    
2261

    
2262
class LUSetClusterParams(LogicalUnit):
2263
  """Change the parameters of the cluster.
2264

2265
  """
2266
  HPATH = "cluster-modify"
2267
  HTYPE = constants.HTYPE_CLUSTER
2268
  _OP_REQP = []
2269
  REQ_BGL = False
2270

    
2271
  def CheckArguments(self):
2272
    """Check parameters
2273

2274
    """
2275
    for attr in ["candidate_pool_size",
2276
                 "uid_pool", "add_uids", "remove_uids"]:
2277
      if not hasattr(self.op, attr):
2278
        setattr(self.op, attr, None)
2279

    
2280
    if self.op.candidate_pool_size is not None:
2281
      try:
2282
        self.op.candidate_pool_size = int(self.op.candidate_pool_size)
2283
      except (ValueError, TypeError), err:
2284
        raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
2285
                                   str(err), errors.ECODE_INVAL)
2286
      if self.op.candidate_pool_size < 1:
2287
        raise errors.OpPrereqError("At least one master candidate needed",
2288
                                   errors.ECODE_INVAL)
2289

    
2290
    _CheckBooleanOpField(self.op, "maintain_node_health")
2291

    
2292
    if self.op.uid_pool:
2293
      uidpool.CheckUidPool(self.op.uid_pool)
2294

    
2295
    if self.op.add_uids:
2296
      uidpool.CheckUidPool(self.op.add_uids)
2297

    
2298
    if self.op.remove_uids:
2299
      uidpool.CheckUidPool(self.op.remove_uids)
2300

    
2301
  def ExpandNames(self):
2302
    # FIXME: in the future maybe other cluster params won't require checking on
2303
    # all nodes to be modified.
2304
    self.needed_locks = {
2305
      locking.LEVEL_NODE: locking.ALL_SET,
2306
    }
2307
    self.share_locks[locking.LEVEL_NODE] = 1
2308

    
2309
  def BuildHooksEnv(self):
2310
    """Build hooks env.
2311

2312
    """
2313
    env = {
2314
      "OP_TARGET": self.cfg.GetClusterName(),
2315
      "NEW_VG_NAME": self.op.vg_name,
2316
      }
2317
    mn = self.cfg.GetMasterNode()
2318
    return env, [mn], [mn]
2319

    
2320
  def CheckPrereq(self):
2321
    """Check prerequisites.
2322

2323
    This checks whether the given params don't conflict and
2324
    if the given volume group is valid.
2325

2326
    """
2327
    if self.op.vg_name is not None and not self.op.vg_name:
2328
      instances = self.cfg.GetAllInstancesInfo().values()
2329
      for inst in instances:
2330
        for disk in inst.disks:
2331
          if _RecursiveCheckIfLVMBased(disk):
2332
            raise errors.OpPrereqError("Cannot disable lvm storage while"
2333
                                       " lvm-based instances exist",
2334
                                       errors.ECODE_INVAL)
2335

    
2336
    node_list = self.acquired_locks[locking.LEVEL_NODE]
2337

    
2338
    # if vg_name not None, checks given volume group on all nodes
2339
    if self.op.vg_name:
2340
      vglist = self.rpc.call_vg_list(node_list)
2341
      for node in node_list:
2342
        msg = vglist[node].fail_msg
2343
        if msg:
2344
          # ignoring down node
2345
          self.LogWarning("Error while gathering data on node %s"
2346
                          " (ignoring node): %s", node, msg)
2347
          continue
2348
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2349
                                              self.op.vg_name,
2350
                                              constants.MIN_VG_SIZE)
2351
        if vgstatus:
2352
          raise errors.OpPrereqError("Error on node '%s': %s" %
2353
                                     (node, vgstatus), errors.ECODE_ENVIRON)
2354

    
2355
    self.cluster = cluster = self.cfg.GetClusterInfo()
2356
    # validate params changes
2357
    if self.op.beparams:
2358
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2359
      self.new_beparams = objects.FillDict(
2360
        cluster.beparams[constants.PP_DEFAULT], self.op.beparams)
2361

    
2362
    if self.op.nicparams:
2363
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2364
      self.new_nicparams = objects.FillDict(
2365
        cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
2366
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
2367
      nic_errors = []
2368

    
2369
      # check all instances for consistency
2370
      for instance in self.cfg.GetAllInstancesInfo().values():
2371
        for nic_idx, nic in enumerate(instance.nics):
2372
          params_copy = copy.deepcopy(nic.nicparams)
2373
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
2374

    
2375
          # check parameter syntax
2376
          try:
2377
            objects.NIC.CheckParameterSyntax(params_filled)
2378
          except errors.ConfigurationError, err:
2379
            nic_errors.append("Instance %s, nic/%d: %s" %
2380
                              (instance.name, nic_idx, err))
2381

    
2382
          # if we're moving instances to routed, check that they have an ip
2383
          target_mode = params_filled[constants.NIC_MODE]
2384
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2385
            nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2386
                              (instance.name, nic_idx))
2387
      if nic_errors:
2388
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2389
                                   "\n".join(nic_errors))
2390

    
2391
    # hypervisor list/parameters
2392
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2393
    if self.op.hvparams:
2394
      if not isinstance(self.op.hvparams, dict):
2395
        raise errors.OpPrereqError("Invalid 'hvparams' parameter on input",
2396
                                   errors.ECODE_INVAL)
2397
      for hv_name, hv_dict in self.op.hvparams.items():
2398
        if hv_name not in self.new_hvparams:
2399
          self.new_hvparams[hv_name] = hv_dict
2400
        else:
2401
          self.new_hvparams[hv_name].update(hv_dict)
2402

    
2403
    # os hypervisor parameters
2404
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2405
    if self.op.os_hvp:
2406
      if not isinstance(self.op.os_hvp, dict):
2407
        raise errors.OpPrereqError("Invalid 'os_hvp' parameter on input",
2408
                                   errors.ECODE_INVAL)
2409
      for os_name, hvs in self.op.os_hvp.items():
2410
        if not isinstance(hvs, dict):
2411
          raise errors.OpPrereqError(("Invalid 'os_hvp' parameter on"
2412
                                      " input"), errors.ECODE_INVAL)
2413
        if os_name not in self.new_os_hvp:
2414
          self.new_os_hvp[os_name] = hvs
2415
        else:
2416
          for hv_name, hv_dict in hvs.items():
2417
            if hv_name not in self.new_os_hvp[os_name]:
2418
              self.new_os_hvp[os_name][hv_name] = hv_dict
2419
            else:
2420
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
2421

    
2422
    # changes to the hypervisor list
2423
    if self.op.enabled_hypervisors is not None:
2424
      self.hv_list = self.op.enabled_hypervisors
2425
      if not self.hv_list:
2426
        raise errors.OpPrereqError("Enabled hypervisors list must contain at"
2427
                                   " least one member",
2428
                                   errors.ECODE_INVAL)
2429
      invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
2430
      if invalid_hvs:
2431
        raise errors.OpPrereqError("Enabled hypervisors contains invalid"
2432
                                   " entries: %s" %
2433
                                   utils.CommaJoin(invalid_hvs),
2434
                                   errors.ECODE_INVAL)
2435
      for hv in self.hv_list:
2436
        # if the hypervisor doesn't already exist in the cluster
2437
        # hvparams, we initialize it to empty, and then (in both
2438
        # cases) we make sure to fill the defaults, as we might not
2439
        # have a complete defaults list if the hypervisor wasn't
2440
        # enabled before
2441
        if hv not in new_hvp:
2442
          new_hvp[hv] = {}
2443
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2444
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2445
    else:
2446
      self.hv_list = cluster.enabled_hypervisors
2447

    
2448
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
2449
      # either the enabled list has changed, or the parameters have, validate
2450
      for hv_name, hv_params in self.new_hvparams.items():
2451
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
2452
            (self.op.enabled_hypervisors and
2453
             hv_name in self.op.enabled_hypervisors)):
2454
          # either this is a new hypervisor, or its parameters have changed
2455
          hv_class = hypervisor.GetHypervisor(hv_name)
2456
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2457
          hv_class.CheckParameterSyntax(hv_params)
2458
          _CheckHVParams(self, node_list, hv_name, hv_params)
2459

    
2460
    if self.op.os_hvp:
2461
      # no need to check any newly-enabled hypervisors, since the
2462
      # defaults have already been checked in the above code-block
2463
      for os_name, os_hvp in self.new_os_hvp.items():
2464
        for hv_name, hv_params in os_hvp.items():
2465
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2466
          # we need to fill in the new os_hvp on top of the actual hv_p
2467
          cluster_defaults = self.new_hvparams.get(hv_name, {})
2468
          new_osp = objects.FillDict(cluster_defaults, hv_params)
2469
          hv_class = hypervisor.GetHypervisor(hv_name)
2470
          hv_class.CheckParameterSyntax(new_osp)
2471
          _CheckHVParams(self, node_list, hv_name, new_osp)
2472

    
2473

    
2474
  def Exec(self, feedback_fn):
2475
    """Change the parameters of the cluster.
2476

2477
    """
2478
    if self.op.vg_name is not None:
2479
      new_volume = self.op.vg_name
2480
      if not new_volume:
2481
        new_volume = None
2482
      if new_volume != self.cfg.GetVGName():
2483
        self.cfg.SetVGName(new_volume)
2484
      else:
2485
        feedback_fn("Cluster LVM configuration already in desired"
2486
                    " state, not changing")
2487
    if self.op.hvparams:
2488
      self.cluster.hvparams = self.new_hvparams
2489
    if self.op.os_hvp:
2490
      self.cluster.os_hvp = self.new_os_hvp
2491
    if self.op.enabled_hypervisors is not None:
2492
      self.cluster.hvparams = self.new_hvparams
2493
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2494
    if self.op.beparams:
2495
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2496
    if self.op.nicparams:
2497
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2498

    
2499
    if self.op.candidate_pool_size is not None:
2500
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
2501
      # we need to update the pool size here, otherwise the save will fail
2502
      _AdjustCandidatePool(self, [])
2503

    
2504
    if self.op.maintain_node_health is not None:
2505
      self.cluster.maintain_node_health = self.op.maintain_node_health
2506

    
2507
    if self.op.add_uids is not None:
2508
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2509

    
2510
    if self.op.remove_uids is not None:
2511
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2512

    
2513
    if self.op.uid_pool is not None:
2514
      self.cluster.uid_pool = self.op.uid_pool
2515

    
2516
    self.cfg.Update(self.cluster, feedback_fn)
2517

    
2518

    
2519
def _RedistributeAncillaryFiles(lu, additional_nodes=None):
2520
  """Distribute additional files which are part of the cluster configuration.
2521

2522
  ConfigWriter takes care of distributing the config and ssconf files, but
2523
  there are more files which should be distributed to all nodes. This function
2524
  makes sure those are copied.
2525

2526
  @param lu: calling logical unit
2527
  @param additional_nodes: list of nodes not in the config to distribute to
2528

2529
  """
2530
  # 1. Gather target nodes
2531
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2532
  dist_nodes = lu.cfg.GetOnlineNodeList()
2533
  if additional_nodes is not None:
2534
    dist_nodes.extend(additional_nodes)
2535
  if myself.name in dist_nodes:
2536
    dist_nodes.remove(myself.name)
2537

    
2538
  # 2. Gather files to distribute
2539
  dist_files = set([constants.ETC_HOSTS,
2540
                    constants.SSH_KNOWN_HOSTS_FILE,
2541
                    constants.RAPI_CERT_FILE,
2542
                    constants.RAPI_USERS_FILE,
2543
                    constants.CONFD_HMAC_KEY,
2544
                   ])
2545

    
2546
  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2547
  for hv_name in enabled_hypervisors:
2548
    hv_class = hypervisor.GetHypervisor(hv_name)
2549
    dist_files.update(hv_class.GetAncillaryFiles())
2550

    
2551
  # 3. Perform the files upload
2552
  for fname in dist_files:
2553
    if os.path.exists(fname):
2554
      result = lu.rpc.call_upload_file(dist_nodes, fname)
2555
      for to_node, to_result in result.items():
2556
        msg = to_result.fail_msg
2557
        if msg:
2558
          msg = ("Copy of file %s to node %s failed: %s" %
2559
                 (fname, to_node, msg))
2560
          lu.proc.LogWarning(msg)
2561

    
2562

    
2563
class LURedistributeConfig(NoHooksLU):
2564
  """Force the redistribution of cluster configuration.
2565

2566
  This is a very simple LU.
2567

2568
  """
2569
  _OP_REQP = []
2570
  REQ_BGL = False
2571

    
2572
  def ExpandNames(self):
2573
    self.needed_locks = {
2574
      locking.LEVEL_NODE: locking.ALL_SET,
2575
    }
2576
    self.share_locks[locking.LEVEL_NODE] = 1
2577

    
2578
  def CheckPrereq(self):
2579
    """Check prerequisites.
2580

2581
    """
2582

    
2583
  def Exec(self, feedback_fn):
2584
    """Redistribute the configuration.
2585

2586
    """
2587
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
2588
    _RedistributeAncillaryFiles(self)
2589

    
2590

    
2591
def _WaitForSync(lu, instance, oneshot=False):
2592
  """Sleep and poll for an instance's disk to sync.
2593

2594
  """
2595
  if not instance.disks:
2596
    return True
2597

    
2598
  if not oneshot:
2599
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
2600

    
2601
  node = instance.primary_node
2602

    
2603
  for dev in instance.disks:
2604
    lu.cfg.SetDiskID(dev, node)
2605

    
2606
  # TODO: Convert to utils.Retry
2607

    
2608
  retries = 0
2609
  degr_retries = 10 # in seconds, as we sleep 1 second each time
2610
  while True:
2611
    max_time = 0
2612
    done = True
2613
    cumul_degraded = False
2614
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
2615
    msg = rstats.fail_msg
2616
    if msg:
2617
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
2618
      retries += 1
2619
      if retries >= 10:
2620
        raise errors.RemoteError("Can't contact node %s for mirror data,"
2621
                                 " aborting." % node)
2622
      time.sleep(6)
2623
      continue
2624
    rstats = rstats.payload
2625
    retries = 0
2626
    for i, mstat in enumerate(rstats):
2627
      if mstat is None:
2628
        lu.LogWarning("Can't compute data for node %s/%s",
2629
                           node, instance.disks[i].iv_name)
2630
        continue
2631

    
2632
      cumul_degraded = (cumul_degraded or
2633
                        (mstat.is_degraded and mstat.sync_percent is None))
2634
      if mstat.sync_percent is not None:
2635
        done = False
2636
        if mstat.estimated_time is not None:
2637
          rem_time = "%d estimated seconds remaining" % mstat.estimated_time
2638
          max_time = mstat.estimated_time
2639
        else:
2640
          rem_time = "no time estimate"
2641
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
2642
                        (instance.disks[i].iv_name, mstat.sync_percent,
2643
                         rem_time))
2644

    
2645
    # if we're done but degraded, let's do a few small retries, to
2646
    # make sure we see a stable and not transient situation; therefore
2647
    # we force restart of the loop
2648
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
2649
      logging.info("Degraded disks found, %d retries left", degr_retries)
2650
      degr_retries -= 1
2651
      time.sleep(1)
2652
      continue
2653

    
2654
    if done or oneshot:
2655
      break
2656

    
2657
    time.sleep(min(60, max_time))
2658

    
2659
  if done:
2660
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
2661
  return not cumul_degraded
2662

    
2663

    
2664
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
2665
  """Check that mirrors are not degraded.
2666

2667
  The ldisk parameter, if True, will change the test from the
2668
  is_degraded attribute (which represents overall non-ok status for
2669
  the device(s)) to the ldisk (representing the local storage status).
2670

2671
  """
2672
  lu.cfg.SetDiskID(dev, node)
2673

    
2674
  result = True
2675

    
2676
  if on_primary or dev.AssembleOnSecondary():
2677
    rstats = lu.rpc.call_blockdev_find(node, dev)
2678
    msg = rstats.fail_msg
2679
    if msg:
2680
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
2681
      result = False
2682
    elif not rstats.payload:
2683
      lu.LogWarning("Can't find disk on node %s", node)
2684
      result = False
2685
    else:
2686
      if ldisk:
2687
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
2688
      else:
2689
        result = result and not rstats.payload.is_degraded
2690

    
2691
  if dev.children:
2692
    for child in dev.children:
2693
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
2694

    
2695
  return result
2696

    
2697

    
2698
class LUDiagnoseOS(NoHooksLU):
2699
  """Logical unit for OS diagnose/query.
2700

2701
  """
2702
  _OP_REQP = ["output_fields", "names"]
2703
  REQ_BGL = False
2704
  _FIELDS_STATIC = utils.FieldSet()
2705
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants")
2706
  # Fields that need calculation of global os validity
2707
  _FIELDS_NEEDVALID = frozenset(["valid", "variants"])
2708

    
2709
  def ExpandNames(self):
2710
    if self.op.names:
2711
      raise errors.OpPrereqError("Selective OS query not supported",
2712
                                 errors.ECODE_INVAL)
2713

    
2714
    _CheckOutputFields(static=self._FIELDS_STATIC,
2715
                       dynamic=self._FIELDS_DYNAMIC,
2716
                       selected=self.op.output_fields)
2717

    
2718
    # Lock all nodes, in shared mode
2719
    # Temporary removal of locks, should be reverted later
2720
    # TODO: reintroduce locks when they are lighter-weight
2721
    self.needed_locks = {}
2722
    #self.share_locks[locking.LEVEL_NODE] = 1
2723
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2724

    
2725
  def CheckPrereq(self):
2726
    """Check prerequisites.
2727

2728
    """
2729

    
2730
  @staticmethod
2731
  def _DiagnoseByOS(rlist):
2732
    """Remaps a per-node return list into an a per-os per-node dictionary
2733

2734
    @param rlist: a map with node names as keys and OS objects as values
2735

2736
    @rtype: dict
2737
    @return: a dictionary with osnames as keys and as value another map, with
2738
        nodes as keys and tuples of (path, status, diagnose) as values, eg::
2739

2740
          {"debian-etch": {"node1": [(/usr/lib/..., True, ""),
2741
                                     (/srv/..., False, "invalid api")],
2742
                           "node2": [(/srv/..., True, "")]}
2743
          }
2744

2745
    """
2746
    all_os = {}
2747
    # we build here the list of nodes that didn't fail the RPC (at RPC
2748
    # level), so that nodes with a non-responding node daemon don't
2749
    # make all OSes invalid
2750
    good_nodes = [node_name for node_name in rlist
2751
                  if not rlist[node_name].fail_msg]
2752
    for node_name, nr in rlist.items():
2753
      if nr.fail_msg or not nr.payload:
2754
        continue
2755
      for name, path, status, diagnose, variants in nr.payload:
2756
        if name not in all_os:
2757
          # build a list of nodes for this os containing empty lists
2758
          # for each node in node_list
2759
          all_os[name] = {}
2760
          for nname in good_nodes:
2761
            all_os[name][nname] = []
2762
        all_os[name][node_name].append((path, status, diagnose, variants))
2763
    return all_os
2764

    
2765
  def Exec(self, feedback_fn):
2766
    """Compute the list of OSes.
2767

2768
    """
2769
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
2770
    node_data = self.rpc.call_os_diagnose(valid_nodes)
2771
    pol = self._DiagnoseByOS(node_data)
2772
    output = []
2773
    calc_valid = self._FIELDS_NEEDVALID.intersection(self.op.output_fields)
2774
    calc_variants = "variants" in self.op.output_fields
2775

    
2776
    for os_name, os_data in pol.items():
2777
      row = []
2778
      if calc_valid:
2779
        valid = True
2780
        variants = None
2781
        for osl in os_data.values():
2782
          valid = valid and osl and osl[0][1]
2783
          if not valid:
2784
            variants = None
2785
            break
2786
          if calc_variants:
2787
            node_variants = osl[0][3]
2788
            if variants is None:
2789
              variants = node_variants
2790
            else:
2791
              variants = [v for v in variants if v in node_variants]
2792

    
2793
      for field in self.op.output_fields:
2794
        if field == "name":
2795
          val = os_name
2796
        elif field == "valid":
2797
          val = valid
2798
        elif field == "node_status":
2799
          # this is just a copy of the dict
2800
          val = {}
2801
          for node_name, nos_list in os_data.items():
2802
            val[node_name] = nos_list
2803
        elif field == "variants":
2804
          val = variants
2805
        else:
2806
          raise errors.ParameterError(field)
2807
        row.append(val)
2808
      output.append(row)
2809

    
2810
    return output
2811

    
2812

    
2813
class LURemoveNode(LogicalUnit):
2814
  """Logical unit for removing a node.
2815

2816
  """
2817
  HPATH = "node-remove"
2818
  HTYPE = constants.HTYPE_NODE
2819
  _OP_REQP = ["node_name"]
2820

    
2821
  def BuildHooksEnv(self):
2822
    """Build hooks env.
2823

2824
    This doesn't run on the target node in the pre phase as a failed
2825
    node would then be impossible to remove.
2826

2827
    """
2828
    env = {
2829
      "OP_TARGET": self.op.node_name,
2830
      "NODE_NAME": self.op.node_name,
2831
      }
2832
    all_nodes = self.cfg.GetNodeList()
2833
    try:
2834
      all_nodes.remove(self.op.node_name)
2835
    except ValueError:
2836
      logging.warning("Node %s which is about to be removed not found"
2837
                      " in the all nodes list", self.op.node_name)
2838
    return env, all_nodes, all_nodes
2839

    
2840
  def CheckPrereq(self):
2841
    """Check prerequisites.
2842

2843
    This checks:
2844
     - the node exists in the configuration
2845
     - it does not have primary or secondary instances
2846
     - it's not the master
2847

2848
    Any errors are signaled by raising errors.OpPrereqError.
2849

2850
    """
2851
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
2852
    node = self.cfg.GetNodeInfo(self.op.node_name)
2853
    assert node is not None
2854

    
2855
    instance_list = self.cfg.GetInstanceList()
2856

    
2857
    masternode = self.cfg.GetMasterNode()
2858
    if node.name == masternode:
2859
      raise errors.OpPrereqError("Node is the master node,"
2860
                                 " you need to failover first.",
2861
                                 errors.ECODE_INVAL)
2862

    
2863
    for instance_name in instance_list:
2864
      instance = self.cfg.GetInstanceInfo(instance_name)
2865
      if node.name in instance.all_nodes:
2866
        raise errors.OpPrereqError("Instance %s is still running on the node,"
2867
                                   " please remove first." % instance_name,
2868
                                   errors.ECODE_INVAL)
2869
    self.op.node_name = node.name
2870
    self.node = node
2871

    
2872
  def Exec(self, feedback_fn):
2873
    """Removes the node from the cluster.
2874

2875
    """
2876
    node = self.node
2877
    logging.info("Stopping the node daemon and removing configs from node %s",
2878
                 node.name)
2879

    
2880
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
2881

    
2882
    # Promote nodes to master candidate as needed
2883
    _AdjustCandidatePool(self, exceptions=[node.name])
2884
    self.context.RemoveNode(node.name)
2885

    
2886
    # Run post hooks on the node before it's removed
2887
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
2888
    try:
2889
      hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
2890
    except:
2891
      # pylint: disable-msg=W0702
2892
      self.LogWarning("Errors occurred running hooks on %s" % node.name)
    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      # FIXME: this should be done via an rpc call to node daemon
      utils.RemoveHostFromEtcHosts(node.name)
      _RedistributeAncillaryFiles(self)


class LUQueryNodes(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable-msg=W0142
  _OP_REQP = ["output_fields", "names", "use_locking"]
  REQ_BGL = False

  _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
                    "master_candidate", "offline", "drained"]

  _FIELDS_DYNAMIC = utils.FieldSet(
    "dtotal", "dfree",
    "mtotal", "mnode", "mfree",
    "bootid",
    "ctotal", "cnodes", "csockets",
    )

  _FIELDS_STATIC = utils.FieldSet(*[
    "pinst_cnt", "sinst_cnt",
    "pinst_list", "sinst_list",
    "pip", "sip", "tags",
    "master",
    "role"] + _SIMPLE_FIELDS
    )

  def ExpandNames(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.names:
      self.wanted = _GetWantedNodes(self, self.op.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
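    # i.e. do_node_query is True when at least one requested field cannot be
    # answered from the configuration alone and requires querying the nodes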
    self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      # if we don't request only static fields, we need to lock the nodes
      self.needed_locks[locking.LEVEL_NODE] = self.wanted

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # The validation of the node list is done in the _GetWantedNodes,
    # if non empty, and if empty, there's no validation to do
    pass

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    all_info = self.cfg.GetAllNodesInfo()
    if self.do_locking:
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
    elif self.wanted != locking.ALL_SET:
      nodenames = self.wanted
      missing = set(nodenames).difference(all_info.keys())
      if missing:
        raise errors.OpExecError(
          "Some nodes were removed before retrieving their data: %s" % missing)
    else:
      nodenames = all_info.keys()

    nodenames = utils.NiceSort(nodenames)
    nodelist = [all_info[name] for name in nodenames]

    # begin data gathering

    if self.do_node_query:
      live_data = {}
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                          self.cfg.GetHypervisorType())
      for name in nodenames:
        nodeinfo = node_data[name]
        if not nodeinfo.fail_msg and nodeinfo.payload:
          nodeinfo = nodeinfo.payload
          fn = utils.TryConvert
          live_data[name] = {
            "mtotal": fn(int, nodeinfo.get('memory_total', None)),
            "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
            "mfree": fn(int, nodeinfo.get('memory_free', None)),
            "dtotal": fn(int, nodeinfo.get('vg_size', None)),
            "dfree": fn(int, nodeinfo.get('vg_free', None)),
            "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
            "bootid": nodeinfo.get('bootid', None),
            "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
            "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
            }
        else:
          live_data[name] = {}
    else:
      live_data = dict.fromkeys(nodenames, {})

    node_to_primary = dict([(name, set()) for name in nodenames])
    node_to_secondary = dict([(name, set()) for name in nodenames])
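    # node name -> set of instance names using it as primary/secondary;
    # only filled in below if an instance-related field was requested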

    inst_fields = frozenset(("pinst_cnt", "pinst_list",
                             "sinst_cnt", "sinst_list"))
    if inst_fields & frozenset(self.op.output_fields):
      inst_data = self.cfg.GetAllInstancesInfo()

      for inst in inst_data.values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)

    master_node = self.cfg.GetMasterNode()

    # end data gathering

    output = []
    for node in nodelist:
      node_output = []
      for field in self.op.output_fields:
        if field in self._SIMPLE_FIELDS:
          val = getattr(node, field)
        elif field == "pinst_list":
          val = list(node_to_primary[node.name])
        elif field == "sinst_list":
          val = list(node_to_secondary[node.name])
        elif field == "pinst_cnt":
          val = len(node_to_primary[node.name])
        elif field == "sinst_cnt":
          val = len(node_to_secondary[node.name])
        elif field == "pip":
          val = node.primary_ip
        elif field == "sip":
          val = node.secondary_ip
        elif field == "tags":
          val = list(node.GetTags())
        elif field == "master":
          val = node.name == master_node
        elif self._FIELDS_DYNAMIC.Matches(field):
          val = live_data[node.name].get(field, None)
        elif field == "role":
          if node.name == master_node:
            val = "M"
          elif node.master_candidate:
            val = "C"
          elif node.drained:
            val = "D"
          elif node.offline:
            val = "O"
          else:
            val = "R"
        else:
          raise errors.ParameterError(field)
        node_output.append(val)
      output.append(node_output)

    return output

class LUQueryNodeVolumes(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  _OP_REQP = ["nodes", "output_fields"]
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def ExpandNames(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the fields required are valid output fields.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Computes the list of volumes and their attributes.

    """
    nodenames = self.nodes
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = [self.cfg.GetInstanceInfo(iname) for iname
             in self.cfg.GetInstanceList()]

    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
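    # lv_by_node: instance object -> {node name: list of LV names}, used
    # below to map each reported volume back to the instance owning it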

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = nresult.payload[:]
      node_vols.sort(key=lambda vol: vol['dev'])

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol['dev']
          elif field == "vg":
            val = vol['vg']
          elif field == "name":
            val = vol['name']
          elif field == "size":
            val = int(float(vol['size']))
          elif field == "instance":
            for inst in ilist:
              if node not in lv_by_node[inst]:
                continue
              if vol['name'] in lv_by_node[inst][node]:
                val = inst.name
                break
            else:
              val = '-'
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output

class LUQueryNodeStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _OP_REQP = ["nodes", "storage_type", "output_fields"]
  REQ_BGL = False
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)

  def CheckArguments(self):
    _CheckStorageType(self.op.storage_type)

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the fields required are valid output fields.

    """
    self.op.name = getattr(self.op, "name", None)

    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Computes the list of storage units and their attributes.

    """
    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]
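    # field_idx maps each field requested via RPC to its column index in the
    # rows returned by call_storage_list; name_idx is the column used as the
    # key for sorting the results by storage unit name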

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result

class LUModifyNodeStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  _OP_REQP = ["node_name", "storage_type", "name", "changes"]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    _CheckStorageType(self.op.storage_type)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def CheckPrereq(self):
    """Check prerequisites.

    """
    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Modifies the requested storage unit.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))

class LUAddNode(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]

  def CheckArguments(self):
    # validate/normalize the node name
    self.op.node_name = utils.HostInfo.NormalizeName(self.op.node_name)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      }
    nodes_0 = self.cfg.GetNodeList()
    nodes_1 = nodes_0 + [self.op.node_name, ]
    return env, nodes_0, nodes_1

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) match the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    node_name = self.op.node_name
    cfg = self.cfg

    dns_data = utils.GetHostInfo(node_name)

    node = dns_data.name
    primary_ip = self.op.primary_ip = dns_data.ip
    secondary_ip = getattr(self.op, "secondary_ip", None)
    if secondary_ip is None:
      secondary_ip = primary_ip
    if not utils.IsValidIP(secondary_ip):
      raise errors.OpPrereqError("Invalid secondary IP given",
                                 errors.ECODE_INVAL)
    self.op.secondary_ip = secondary_ip

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    self.changed_primary_ip = False

    for existing_node_name in node_list:
      existing_node = cfg.GetNodeInfo(existing_node_name)

      if self.op.readd and node == existing_node_name:
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
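    # a node is single-homed when its secondary (replication) IP equals its
    # primary IP; mixing single- and dual-homed nodes is rejected below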
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no private ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a private ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                           source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to noded port",
                                   errors.ECODE_ENVIRON)

    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)

    if self.op.readd:
      self.new_node = self.cfg.GetNodeInfo(node)
      assert self.new_node is not None, "Can't retrieve locked node %s" % node
    else:
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))

    # setup ssh on node
    if self.cfg.GetClusterInfo().modify_ssh_setup:
      logging.info("Copy ssh key to node %s", node)
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
      keyarray = []
      keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
                  constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
                  priv_key, pub_key]
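      # the six files above are read in this order and passed positionally to
      # the node_add RPC below (host DSA/RSA key pairs plus the key pair of
      # the GANETI_RUNAS user)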

      for i in keyfiles:
        keyarray.append(utils.ReadFile(i))

      result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
                                      keyarray[2], keyarray[3], keyarray[4],
                                      keyarray[5])
      result.Raise("Cannot transfer ssh keys to the new node")

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      # FIXME: this should be done via an rpc call to node daemon
      utils.AddHostToEtcHosts(new_node.name)

    if new_node.secondary_ip != new_node.primary_ip:
      result = self.rpc.call_node_has_ip_address(new_node.name,
                                                 new_node.secondary_ip)
      result.Raise("Failure checking secondary ip on node %s" % new_node.name,
                   prereq=True, ecode=errors.ECODE_ENVIRON)
      if not result.payload:
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
                                 " you gave (%s). Please fix and re-run this"
                                 " command." % new_node.secondary_ip)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: [node],
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed.")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
      self.context.AddNode(new_node, self.proc.GetECId())


class LUSetNodeParams(LogicalUnit):
  """Modifies the parameters of a node.

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    _CheckBooleanOpField(self.op, 'master_candidate')
    _CheckBooleanOpField(self.op, 'offline')
    _CheckBooleanOpField(self.op, 'drained')
    _CheckBooleanOpField(self.op, 'auto_promote')
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
    if all_mods.count(None) == 3:
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we're offlining or draining the node
    self.offline_or_drain = (self.op.offline == True or
                             self.op.drained == True)
    self.deoffline_or_drain = (self.op.offline == False or
                               self.op.drained == False)
    self.might_demote = (self.op.master_candidate == False or
                         self.offline_or_drain)
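
    # demoting this node (explicitly or as a side effect of offlining or
    # draining it) may leave too few master candidates; when auto_promote is
    # set we therefore lock all nodes so replacements can be promoted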
    self.lock_all = self.op.auto_promote and self.might_demote


  def ExpandNames(self):
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      }
    nl = [self.cfg.GetMasterNode(),
          self.op.node_name]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via masterfailover",
                                   errors.ECODE_INVAL)


    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto-promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto_promote to allow promotion",
                                   errors.ECODE_INVAL)

    if (self.op.master_candidate == True and
        ((node.offline and not self.op.offline == False) or
         (node.drained and not self.op.drained == False))):
      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
                                 " to master_candidate" % node.name,
                                 errors.ECODE_INVAL)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.deoffline_or_drain and not self.offline_or_drain and not
        self.op.master_candidate == True and not node.master_candidate):
      self.op.master_candidate = _DecideSelfPromotion(self)
      if self.op.master_candidate:
        self.LogInfo("Autopromoting node to master candidate")

    return

  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node

    result = []
    changed_mc = False

    if self.op.offline is not None:
      node.offline = self.op.offline
      result.append(("offline", str(self.op.offline)))
      if self.op.offline == True:
        if node.master_candidate:
          node.master_candidate = False
          changed_mc = True
          result.append(("master_candidate", "auto-demotion due to offline"))
        if node.drained:
          node.drained = False
          result.append(("drained", "clear drained status due to offline"))

    if self.op.master_candidate is not None:
      node.master_candidate = self.op.master_candidate
      changed_mc = True
      result.append(("master_candidate", str(self.op.master_candidate)))
      if self.op.master_candidate == False:
        rrc = self.rpc.call_node_demote_from_mc(node.name)
        msg = rrc.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s" % msg)

    if self.op.drained is not None:
      node.drained = self.op.drained
      result.append(("drained", str(self.op.drained)))
      if self.op.drained == True:
        if node.master_candidate:
          node.master_candidate = False
          changed_mc = True
          result.append(("master_candidate", "auto-demotion due to drain"))
          rrc = self.rpc.call_node_demote_from_mc(node.name)
          msg = rrc.fail_msg
          if msg:
            self.LogWarning("Node failed to demote itself: %s" % msg)
        if node.offline:
          node.offline = False
          result.append(("offline", "clear offline status due to drain"))

    # we locked all nodes, we adjust the CP before updating this node
    if self.lock_all:
      _AdjustCandidatePool(self, [node.name])

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup
    if changed_mc:
      self.context.ReaddNode(node)

    return result


class LUPowercycleNode(NoHooksLU):
  """Powercycles a node.

  """
  _OP_REQP = ["node_name", "force"]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This LU has no prereqs.

    """
    pass

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload


class LUQueryClusterInfo(NoHooksLU):
  """Query cluster configuration.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """No prerequisites needed for this LU.

    """
    pass

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}

    # Filter just for enabled hypervisors
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = {}
      for hv_name, hv_params in hv_dict.items():
        if hv_name in cluster.enabled_hypervisors:
          os_hvp[os_name][hv_name] = hv_params

    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.enabled_hypervisors[0],
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "nicparams": cluster.nicparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "file_storage_dir": cluster.file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      }

    return result


class LUQueryConfigValues(NoHooksLU):
  """Return configuration values.

  """
  _OP_REQP = []
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause")

  def ExpandNames(self):
    self.needed_locks = {}

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def CheckPrereq(self):
    """No prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Dump a representation of the cluster config to the standard output.

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      else:
        raise errors.ParameterError(field)
      values.append(entry)
    return values


class LUActivateInstanceDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)
    if not hasattr(self.op, "ignore_size"):
      self.op.ignore_size = False

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
              _AssembleInstanceDisks(self, self.instance,
                                     ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info


def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: False if the operation failed, otherwise a list of
      (host, instance_visible_name, node_visible_name)
      with the mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for inst_disk in instance.disks:
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for inst_disk in instance.disks:
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in instance.disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info


def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                           ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")


class LUDeactivateInstanceDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    """
    instance = self.instance
    _SafeShutdownInstanceDisks(self, instance)


def _SafeShutdownInstanceDisks(lu, instance):
  """Shutdown block devices of an instance.

  This function checks if an instance is running, before calling
  _ShutdownInstanceDisks.

  """
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance)


def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If ignore_primary is true, errors on the primary node are ignored;
  otherwise they cause the whole shutdown to be reported as failed.

  """
  all_result = True
  for disk in instance.disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if not ignore_primary or node != instance.primary_node:
          all_result = False
  return all_result


def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  free_mem = nodeinfo[node].payload.get('memory_free', None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)


def _CheckNodesFreeDisk(lu, nodenames, requested):
  """Checks if nodes have enough free disk space in the default VG.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
                                   lu.cfg.GetHypervisorType())
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    vg_free = info.payload.get("vg_free", None)
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node %s,"
                                 " result was '%s'" % (node, vg_free),
                                 errors.ECODE_ENVIRON)
    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s:"
                                 " required %d MiB, available %d MiB" %
                                 (node, requested, vg_free),
                                 errors.ECODE_NORES)


class LUStartupInstance(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "force"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra beparams
    self.beparams = getattr(self.op, "beparams", {})
    if self.beparams:
      if not isinstance(self.beparams, dict):
        raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
                                   " dict" % (type(self.beparams), ),
                                   errors.ECODE_INVAL)
      # fill the beparams dict
      utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
      self.op.beparams = self.beparams

    # extra hvparams
    self.hvparams = getattr(self.op, "hvparams", {})
    if self.hvparams:
      if not isinstance(self.hvparams, dict):
        raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
                                   " dict" % (type(self.hvparams), ),
                                   errors.ECODE_INVAL)

      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = objects.FillDict(cluster.hvparams[instance.hypervisor],
                                    instance.hvparams)
      filled_hvp.update(self.hvparams)
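      # precedence (lowest to highest): cluster-level hvparams, the
      # instance's own hvparams, then the one-off hvparams passed to this
      # start operation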
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
      self.op.hvparams = self.hvparams

    _CheckNodeOnline(self, instance.primary_node)

    bep = self.cfg.GetClusterInfo().FillBE(instance)
    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node,
                      prereq=True, ecode=errors.ECODE_ENVIRON)
    if not remote_info.payload: # not running already
      _CheckNodeFreeMemory(self, instance.primary_node,
                           "starting instance %s" % instance.name,
                           bep[constants.BE_MEMORY], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    """
    instance = self.instance
    force = self.op.force

    self.cfg.MarkInstanceUp(instance.name)

    node_current = instance.primary_node

    _StartInstanceDisks(self, instance, force)

    result = self.rpc.call_instance_start(node_current, instance,
                                          self.hvparams, self.beparams)
    msg = result.fail_msg
    if msg:
      _ShutdownInstanceDisks(self, instance)
      raise errors.OpExecError("Could not start instance: %s" % msg)


class LURebootInstance(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
                                   constants.INSTANCE_REBOOT_HARD,
                                   constants.INSTANCE_REBOOT_FULL]:
      raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
                                  (constants.INSTANCE_REBOOT_SOFT,
                                   constants.INSTANCE_REBOOT_HARD,
                                   constants.INSTANCE_REBOOT_FULL))
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    node_current = instance.primary_node
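
    # soft and hard reboots are delegated to the hypervisor on the primary
    # node; a full reboot is emulated below as shutdown + disk restart + start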
4285

    
4286
    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
4287
                       constants.INSTANCE_REBOOT_HARD]:
4288
      for disk in instance.disks:
4289
        self.cfg.SetDiskID(disk, node_current)
4290
      result = self.rpc.call_instance_reboot(node_current, instance,
4291
                                             reboot_type,
4292
                                             self.shutdown_timeout)
4293
      result.Raise("Could not reboot instance")
4294
    else:
4295
      result = self.rpc.call_instance_shutdown(node_current, instance,
4296
                                               self.shutdown_timeout)
4297
      result.Raise("Could not shutdown instance for full reboot")
4298
      _ShutdownInstanceDisks(self, instance)
4299
      _StartInstanceDisks(self, instance, ignore_secondaries)
4300
      result = self.rpc.call_instance_start(node_current, instance, None, None)
4301
      msg = result.fail_msg
4302
      if msg:
4303
        _ShutdownInstanceDisks(self, instance)
4304
        raise errors.OpExecError("Could not start instance for"
4305
                                 " full reboot: %s" % msg)
4306

    
4307
    self.cfg.MarkInstanceUp(instance.name)
4308

    
4309

    
4310
class LUShutdownInstance(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.timeout = getattr(self.op, "timeout",
                           constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.timeout
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    node_current = instance.primary_node
    timeout = self.timeout
    self.cfg.MarkInstanceDown(instance.name)
    result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
    msg = result.fail_msg
    if msg:
      self.proc.LogWarning("Could not shutdown instance: %s" % msg)

    _ShutdownInstanceDisks(self, instance)


class LUReinstallInstance(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot reinstall")

    self.op.os_type = getattr(self.op, "os_type", None)
    self.op.force_variant = getattr(self.op, "force_variant", False)
    if self.op.os_type is not None:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
                                             self.op.debug_level)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)


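# Note on the "disks" opcode parameter used by the LU below: an empty list
# means "recreate every disk", while a non-empty list of integer indices
# limits recreation to those disks (all other indices end up in to_skip).
# A hypothetical invocation could pass disks=[0, 2] to recreate only the
# first and third disk; the exact opcode wrapper is not shown here.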
class LURecreateInstanceDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disks"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    if not isinstance(self.op.disks, list):
      raise errors.OpPrereqError("Invalid disks parameter", errors.ECODE_INVAL)
    for item in self.op.disks:
      if (not isinstance(item, int) or
          item < 0):
        raise errors.OpPrereqError("Invalid disk specification '%s'" %
                                   str(item), errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot recreate disks")

    if not self.op.disks:
      self.op.disks = range(len(instance.disks))
    else:
      for idx in self.op.disks:
        if idx >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
                                     errors.ECODE_INVAL)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Recreate the disks.

    """
    to_skip = []
    for idx, _ in enumerate(self.instance.disks):
      if idx not in self.op.disks: # disk idx has not been passed in
        to_skip.append(idx)
        continue

    _CreateDisks(self, self.instance, to_skip=to_skip)


class LURenameInstance(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "new_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None
    _CheckNodeOnline(self, instance.primary_node)
    _CheckInstanceDown(self, instance, "cannot rename")
    self.instance = instance

    # new name verification
    name_info = utils.GetHostInfo(self.op.new_name)

    self.op.new_name = new_name = name_info.name
    instance_list = self.cfg.GetInstanceList()
    if new_name in instance_list:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name, errors.ECODE_EXISTS)

    if not getattr(self.op, "ignore_ip", False):
      if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (name_info.ip, new_name),
                                   errors.ECODE_NOTUNIQUE)

  def Exec(self, feedback_fn):
    """Rename the instance.

    """
    inst = self.instance
    old_name = inst.name

    if inst.disk_template == constants.DT_FILE:
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if inst.disk_template == constants.DT_FILE:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (inst.primary_node, old_file_storage_dir,
                    new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    try:
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name, self.op.debug_level)
      msg = result.fail_msg
      if msg:
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.proc.LogWarning(msg)
    finally:
      _ShutdownInstanceDisks(self, inst)


class LURemoveInstance(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_failures"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["SHUTDOWN_TIMEOUT"] = self.shutdown_timeout
    nl = [self.cfg.GetMasterNode()]
    nl_post = list(self.instance.all_nodes) + nl
    return env, nl, nl_post

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    """
    instance = self.instance
    logging.info("Shutting down instance %s on node %s",
                 instance.name, instance.primary_node)

    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
                                             self.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_failures:
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, instance.primary_node, msg))

    logging.info("Removing block devices for instance %s", instance.name)

    if not _RemoveDisks(self, instance):
      if self.op.ignore_failures:
        feedback_fn("Warning: can't remove instance's disks")
      else:
        raise errors.OpExecError("Can't remove instance's disks")

    logging.info("Removing instance %s out of cluster config", instance.name)

    self.cfg.RemoveInstance(instance.name)
    self.remove_locks[locking.LEVEL_INSTANCE] = instance.name


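# Field selection for the query LU below accepts both plain names and
# parameterized fields matched by the regular expressions in _FIELDS_STATIC.
# An illustrative (not exhaustive) selection, assuming the usual backend
# parameters are defined on the cluster:
#
#   output_fields = ["name", "pnode", "status", "be/memory",
#                    "disk.size/0", "nic.macs"]
#
# Exec() returns one list per instance with values in field order, using
# None where a field does not apply (e.g. a NIC index that does not exist).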
class LUQueryInstances(NoHooksLU):
  """Logical unit for querying instances.

  """
  # pylint: disable-msg=W0142
  _OP_REQP = ["output_fields", "names", "use_locking"]
  REQ_BGL = False
  _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
                    "serial_no", "ctime", "mtime", "uuid"]
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
                                    "admin_state",
                                    "disk_template", "ip", "mac", "bridge",
                                    "nic_mode", "nic_link",
                                    "sda_size", "sdb_size", "vcpus", "tags",
                                    "network_port", "beparams",
                                    r"(disk)\.(size)/([0-9]+)",
                                    r"(disk)\.(sizes)", "disk_usage",
                                    r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
                                    r"(nic)\.(bridge)/([0-9]+)",
                                    r"(nic)\.(macs|ips|modes|links|bridges)",
                                    r"(disk|nic)\.(count)",
                                    "hvparams",
                                    ] + _SIMPLE_FIELDS +
                                  ["hv/%s" % name
                                   for name in constants.HVS_PARAMETERS
                                   if name not in constants.HVC_GLOBALS] +
                                  ["be/%s" % name
                                   for name in constants.BES_PARAMETERS])
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")


  def ExpandNames(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_INSTANCE] = 1
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.names:
      self.wanted = _GetWantedInstances(self, self.op.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.do_locking:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Computes the list of instances and their attributes.

    """
    # pylint: disable-msg=R0912
    # way too many branches here
    all_info = self.cfg.GetAllInstancesInfo()
    if self.wanted == locking.ALL_SET:
      # caller didn't specify instance names, so ordering is not important
      if self.do_locking:
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
      else:
        instance_names = all_info.keys()
      instance_names = utils.NiceSort(instance_names)
    else:
      # caller did specify names, so we must keep the ordering
      if self.do_locking:
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
      else:
        tgt_set = all_info.keys()
      missing = set(self.wanted).difference(tgt_set)
      if missing:
        raise errors.OpExecError("Some instances were removed before"
                                 " retrieving their data: %s" % missing)
      instance_names = self.wanted

    instance_list = [all_info[iname] for iname in instance_names]

    # begin data gathering

    nodes = frozenset([inst.primary_node for inst in instance_list])
    hv_list = list(set([inst.hypervisor for inst in instance_list]))

    bad_nodes = []
    off_nodes = []
    if self.do_node_query:
      live_data = {}
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          off_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        else:
          if result.payload:
            live_data.update(result.payload)
          # else no instance is alive
    else:
      live_data = dict([(name, {}) for name in instance_names])

    # end data gathering

    HVPREFIX = "hv/"
    BEPREFIX = "be/"
    output = []
    cluster = self.cfg.GetClusterInfo()
    for instance in instance_list:
      iout = []
      i_hv = cluster.FillHV(instance, skip_globals=True)
      i_be = cluster.FillBE(instance)
      i_nicp = [objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
                                 nic.nicparams) for nic in instance.nics]
      for field in self.op.output_fields:
        st_match = self._FIELDS_STATIC.Matches(field)
        if field in self._SIMPLE_FIELDS:
          val = getattr(instance, field)
        elif field == "pnode":
          val = instance.primary_node
        elif field == "snodes":
          val = list(instance.secondary_nodes)
        elif field == "admin_state":
          val = instance.admin_up
        elif field == "oper_state":
          if instance.primary_node in bad_nodes:
            val = None
          else:
            val = bool(live_data.get(instance.name))
        elif field == "status":
          if instance.primary_node in off_nodes:
            val = "ERROR_nodeoffline"
          elif instance.primary_node in bad_nodes:
            val = "ERROR_nodedown"
          else:
            running = bool(live_data.get(instance.name))
            if running:
              if instance.admin_up:
                val = "running"
              else:
                val = "ERROR_up"
            else:
              if instance.admin_up:
                val = "ERROR_down"
              else:
                val = "ADMIN_down"
        elif field == "oper_ram":
          if instance.primary_node in bad_nodes:
            val = None
          elif instance.name in live_data:
            val = live_data[instance.name].get("memory", "?")
          else:
            val = "-"
        elif field == "vcpus":
          val = i_be[constants.BE_VCPUS]
        elif field == "disk_template":
          val = instance.disk_template
        elif field == "ip":
          if instance.nics:
            val = instance.nics[0].ip
          else:
            val = None
        elif field == "nic_mode":
          if instance.nics:
            val = i_nicp[0][constants.NIC_MODE]
          else:
            val = None
        elif field == "nic_link":
          if instance.nics:
            val = i_nicp[0][constants.NIC_LINK]
          else:
            val = None
        elif field == "bridge":
          if (instance.nics and
              i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
            val = i_nicp[0][constants.NIC_LINK]
          else:
            val = None
        elif field == "mac":
          if instance.nics:
            val = instance.nics[0].mac
          else:
            val = None
        elif field == "sda_size" or field == "sdb_size":
          idx = ord(field[2]) - ord('a')
          try:
            val = instance.FindDisk(idx).size
          except errors.OpPrereqError:
            val = None
        elif field == "disk_usage": # total disk usage per node
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
        elif field == "tags":
          val = list(instance.GetTags())
        elif field == "hvparams":
          val = i_hv
        elif (field.startswith(HVPREFIX) and
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
              field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
          val = i_hv.get(field[len(HVPREFIX):], None)
        elif field == "beparams":
          val = i_be
        elif (field.startswith(BEPREFIX) and
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
          val = i_be.get(field[len(BEPREFIX):], None)
        elif st_match and st_match.groups():
          # matches a variable list
          st_groups = st_match.groups()
          if st_groups and st_groups[0] == "disk":
            if st_groups[1] == "count":
              val = len(instance.disks)
            elif st_groups[1] == "sizes":
              val = [disk.size for disk in instance.disks]
            elif st_groups[1] == "size":
              try:
                val = instance.FindDisk(st_groups[2]).size
              except errors.OpPrereqError:
                val = None
            else:
              assert False, "Unhandled disk parameter"
          elif st_groups[0] == "nic":
            if st_groups[1] == "count":
              val = len(instance.nics)
            elif st_groups[1] == "macs":
              val = [nic.mac for nic in instance.nics]
            elif st_groups[1] == "ips":
              val = [nic.ip for nic in instance.nics]
            elif st_groups[1] == "modes":
              val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
            elif st_groups[1] == "links":
              val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
            elif st_groups[1] == "bridges":
              val = []
              for nicp in i_nicp:
                if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
                  val.append(nicp[constants.NIC_LINK])
                else:
                  val.append(None)
            else:
              # index-based item
              nic_idx = int(st_groups[2])
              if nic_idx >= len(instance.nics):
                val = None
              else:
                if st_groups[1] == "mac":
                  val = instance.nics[nic_idx].mac
                elif st_groups[1] == "ip":
                  val = instance.nics[nic_idx].ip
                elif st_groups[1] == "mode":
                  val = i_nicp[nic_idx][constants.NIC_MODE]
                elif st_groups[1] == "link":
                  val = i_nicp[nic_idx][constants.NIC_LINK]
                elif st_groups[1] == "bridge":
                  nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
                  if nic_mode == constants.NIC_MODE_BRIDGED:
                    val = i_nicp[nic_idx][constants.NIC_LINK]
                  else:
                    val = None
                else:
                  assert False, "Unhandled NIC parameter"
          else:
            assert False, ("Declared but unhandled variable parameter '%s'" %
                           field)
        else:
          assert False, "Declared but unhandled parameter '%s'" % field
        iout.append(val)
      output.append(iout)

    return output


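# Failover (below) only works for DTS_NET_MIRROR disk templates: the instance
# is shut down on the current primary, the configuration is flipped so the
# old secondary becomes the primary, and the instance is started there if it
# was marked as up.  No data is copied; the mirrored disks are simply reused.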
class LUFailoverInstance(LogicalUnit):
  """Failover an instance.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_consistency"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self.instance
    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]
    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      "OLD_PRIMARY": source_node,
      "OLD_SECONDARY": target_node,
      "NEW_PRIMARY": target_node,
      "NEW_SECONDARY": source_node,
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    nl_post = list(nl)
    nl_post.append(source_node)
    return env, nl, nl_post

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    bep = self.cfg.GetClusterInfo().FillBE(instance)
    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " network mirrored, cannot failover.",
                                 errors.ECODE_STATE)

    secondary_nodes = instance.secondary_nodes
    if not secondary_nodes:
      raise errors.ProgrammerError("no secondary node but using "
                                   "a mirrored disk template")

    target_node = secondary_nodes[0]
    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    if instance.admin_up:
      # check memory requirements on the secondary node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]

    if instance.admin_up:
      feedback_fn("* checking disk consistency between source and target")
      for dev in instance.disks:
        # for drbd, these are drbd over lvm
        if not _CheckDiskConsistency(self, dev, target_node, False):
          if not self.op.ignore_consistency:
            raise errors.OpExecError("Disk %s is degraded on target node,"
                                     " aborting failover." % dev.iv_name)
    else:
      feedback_fn("* not checking disk consistency as instance is not running")

    feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    feedback_fn("* deactivating the instance's disks on source node")
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
      raise errors.OpExecError("Can't shut down the instance's disks.")

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, feedback_fn)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      feedback_fn("* activating the instance's disks on target node")
      logging.info("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      feedback_fn("* starting the instance on the target node")
      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))


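# Live migration (below) differs from failover in that the instance keeps
# running: the actual work is delegated to the TLMigrateInstance tasklet,
# which drives the DRBD disks through dual-master mode while the hypervisor
# moves the running instance to the secondary node.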
class LUMigrateInstance(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "live", "cleanup"]

  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       self.op.live, self.op.cleanup)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]
    env = _BuildInstanceHookEnvByObject(self, instance)
    env["MIGRATE_LIVE"] = self.op.live
    env["MIGRATE_CLEANUP"] = self.op.cleanup
    env.update({
        "OLD_PRIMARY": source_node,
        "OLD_SECONDARY": target_node,
        "NEW_PRIMARY": target_node,
        "NEW_SECONDARY": source_node,
        })
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    nl_post = list(nl)
    nl_post.append(source_node)
    return env, nl, nl_post


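# Moving an instance (below) is the copy-based counterpart for non-mirrored
# layouts: only LV- or file-backed disks are accepted, the instance is shut
# down, new disks are created on the target node and filled via the
# blockdev_assemble/blockdev_export RPC pair, and the old disks are removed
# once the copy succeeds.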
class LUMoveInstance(LogicalUnit):
  """Move an instance by data-copying.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "target_node"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
                                       self.op.target_node]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)

    if instance.admin_up:
      # check memory requirements on the secondary node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))


class LUMigrateNode(LogicalUnit):
  """Migrate all instances from a node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name", "live"]
  REQ_BGL = False

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    # Create tasklets for migrating all instances on this node
    names = []
    tasklets = []

    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
      logging.debug("Migrating instance %s", inst.name)
      names.append(inst.name)

      tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))

    self.tasklets = tasklets

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = names

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "NODE_NAME": self.op.node_name,
      }

    nl = [self.cfg.GetMasterNode()]

    return (env, nl, nl)


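# The tasklet below drives DRBD8 through the following mode changes during a
# migration (see _ExecMigration and _ExecCleanup): demote the target to
# secondary, go standalone, reconnect in dual-master mode, wait for resync,
# migrate the instance, then demote the old primary and reconnect in
# single-master mode, waiting for resync again.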
class TLMigrateInstance(Tasklet):
5405
  def __init__(self, lu, instance_name, live, cleanup):
5406
    """Initializes this class.
5407

5408
    """
5409
    Tasklet.__init__(self, lu)
5410

    
5411
    # Parameters
5412
    self.instance_name = instance_name
5413
    self.live = live
5414
    self.cleanup = cleanup
5415

    
5416
  def CheckPrereq(self):
5417
    """Check prerequisites.
5418

5419
    This checks that the instance is in the cluster.
5420

5421
    """
5422
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5423
    instance = self.cfg.GetInstanceInfo(instance_name)
5424
    assert instance is not None
5425

    
5426
    if instance.disk_template != constants.DT_DRBD8:
5427
      raise errors.OpPrereqError("Instance's disk layout is not"
5428
                                 " drbd8, cannot migrate.", errors.ECODE_STATE)
5429

    
5430
    secondary_nodes = instance.secondary_nodes
5431
    if not secondary_nodes:
5432
      raise errors.ConfigurationError("No secondary node but using"
5433
                                      " drbd8 disk template")
5434

    
5435
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
5436

    
5437
    target_node = secondary_nodes[0]
5438
    # check memory requirements on the secondary node
5439
    _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
5440
                         instance.name, i_be[constants.BE_MEMORY],
5441
                         instance.hypervisor)
5442

    
5443
    # check bridge existance
5444
    _CheckInstanceBridgesExist(self, instance, node=target_node)
5445

    
5446
    if not self.cleanup:
5447
      _CheckNodeNotDrained(self, target_node)
5448
      result = self.rpc.call_instance_migratable(instance.primary_node,
5449
                                                 instance)
5450
      result.Raise("Can't migrate, please use failover",
5451
                   prereq=True, ecode=errors.ECODE_STATE)
5452

    
5453
    self.instance = instance
5454

    
5455
  def _WaitUntilSync(self):
5456
    """Poll with custom rpc for disk sync.
5457

5458
    This uses our own step-based rpc call.
5459

5460
    """
5461
    self.feedback_fn("* wait until resync is done")
5462
    all_done = False
5463
    while not all_done:
5464
      all_done = True
5465
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5466
                                            self.nodes_ip,
5467
                                            self.instance.disks)
5468
      min_percent = 100
5469
      for node, nres in result.items():
5470
        nres.Raise("Cannot resync disks on node %s" % node)
5471
        node_done, node_percent = nres.payload
5472
        all_done = all_done and node_done
5473
        if node_percent is not None:
5474
          min_percent = min(min_percent, node_percent)
5475
      if not all_done:
5476
        if min_percent < 100:
5477
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
5478
        time.sleep(2)
5479

    
5480
  def _EnsureSecondary(self, node):
5481
    """Demote a node to secondary.
5482

5483
    """
5484
    self.feedback_fn("* switching node %s to secondary mode" % node)
5485

    
5486
    for dev in self.instance.disks:
5487
      self.cfg.SetDiskID(dev, node)
5488

    
5489
    result = self.rpc.call_blockdev_close(node, self.instance.name,
5490
                                          self.instance.disks)
5491
    result.Raise("Cannot change disk to secondary on node %s" % node)
5492

    
5493
  def _GoStandalone(self):
5494
    """Disconnect from the network.
5495

5496
    """
5497
    self.feedback_fn("* changing into standalone mode")
5498
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5499
                                               self.instance.disks)
5500
    for node, nres in result.items():
5501
      nres.Raise("Cannot disconnect disks node %s" % node)
5502

    
5503
  def _GoReconnect(self, multimaster):
5504
    """Reconnect to the network.
5505

5506
    """
5507
    if multimaster:
5508
      msg = "dual-master"
5509
    else:
5510
      msg = "single-master"
5511
    self.feedback_fn("* changing disks into %s mode" % msg)
5512
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5513
                                           self.instance.disks,
5514
                                           self.instance.name, multimaster)
5515
    for node, nres in result.items():
5516
      nres.Raise("Cannot change disks config on node %s" % node)
5517

    
5518
  def _ExecCleanup(self):
5519
    """Try to cleanup after a failed migration.
5520

5521
    The cleanup is done by:
5522
      - check that the instance is running only on one node
5523
        (and update the config if needed)
5524
      - change disks on its secondary node to secondary
5525
      - wait until disks are fully synchronized
5526
      - disconnect from the network
5527
      - change disks into single-master mode
5528
      - wait again until disks are fully synchronized
5529

5530
    """
5531
    instance = self.instance
5532
    target_node = self.target_node
5533
    source_node = self.source_node
5534

    
5535
    # check running on only one node
5536
    self.feedback_fn("* checking where the instance actually runs"
5537
                     " (if this hangs, the hypervisor might be in"
5538
                     " a bad state)")
5539
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
5540
    for node, result in ins_l.items():
5541
      result.Raise("Can't contact node %s" % node)
5542

    
5543
    runningon_source = instance.name in ins_l[source_node].payload
5544
    runningon_target = instance.name in ins_l[target_node].payload
5545

    
5546
    if runningon_source and runningon_target:
5547
      raise errors.OpExecError("Instance seems to be running on two nodes,"
5548
                               " or the hypervisor is confused. You will have"
5549
                               " to ensure manually that it runs only on one"
5550
                               " and restart this operation.")
5551

    
5552
    if not (runningon_source or runningon_target):
5553
      raise errors.OpExecError("Instance does not seem to be running at all."
5554
                               " In this case, it's safer to repair by"
5555
                               " running 'gnt-instance stop' to ensure disk"
5556
                               " shutdown, and then restarting it.")
5557

    
5558
    if runningon_target:
5559
      # the migration has actually succeeded, we need to update the config
5560
      self.feedback_fn("* instance running on secondary node (%s),"
5561
                       " updating config" % target_node)
5562
      instance.primary_node = target_node
5563
      self.cfg.Update(instance, self.feedback_fn)
5564
      demoted_node = source_node
5565
    else:
5566
      self.feedback_fn("* instance confirmed to be running on its"
5567
                       " primary node (%s)" % source_node)
5568
      demoted_node = target_node
5569

    
5570
    self._EnsureSecondary(demoted_node)
5571
    try:
5572
      self._WaitUntilSync()
5573
    except errors.OpExecError:
5574
      # we ignore here errors, since if the device is standalone, it
5575
      # won't be able to sync
5576
      pass
5577
    self._GoStandalone()
5578
    self._GoReconnect(False)
5579
    self._WaitUntilSync()
5580

    
5581
    self.feedback_fn("* done")
5582

    
5583
  def _RevertDiskStatus(self):
5584
    """Try to revert the disk status after a failed migration.
5585

5586
    """
5587
    target_node = self.target_node
5588
    try:
5589
      self._EnsureSecondary(target_node)
5590
      self._GoStandalone()
5591
      self._GoReconnect(False)
5592
      self._WaitUntilSync()
5593
    except errors.OpExecError, err:
5594
      self.lu.LogWarning("Migration failed and I can't reconnect the"
5595
                         " drives: error '%s'\n"
5596
                         "Please look and recover the instance status" %
5597
                         str(err))
5598

    
5599
  def _AbortMigration(self):
5600
    """Call the hypervisor code to abort a started migration.
5601

5602
    """
5603
    instance = self.instance
5604
    target_node = self.target_node
5605
    migration_info = self.migration_info

    abort_result = self.rpc.call_finalize_migration(target_node,
                                                    instance,
                                                    migration_info,
                                                    False)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on target node %s: %s",
                    target_node, abort_msg)
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.

  def _ExecMigration(self):
    """Migrate an instance.

    The migrate is done by:
      - change the disks into dual-master mode
      - wait until disks are fully synchronized again
      - migrate the instance
      - change disks on the new secondary node (the old primary) to secondary
      - wait until disks are fully synchronized
      - change disks into single-master mode

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    self.feedback_fn("* checking disk consistency between source and target")
    for dev in instance.disks:
      if not _CheckDiskConsistency(self, dev, target_node, False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migrate." % dev.iv_name)

    # First get the migration information from the remote node
    result = self.rpc.call_migration_info(source_node, instance)
    msg = result.fail_msg
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (source_node, msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    self.migration_info = migration_info = result.payload

    # Then switch the disks to master/master mode
    self._EnsureSecondary(target_node)
    self._GoStandalone()
    self._GoReconnect(True)
    self._WaitUntilSync()

    self.feedback_fn("* preparing %s to accept the instance" % target_node)
    result = self.rpc.call_accept_instance(target_node,
                                           instance,
                                           migration_info,
                                           self.nodes_ip[target_node])

    msg = result.fail_msg
    if msg:
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Pre-migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* migrating instance to %s" % target_node)
    result = self.rpc.call_instance_migrate(source_node, instance,
                                            self.nodes_ip[target_node],
                                            self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    result = self.rpc.call_finalize_migration(target_node,
                                              instance,
                                              migration_info,
                                              True)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed:"
                    " %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    self._EnsureSecondary(source_node)
    self._WaitUntilSync()
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def Exec(self, feedback_fn):
    """Perform the migration.

    """
    feedback_fn("Migrating instance %s" % self.instance.name)

    self.feedback_fn = feedback_fn

    self.source_node = self.instance.primary_node
    self.target_node = self.instance.secondary_nodes[0]
    self.all_nodes = [self.source_node, self.target_node]
    self.nodes_ip = {
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
      }

    if self.cleanup:
      return self._ExecCleanup()
    else:
      return self._ExecMigration()
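# Illustrative sketch (hypothetical names and addresses, not executed here):
# for a DRBD instance whose primary is node1 and whose only secondary is
# node2, the Exec() method above ends up with
#
#   self.nodes_ip = {"node1.example.com": "192.0.2.1",
#                    "node2.example.com": "192.0.2.2"}
#
# i.e. the secondary (replication) IPs that the migration RPCs are pointed at.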


def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has
      CreateOnSecondary() attribute
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device own Open() execution

  """
  if device.CreateOnSecondary():
    force_create = True

  if device.children:
    for child in device.children:
      _CreateBlockDev(lu, node, instance, child, force_create,
                      info, force_open)

  if not force_create:
    return

  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
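# Illustrative sketch (hypothetical call, not executed here): when _CreateDisks
# below walks a DRBD8 disk on the secondary node, it calls roughly
#
#   _CreateBlockDev(lu, "node2.example.com", instance, drbd_dev,
#                   force_create=False, info=info, force_open=False)
#
# and because the DRBD8 device reports CreateOnSecondary(), force_create flips
# to True, so both LV children are created before the DRBD8 device itself.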


def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create a single block device on a given node.

  This will not recurse over children of the device, so they must be
  created in advance.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device own Open() execution

  """
  lu.cfg.SetDiskID(device, node)
  result = lu.rpc.call_blockdev_create(node, device, device.size,
                                       instance.name, force_open, info)
  result.Raise("Can't create block device %s on"
               " node %s for instance %s" % (device, node, instance.name))
  if device.physical_id is None:
    device.physical_id = result.payload


def _GenerateUniqueNames(lu, exts):
  """Generate a suitable LV name.

  This will generate a logical volume name for the given instance.

  """
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
    results.append("%s%s" % (new_id, val))
  return results
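# Illustrative sketch (hypothetical UUID): the callers below pass extensions
# such as ".disk0", so _GenerateUniqueNames(lu, [".disk0"]) returns something
# like ["d2a49f2c-40f4-4595-bbe3-a58a93c7c78e.disk0"], i.e. a freshly reserved
# unique ID with the extension appended.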


def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
                         p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  """
  port = lu.cfg.AllocatePort()
  vgname = lu.cfg.GetVGName()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgname, names[0]))
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                          logical_id=(vgname, names[1]))
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                                      p_minor, s_minor,
                                      shared_secret),
                          children=[dev_data, dev_meta],
                          iv_name=iv_name)
  return drbd_dev
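# Illustrative sketch (hypothetical sizes and minors): a 10 GiB branch produced
# by the helper above is a tree of three objects.Disk instances:
#
#   LD_DRBD8, size=10240, logical_id=(primary, secondary, port, p_minor,
#                                     s_minor, shared_secret)
#     +- LD_LV "<uuid>.disk0_data", size=10240   (payload volume)
#     +- LD_LV "<uuid>.disk0_meta", size=128     (DRBD metadata volume)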


def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index):
  """Generate the entire disk layout for a given template type.

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []
  if template_name == constants.DT_DISKLESS:
    pass
  elif template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                      for i in range(disk_count)])
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
                              logical_id=(vgname, names[idx]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk["mode"])
      disks.append(disk_dev)
  elif template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    names = []
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk["size"], names[idx*2:idx*2+2],
                                      "disk/%d" % disk_index,
                                      minors[idx*2], minors[idx*2+1])
      disk_dev.mode = disk["mode"]
      disks.append(disk_dev)
  elif template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    _RequireFileStorage()

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk["mode"])
      disks.append(disk_dev)
  else:
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
  return disks
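# Illustrative sketch (hypothetical call): a plain-LVM instance with two disks
# would be laid out roughly via
#
#   _GenerateDiskTemplate(lu, constants.DT_PLAIN, "inst1.example.com",
#                         "node1.example.com", [],
#                         [{"size": 10240, "mode": constants.DISK_RDWR},
#                          {"size": 2048, "mode": constants.DISK_RDWR}],
#                         None, None, 0)
#
# returning two LD_LV disks with iv_names "disk/0" and "disk/1".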


def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name


def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation
  @rtype: boolean
  @return: the success of the creation

  """
  info = _GetInstanceInfoText(instance)
  if target_node is None:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes
  else:
    pnode = target_node
    all_nodes = [pnode]

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUSetInstanceParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
      continue
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    #HARDCODE
    for node in all_nodes:
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
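# Illustrative sketch (hypothetical use): recreating only the second disk of an
# instance on its current nodes could be requested as
#
#   _CreateDisks(lu, instance, to_skip=[0])
#
# which skips index 0 and, for each remaining disk, creates the device tree on
# every node, forcing creation/opening only on the primary (f_create above).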


def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, instance.primary_node, result.fail_msg)
      all_result = False

  return all_result


def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d["size"] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
    constants.DT_FILE: None,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
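# Illustrative sketch: for two DRBD8 disks of 10240 MB and 2048 MB the helper
# above returns
#
#   _ComputeDiskSize(constants.DT_DRBD8, [{"size": 10240}, {"size": 2048}])
#   # => (10240 + 128) + (2048 + 128) = 12544
#
# while DT_DISKLESS and DT_FILE yield None (no volume group space needed).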


def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
                                                  hvname,
                                                  hvparams)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
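# Illustrative sketch (hypothetical call): instance creation below validates
# the filled hypervisor parameters on all involved nodes with roughly
#
#   _CheckHVParams(self, [pnode.name] + self.secondaries,
#                  self.op.hypervisor, self.op.hvparams)
#
# which raises the per-node RPC error if any online node rejects them.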


class LUCreateInstance(LogicalUnit):
6058
  """Create an instance.
6059

6060
  """
6061
  HPATH = "instance-add"
6062
  HTYPE = constants.HTYPE_INSTANCE
6063
  _OP_REQP = ["instance_name", "disks",
6064
              "mode", "start",
6065
              "wait_for_sync", "ip_check", "nics",
6066
              "hvparams", "beparams"]
6067
  REQ_BGL = False
6068

    
6069
  def CheckArguments(self):
6070
    """Check arguments.
6071

6072
    """
6073
    # set optional parameters to none if they don't exist
6074
    for attr in ["pnode", "snode", "iallocator", "hypervisor",
6075
                 "disk_template", "identify_defaults"]:
6076
      if not hasattr(self.op, attr):
6077
        setattr(self.op, attr, None)
6078

    
6079
    # do not require name_check to ease forward/backward compatibility
6080
    # for tools
6081
    if not hasattr(self.op, "name_check"):
6082
      self.op.name_check = True
6083
    if not hasattr(self.op, "no_install"):
6084
      self.op.no_install = False
6085
    if self.op.no_install and self.op.start:
6086
      self.LogInfo("No-installation mode selected, disabling startup")
6087
      self.op.start = False
6088
    # validate/normalize the instance name
6089
    self.op.instance_name = utils.HostInfo.NormalizeName(self.op.instance_name)
6090
    if self.op.ip_check and not self.op.name_check:
6091
      # TODO: make the ip check more flexible and not depend on the name check
6092
      raise errors.OpPrereqError("Cannot do ip checks without a name check",
6093
                                 errors.ECODE_INVAL)
6094
    # check disk information: either all adopt, or no adopt
6095
    has_adopt = has_no_adopt = False
6096
    for disk in self.op.disks:
6097
      if "adopt" in disk:
6098
        has_adopt = True
6099
      else:
6100
        has_no_adopt = True
6101
    if has_adopt and has_no_adopt:
6102
      raise errors.OpPrereqError("Either all disks are adopted or none is",
6103
                                 errors.ECODE_INVAL)
6104
    if has_adopt:
6105
      if self.op.disk_template != constants.DT_PLAIN:
6106
        raise errors.OpPrereqError("Disk adoption is only supported for the"
6107
                                   " 'plain' disk template",
6108
                                   errors.ECODE_INVAL)
6109
      if self.op.iallocator is not None:
6110
        raise errors.OpPrereqError("Disk adoption not allowed with an"
6111
                                   " iallocator script", errors.ECODE_INVAL)
6112
      if self.op.mode == constants.INSTANCE_IMPORT:
6113
        raise errors.OpPrereqError("Disk adoption not allowed for"
6114
                                   " instance import", errors.ECODE_INVAL)
6115

    
6116
    self.adopt_disks = has_adopt
6117

    
6118
    # verify creation mode
6119
    if self.op.mode not in (constants.INSTANCE_CREATE,
6120
                            constants.INSTANCE_IMPORT):
6121
      raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
6122
                                 self.op.mode, errors.ECODE_INVAL)
6123

    
6124
    # instance name verification
6125
    if self.op.name_check:
6126
      self.hostname1 = utils.GetHostInfo(self.op.instance_name)
6127
      self.op.instance_name = self.hostname1.name
6128
      # used in CheckPrereq for ip ping check
6129
      self.check_ip = self.hostname1.ip
6130
    else:
6131
      self.check_ip = None
6132

    
6133
    # file storage checks
6134
    if (self.op.file_driver and
6135
        not self.op.file_driver in constants.FILE_DRIVER):
6136
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
6137
                                 self.op.file_driver, errors.ECODE_INVAL)
6138

    
6139
    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6140
      raise errors.OpPrereqError("File storage directory path not absolute",
6141
                                 errors.ECODE_INVAL)
6142

    
6143
    ### Node/iallocator related checks
6144
    if [self.op.iallocator, self.op.pnode].count(None) != 1:
6145
      raise errors.OpPrereqError("One and only one of iallocator and primary"
6146
                                 " node must be given",
6147
                                 errors.ECODE_INVAL)
6148

    
6149
    if self.op.mode == constants.INSTANCE_IMPORT:
6150
      # On import force_variant must be True, because if we forced it at
6151
      # initial install, our only chance when importing it back is that it
6152
      # works again!
6153
      self.op.force_variant = True
6154

    
6155
      if self.op.no_install:
6156
        self.LogInfo("No-installation mode has no effect during import")
6157

    
6158
    else: # INSTANCE_CREATE
6159
      if getattr(self.op, "os_type", None) is None:
6160
        raise errors.OpPrereqError("No guest OS specified",
6161
                                   errors.ECODE_INVAL)
6162
      self.op.force_variant = getattr(self.op, "force_variant", False)
6163
      if self.op.disk_template is None:
6164
        raise errors.OpPrereqError("No disk template specified",
6165
                                   errors.ECODE_INVAL)
6166

    
6167
  def ExpandNames(self):
6168
    """ExpandNames for CreateInstance.
6169

6170
    Figure out the right locks for instance creation.
6171

6172
    """
6173
    self.needed_locks = {}
6174

    
6175
    instance_name = self.op.instance_name
6176
    # this is just a preventive check, but someone might still add this
6177
    # instance in the meantime, and creation will fail at lock-add time
6178
    if instance_name in self.cfg.GetInstanceList():
6179
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6180
                                 instance_name, errors.ECODE_EXISTS)
6181

    
6182
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6183

    
6184
    if self.op.iallocator:
6185
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6186
    else:
6187
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6188
      nodelist = [self.op.pnode]
6189
      if self.op.snode is not None:
6190
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6191
        nodelist.append(self.op.snode)
6192
      self.needed_locks[locking.LEVEL_NODE] = nodelist
6193

    
6194
    # in case of import lock the source node too
6195
    if self.op.mode == constants.INSTANCE_IMPORT:
6196
      src_node = getattr(self.op, "src_node", None)
6197
      src_path = getattr(self.op, "src_path", None)
6198

    
6199
      if src_path is None:
6200
        self.op.src_path = src_path = self.op.instance_name
6201

    
6202
      if src_node is None:
6203
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6204
        self.op.src_node = None
6205
        if os.path.isabs(src_path):
6206
          raise errors.OpPrereqError("Importing an instance from an absolute"
6207
                                     " path requires a source node option.",
6208
                                     errors.ECODE_INVAL)
6209
      else:
6210
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6211
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6212
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
6213
        if not os.path.isabs(src_path):
6214
          self.op.src_path = src_path = \
6215
            utils.PathJoin(constants.EXPORT_DIR, src_path)
6216

    
6217
  def _RunAllocator(self):
6218
    """Run the allocator based on input opcode.
6219

6220
    """
6221
    nics = [n.ToDict() for n in self.nics]
6222
    ial = IAllocator(self.cfg, self.rpc,
6223
                     mode=constants.IALLOCATOR_MODE_ALLOC,
6224
                     name=self.op.instance_name,
6225
                     disk_template=self.op.disk_template,
6226
                     tags=[],
6227
                     os=self.op.os_type,
6228
                     vcpus=self.be_full[constants.BE_VCPUS],
6229
                     mem_size=self.be_full[constants.BE_MEMORY],
6230
                     disks=self.disks,
6231
                     nics=nics,
6232
                     hypervisor=self.op.hypervisor,
6233
                     )
6234

    
6235
    ial.Run(self.op.iallocator)
6236

    
6237
    if not ial.success:
6238
      raise errors.OpPrereqError("Can't compute nodes using"
6239
                                 " iallocator '%s': %s" %
6240
                                 (self.op.iallocator, ial.info),
6241
                                 errors.ECODE_NORES)
6242
    if len(ial.result) != ial.required_nodes:
6243
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6244
                                 " of nodes (%s), required %s" %
6245
                                 (self.op.iallocator, len(ial.result),
6246
                                  ial.required_nodes), errors.ECODE_FAULT)
6247
    self.op.pnode = ial.result[0]
6248
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6249
                 self.op.instance_name, self.op.iallocator,
6250
                 utils.CommaJoin(ial.result))
6251
    if ial.required_nodes == 2:
6252
      self.op.snode = ial.result[1]
6253

    
6254
  def BuildHooksEnv(self):
6255
    """Build hooks env.
6256

6257
    This runs on master, primary and secondary nodes of the instance.
6258

6259
    """
6260
    env = {
6261
      "ADD_MODE": self.op.mode,
6262
      }
6263
    if self.op.mode == constants.INSTANCE_IMPORT:
6264
      env["SRC_NODE"] = self.op.src_node
6265
      env["SRC_PATH"] = self.op.src_path
6266
      env["SRC_IMAGES"] = self.src_images
6267

    
6268
    env.update(_BuildInstanceHookEnv(
6269
      name=self.op.instance_name,
6270
      primary_node=self.op.pnode,
6271
      secondary_nodes=self.secondaries,
6272
      status=self.op.start,
6273
      os_type=self.op.os_type,
6274
      memory=self.be_full[constants.BE_MEMORY],
6275
      vcpus=self.be_full[constants.BE_VCPUS],
6276
      nics=_NICListToTuple(self, self.nics),
6277
      disk_template=self.op.disk_template,
6278
      disks=[(d["size"], d["mode"]) for d in self.disks],
6279
      bep=self.be_full,
6280
      hvp=self.hv_full,
6281
      hypervisor_name=self.op.hypervisor,
6282
    ))
6283

    
6284
    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6285
          self.secondaries)
6286
    return env, nl, nl
6287

    
6288
  def _ReadExportInfo(self):
6289
    """Reads the export information from disk.
6290

6291
    It will override the opcode source node and path with the actual
6292
    information, if these two were not specified before.
6293

6294
    @return: the export information
6295

6296
    """
6297
    assert self.op.mode == constants.INSTANCE_IMPORT
6298

    
6299
    src_node = self.op.src_node
6300
    src_path = self.op.src_path
6301

    
6302
    if src_node is None:
6303
      locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6304
      exp_list = self.rpc.call_export_list(locked_nodes)
6305
      found = False
6306
      for node in exp_list:
6307
        if exp_list[node].fail_msg:
6308
          continue
6309
        if src_path in exp_list[node].payload:
6310
          found = True
6311
          self.op.src_node = src_node = node
6312
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
6313
                                                       src_path)
6314
          break
6315
      if not found:
6316
        raise errors.OpPrereqError("No export found for relative path %s" %
6317
                                    src_path, errors.ECODE_INVAL)
6318

    
6319
    _CheckNodeOnline(self, src_node)
6320
    result = self.rpc.call_export_info(src_node, src_path)
6321
    result.Raise("No export or invalid export found in dir %s" % src_path)
6322

    
6323
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6324
    if not export_info.has_section(constants.INISECT_EXP):
6325
      raise errors.ProgrammerError("Corrupted export config",
6326
                                   errors.ECODE_ENVIRON)
6327

    
6328
    ei_version = export_info.get(constants.INISECT_EXP, "version")
6329
    if (int(ei_version) != constants.EXPORT_VERSION):
6330
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6331
                                 (ei_version, constants.EXPORT_VERSION),
6332
                                 errors.ECODE_ENVIRON)
6333
    return export_info
6334

    
6335
  def _ReadExportParams(self, einfo):
6336
    """Use export parameters as defaults.
6337

6338
    In case the opcode doesn't specify (as in override) some instance
6339
    parameters, then try to use them from the export information, if
6340
    that declares them.
6341

6342
    """
6343
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
6344

    
6345
    if self.op.disk_template is None:
6346
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
6347
        self.op.disk_template = einfo.get(constants.INISECT_INS,
6348
                                          "disk_template")
6349
      else:
6350
        raise errors.OpPrereqError("No disk template specified and the export"
6351
                                   " is missing the disk_template information",
6352
                                   errors.ECODE_INVAL)
6353

    
6354
    if not self.op.disks:
6355
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
6356
        disks = []
6357
        # TODO: import the disk iv_name too
6358
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
6359
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
6360
          disks.append({"size": disk_sz})
6361
        self.op.disks = disks
6362
      else:
6363
        raise errors.OpPrereqError("No disk info specified and the export"
6364
                                   " is missing the disk information",
6365
                                   errors.ECODE_INVAL)
6366

    
6367
    if (not self.op.nics and
6368
        einfo.has_option(constants.INISECT_INS, "nic_count")):
6369
      nics = []
6370
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
6371
        ndict = {}
6372
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
6373
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
6374
          ndict[name] = v
6375
        nics.append(ndict)
6376
      self.op.nics = nics
6377

    
6378
    if (self.op.hypervisor is None and
6379
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
6380
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
6381
    if einfo.has_section(constants.INISECT_HYP):
6382
      # use the export parameters but do not override the ones
6383
      # specified by the user
6384
      for name, value in einfo.items(constants.INISECT_HYP):
6385
        if name not in self.op.hvparams:
6386
          self.op.hvparams[name] = value
6387

    
6388
    if einfo.has_section(constants.INISECT_BEP):
6389
      # use the parameters, without overriding
6390
      for name, value in einfo.items(constants.INISECT_BEP):
6391
        if name not in self.op.beparams:
6392
          self.op.beparams[name] = value
6393
    else:
6394
      # try to read the parameters old style, from the main section
6395
      for name in constants.BES_PARAMETERS:
6396
        if (name not in self.op.beparams and
6397
            einfo.has_option(constants.INISECT_INS, name)):
6398
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
6399

    
6400
  def _RevertToDefaults(self, cluster):
6401
    """Revert the instance parameters to the default values.
6402

6403
    """
6404
    # hvparams
6405
    hv_defs = cluster.GetHVDefaults(self.op.hypervisor, self.op.os_type)
6406
    for name in self.op.hvparams.keys():
6407
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
6408
        del self.op.hvparams[name]
6409
    # beparams
6410
    be_defs = cluster.beparams.get(constants.PP_DEFAULT, {})
6411
    for name in self.op.beparams.keys():
6412
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
6413
        del self.op.beparams[name]
6414
    # nic params
6415
    nic_defs = cluster.nicparams.get(constants.PP_DEFAULT, {})
6416
    for nic in self.op.nics:
6417
      for name in constants.NICS_PARAMETERS:
6418
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
6419
          del nic[name]
6420

    
6421
  def CheckPrereq(self):
6422
    """Check prerequisites.
6423

6424
    """
6425
    if self.op.mode == constants.INSTANCE_IMPORT:
6426
      export_info = self._ReadExportInfo()
6427
      self._ReadExportParams(export_info)
6428

    
6429
    _CheckDiskTemplate(self.op.disk_template)
6430

    
6431
    if (not self.cfg.GetVGName() and
6432
        self.op.disk_template not in constants.DTS_NOT_LVM):
6433
      raise errors.OpPrereqError("Cluster does not support lvm-based"
6434
                                 " instances", errors.ECODE_STATE)
6435

    
6436
    if self.op.hypervisor is None:
6437
      self.op.hypervisor = self.cfg.GetHypervisorType()
6438

    
6439
    cluster = self.cfg.GetClusterInfo()
6440
    enabled_hvs = cluster.enabled_hypervisors
6441
    if self.op.hypervisor not in enabled_hvs:
6442
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
6443
                                 " cluster (%s)" % (self.op.hypervisor,
6444
                                  ",".join(enabled_hvs)),
6445
                                 errors.ECODE_STATE)
6446

    
6447
    # check hypervisor parameter syntax (locally)
6448
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6449
    filled_hvp = objects.FillDict(cluster.GetHVDefaults(self.op.hypervisor,
6450
                                                        self.op.os_type),
6451
                                  self.op.hvparams)
6452
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
6453
    hv_type.CheckParameterSyntax(filled_hvp)
6454
    self.hv_full = filled_hvp
6455
    # check that we don't specify global parameters on an instance
6456
    _CheckGlobalHvParams(self.op.hvparams)
6457

    
6458
    # fill and remember the beparams dict
6459
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6460
    self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
6461
                                    self.op.beparams)
6462

    
6463
    # now that hvp/bep are in final format, let's reset to defaults,
6464
    # if told to do so
6465
    if self.op.identify_defaults:
6466
      self._RevertToDefaults(cluster)
6467

    
6468
    # NIC buildup
6469
    self.nics = []
6470
    for idx, nic in enumerate(self.op.nics):
6471
      nic_mode_req = nic.get("mode", None)
6472
      nic_mode = nic_mode_req
6473
      if nic_mode is None:
6474
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
6475

    
6476
      # in routed mode, for the first nic, the default ip is 'auto'
6477
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
6478
        default_ip_mode = constants.VALUE_AUTO
6479
      else:
6480
        default_ip_mode = constants.VALUE_NONE
6481

    
6482
      # ip validity checks
6483
      ip = nic.get("ip", default_ip_mode)
6484
      if ip is None or ip.lower() == constants.VALUE_NONE:
6485
        nic_ip = None
6486
      elif ip.lower() == constants.VALUE_AUTO:
6487
        if not self.op.name_check:
6488
          raise errors.OpPrereqError("IP address set to auto but name checks"
6489
                                     " have been skipped. Aborting.",
6490
                                     errors.ECODE_INVAL)
6491
        nic_ip = self.hostname1.ip
6492
      else:
6493
        if not utils.IsValidIP(ip):
6494
          raise errors.OpPrereqError("Given IP address '%s' doesn't look"
6495
                                     " like a valid IP" % ip,
6496
                                     errors.ECODE_INVAL)
6497
        nic_ip = ip
6498

    
6499
      # TODO: check the ip address for uniqueness
6500
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
6501
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
6502
                                   errors.ECODE_INVAL)
6503

    
6504
      # MAC address verification
6505
      mac = nic.get("mac", constants.VALUE_AUTO)
6506
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6507
        mac = utils.NormalizeAndValidateMac(mac)
6508

    
6509
        try:
6510
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
6511
        except errors.ReservationError:
6512
          raise errors.OpPrereqError("MAC address %s already in use"
6513
                                     " in cluster" % mac,
6514
                                     errors.ECODE_NOTUNIQUE)
6515

    
6516
      # bridge verification
6517
      bridge = nic.get("bridge", None)
6518
      link = nic.get("link", None)
6519
      if bridge and link:
6520
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
6521
                                   " at the same time", errors.ECODE_INVAL)
6522
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
6523
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
6524
                                   errors.ECODE_INVAL)
6525
      elif bridge:
6526
        link = bridge
6527

    
6528
      nicparams = {}
6529
      if nic_mode_req:
6530
        nicparams[constants.NIC_MODE] = nic_mode_req
6531
      if link:
6532
        nicparams[constants.NIC_LINK] = link
6533

    
6534
      check_params = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
6535
                                      nicparams)
6536
      objects.NIC.CheckParameterSyntax(check_params)
6537
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
6538

    
6539
    # disk checks/pre-build
6540
    self.disks = []
6541
    for disk in self.op.disks:
6542
      mode = disk.get("mode", constants.DISK_RDWR)
6543
      if mode not in constants.DISK_ACCESS_SET:
6544
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
6545
                                   mode, errors.ECODE_INVAL)
6546
      size = disk.get("size", None)
6547
      if size is None:
6548
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
6549
      try:
6550
        size = int(size)
6551
      except (TypeError, ValueError):
6552
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
6553
                                   errors.ECODE_INVAL)
6554
      new_disk = {"size": size, "mode": mode}
6555
      if "adopt" in disk:
6556
        new_disk["adopt"] = disk["adopt"]
6557
      self.disks.append(new_disk)
6558

    
6559
    if self.op.mode == constants.INSTANCE_IMPORT:
6560

    
6561
      # Check that the new instance doesn't have less disks than the export
6562
      instance_disks = len(self.disks)
6563
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
6564
      if instance_disks < export_disks:
6565
        raise errors.OpPrereqError("Not enough disks to import."
6566
                                   " (instance: %d, export: %d)" %
6567
                                   (instance_disks, export_disks),
6568
                                   errors.ECODE_INVAL)
6569

    
6570
      disk_images = []
6571
      for idx in range(export_disks):
6572
        option = 'disk%d_dump' % idx
6573
        if export_info.has_option(constants.INISECT_INS, option):
6574
          # FIXME: are the old os-es, disk sizes, etc. useful?
6575
          export_name = export_info.get(constants.INISECT_INS, option)
6576
          image = utils.PathJoin(self.op.src_path, export_name)
6577
          disk_images.append(image)
6578
        else:
6579
          disk_images.append(False)
6580

    
6581
      self.src_images = disk_images
6582

    
6583
      old_name = export_info.get(constants.INISECT_INS, 'name')
6584
      try:
6585
        exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
6586
      except (TypeError, ValueError), err:
6587
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
6588
                                   " an integer: %s" % str(err),
6589
                                   errors.ECODE_STATE)
6590
      if self.op.instance_name == old_name:
6591
        for idx, nic in enumerate(self.nics):
6592
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
6593
            nic_mac_ini = 'nic%d_mac' % idx
6594
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
6595

    
6596
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
6597

    
6598
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
6599
    if self.op.ip_check:
6600
      if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
6601
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
6602
                                   (self.check_ip, self.op.instance_name),
6603
                                   errors.ECODE_NOTUNIQUE)
6604

    
6605
    #### mac address generation
6606
    # By generating here the mac address both the allocator and the hooks get
6607
    # the real final mac address rather than the 'auto' or 'generate' value.
6608
    # There is a race condition between the generation and the instance object
6609
    # creation, which means that we know the mac is valid now, but we're not
6610
    # sure it will be when we actually add the instance. If things go bad
6611
    # adding the instance will abort because of a duplicate mac, and the
6612
    # creation job will fail.
6613
    for nic in self.nics:
6614
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6615
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
6616

    
6617
    #### allocator run
6618

    
6619
    if self.op.iallocator is not None:
6620
      self._RunAllocator()
6621

    
6622
    #### node related checks
6623

    
6624
    # check primary node
6625
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
6626
    assert self.pnode is not None, \
6627
      "Cannot retrieve locked node %s" % self.op.pnode
6628
    if pnode.offline:
6629
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
6630
                                 pnode.name, errors.ECODE_STATE)
6631
    if pnode.drained:
6632
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
6633
                                 pnode.name, errors.ECODE_STATE)
6634

    
6635
    self.secondaries = []
6636

    
6637
    # mirror node verification
6638
    if self.op.disk_template in constants.DTS_NET_MIRROR:
6639
      if self.op.snode is None:
6640
        raise errors.OpPrereqError("The networked disk templates need"
6641
                                   " a mirror node", errors.ECODE_INVAL)
6642
      if self.op.snode == pnode.name:
6643
        raise errors.OpPrereqError("The secondary node cannot be the"
6644
                                   " primary node.", errors.ECODE_INVAL)
6645
      _CheckNodeOnline(self, self.op.snode)
6646
      _CheckNodeNotDrained(self, self.op.snode)
6647
      self.secondaries.append(self.op.snode)
6648

    
6649
    nodenames = [pnode.name] + self.secondaries
6650

    
6651
    req_size = _ComputeDiskSize(self.op.disk_template,
6652
                                self.disks)
6653

    
6654
    # Check lv size requirements, if not adopting
6655
    if req_size is not None and not self.adopt_disks:
6656
      _CheckNodesFreeDisk(self, nodenames, req_size)
6657

    
6658
    if self.adopt_disks: # instead, we must check the adoption data
6659
      all_lvs = set([i["adopt"] for i in self.disks])
6660
      if len(all_lvs) != len(self.disks):
6661
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
6662
                                   errors.ECODE_INVAL)
6663
      for lv_name in all_lvs:
6664
        try:
6665
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
6666
        except errors.ReservationError:
6667
          raise errors.OpPrereqError("LV named %s used by another instance" %
6668
                                     lv_name, errors.ECODE_NOTUNIQUE)
6669

    
6670
      node_lvs = self.rpc.call_lv_list([pnode.name],
6671
                                       self.cfg.GetVGName())[pnode.name]
6672
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
6673
      node_lvs = node_lvs.payload
6674
      delta = all_lvs.difference(node_lvs.keys())
6675
      if delta:
6676
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
6677
                                   utils.CommaJoin(delta),
6678
                                   errors.ECODE_INVAL)
6679
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
6680
      if online_lvs:
6681
        raise errors.OpPrereqError("Online logical volumes found, cannot"
6682
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
6683
                                   errors.ECODE_STATE)
6684
      # update the size of disk based on what is found
6685
      for dsk in self.disks:
6686
        dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
6687

    
6688
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
6689

    
6690
    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
6691

    
6692
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
6693

    
6694
    # memory check on primary node
6695
    if self.op.start:
6696
      _CheckNodeFreeMemory(self, self.pnode.name,
6697
                           "creating instance %s" % self.op.instance_name,
6698
                           self.be_full[constants.BE_MEMORY],
6699
                           self.op.hypervisor)
6700

    
6701
    self.dry_run_result = list(nodenames)
6702

    
6703
  def Exec(self, feedback_fn):
6704
    """Create and add the instance to the cluster.
6705

6706
    """
6707
    instance = self.op.instance_name
6708
    pnode_name = self.pnode.name
6709

    
6710
    ht_kind = self.op.hypervisor
6711
    if ht_kind in constants.HTS_REQ_PORT:
6712
      network_port = self.cfg.AllocatePort()
6713
    else:
6714
      network_port = None
6715

    
6716
    if constants.ENABLE_FILE_STORAGE:
6717
      # this is needed because os.path.join does not accept None arguments
6718
      if self.op.file_storage_dir is None:
6719
        string_file_storage_dir = ""
6720
      else:
6721
        string_file_storage_dir = self.op.file_storage_dir
6722

    
6723
      # build the full file storage dir path
6724
      file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
6725
                                        string_file_storage_dir, instance)
6726
    else:
6727
      file_storage_dir = ""
6728

    
6729

    
6730
    disks = _GenerateDiskTemplate(self,
6731
                                  self.op.disk_template,
6732
                                  instance, pnode_name,
6733
                                  self.secondaries,
6734
                                  self.disks,
6735
                                  file_storage_dir,
6736
                                  self.op.file_driver,
6737
                                  0)
6738

    
6739
    iobj = objects.Instance(name=instance, os=self.op.os_type,
6740
                            primary_node=pnode_name,
6741
                            nics=self.nics, disks=disks,
6742
                            disk_template=self.op.disk_template,
6743
                            admin_up=False,
6744
                            network_port=network_port,
6745
                            beparams=self.op.beparams,
6746
                            hvparams=self.op.hvparams,
6747
                            hypervisor=self.op.hypervisor,
6748
                            )
6749

    
6750
    if self.adopt_disks:
6751
      # rename LVs to the newly-generated names; we need to construct
6752
      # 'fake' LV disks with the old data, plus the new unique_id
6753
      tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
6754
      rename_to = []
6755
      for t_dsk, a_dsk in zip (tmp_disks, self.disks):
6756
        rename_to.append(t_dsk.logical_id)
6757
        t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
6758
        self.cfg.SetDiskID(t_dsk, pnode_name)
6759
      result = self.rpc.call_blockdev_rename(pnode_name,
6760
                                             zip(tmp_disks, rename_to))
6761
      result.Raise("Failed to rename adopted LVs")
6762
    else:
6763
      feedback_fn("* creating instance disks...")
6764
      try:
6765
        _CreateDisks(self, iobj)
6766
      except errors.OpExecError:
6767
        self.LogWarning("Device creation failed, reverting...")
6768
        try:
6769
          _RemoveDisks(self, iobj)
6770
        finally:
6771
          self.cfg.ReleaseDRBDMinors(instance)
6772
          raise
6773

    
6774
    feedback_fn("adding instance %s to cluster config" % instance)
6775

    
6776
    self.cfg.AddInstance(iobj, self.proc.GetECId())
6777

    
6778
    # Declare that we don't want to remove the instance lock anymore, as we've
6779
    # added the instance to the config
6780
    del self.remove_locks[locking.LEVEL_INSTANCE]
6781
    # Unlock all the nodes
6782
    if self.op.mode == constants.INSTANCE_IMPORT:
6783
      nodes_keep = [self.op.src_node]
6784
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
6785
                       if node != self.op.src_node]
6786
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
6787
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
6788
    else:
6789
      self.context.glm.release(locking.LEVEL_NODE)
6790
      del self.acquired_locks[locking.LEVEL_NODE]
6791

    
6792
    if self.op.wait_for_sync:
6793
      disk_abort = not _WaitForSync(self, iobj)
6794
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
6795
      # make sure the disks are not degraded (still sync-ing is ok)
6796
      time.sleep(15)
6797
      feedback_fn("* checking mirrors status")
6798
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
6799
    else:
6800
      disk_abort = False
6801

    
6802
    if disk_abort:
6803
      _RemoveDisks(self, iobj)
6804
      self.cfg.RemoveInstance(iobj.name)
6805
      # Make sure the instance lock gets removed
6806
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
6807
      raise errors.OpExecError("There are some degraded disks for"
6808
                               " this instance")
6809

    
6810
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
6811
      if self.op.mode == constants.INSTANCE_CREATE:
6812
        if not self.op.no_install:
6813
          feedback_fn("* running the instance OS create scripts...")
6814
          # FIXME: pass debug option from opcode to backend
6815
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
6816
                                                 self.op.debug_level)
6817
          result.Raise("Could not add os for instance %s"
6818
                       " on node %s" % (instance, pnode_name))
6819

    
6820
      elif self.op.mode == constants.INSTANCE_IMPORT:
6821
        feedback_fn("* running the instance OS import scripts...")
6822
        src_node = self.op.src_node
6823
        src_images = self.src_images
6824
        cluster_name = self.cfg.GetClusterName()
6825
        # FIXME: pass debug option from opcode to backend
6826
        import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
6827
                                                         src_node, src_images,
6828
                                                         cluster_name,
6829
                                                         self.op.debug_level)
6830
        msg = import_result.fail_msg
6831
        if msg:
6832
          self.LogWarning("Error while importing the disk images for instance"
6833
                          " %s on node %s: %s" % (instance, pnode_name, msg))
6834
      else:
6835
        # also checked in the prereq part
6836
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
6837
                                     % self.op.mode)
6838

    
6839
    if self.op.start:
6840
      iobj.admin_up = True
6841
      self.cfg.Update(iobj, feedback_fn)
6842
      logging.info("Starting instance %s on node %s", instance, pnode_name)
6843
      feedback_fn("* starting instance...")
6844
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
6845
      result.Raise("Could not start instance")
6846

    
6847
    return list(iobj.all_nodes)
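# Illustrative sketch (hypothetical opcode values): the disk and NIC
# specifications consumed by CheckPrereq above are plain dicts, e.g.
#
#   disks=[{"size": 10240, "mode": constants.DISK_RDWR}]
#   nics=[{"mode": constants.NIC_MODE_BRIDGED, "link": "xen-br0",
#          "mac": constants.VALUE_AUTO}]
#
# with "adopt" allowed in disk dicts only for the plain disk template.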
6848

    
6849

    
6850
class LUConnectConsole(NoHooksLU):
6851
  """Connect to an instance's console.
6852

6853
  This is somewhat special in that it returns the command line that
6854
  you need to run on the master node in order to connect to the
6855
  console.
6856

6857
  """
6858
  _OP_REQP = ["instance_name"]
6859
  REQ_BGL = False
6860

    
6861
  def ExpandNames(self):
6862
    self._ExpandAndLockInstance()
6863

    
6864
  def CheckPrereq(self):
6865
    """Check prerequisites.
6866

6867
    This checks that the instance is in the cluster.
6868

6869
    """
6870
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6871
    assert self.instance is not None, \
6872
      "Cannot retrieve locked instance %s" % self.op.instance_name
6873
    _CheckNodeOnline(self, self.instance.primary_node)
6874

    
6875
  def Exec(self, feedback_fn):
6876
    """Connect to the console of an instance
6877

6878
    """
6879
    instance = self.instance
6880
    node = instance.primary_node
6881

    
6882
    node_insts = self.rpc.call_instance_list([node],
6883
                                             [instance.hypervisor])[node]
6884
    node_insts.Raise("Can't get node information from %s" % node)
6885

    
6886
    if instance.name not in node_insts.payload:
6887
      raise errors.OpExecError("Instance %s is not running." % instance.name)
6888

    
6889
    logging.debug("Connecting to console of %s on %s", instance.name, node)
6890

    
6891
    hyper = hypervisor.GetHypervisor(instance.hypervisor)
6892
    cluster = self.cfg.GetClusterInfo()
6893
    # beparams and hvparams are passed separately, to avoid editing the
6894
    # instance and then saving the defaults in the instance itself.
6895
    hvparams = cluster.FillHV(instance)
6896
    beparams = cluster.FillBE(instance)
6897
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
6898

    
6899
    # build ssh cmdline
6900
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
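# Illustrative sketch (hypothetical output): for a Xen instance the command
# built above is an ssh invocation that runs the hypervisor's console command
# (e.g. something like "xm console inst1.example.com") as root on the primary
# node, with a pseudo-tty allocated.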
6901

    
6902

    
6903
class LUReplaceDisks(LogicalUnit):
6904
  """Replace the disks of an instance.
6905

6906
  """
6907
  HPATH = "mirrors-replace"
6908
  HTYPE = constants.HTYPE_INSTANCE
6909
  _OP_REQP = ["instance_name", "mode", "disks"]
6910
  REQ_BGL = False
6911

    
6912
  def CheckArguments(self):
6913
    if not hasattr(self.op, "remote_node"):
6914
      self.op.remote_node = None
6915
    if not hasattr(self.op, "iallocator"):
6916
      self.op.iallocator = None
6917
    if not hasattr(self.op, "early_release"):
6918
      self.op.early_release = False
6919

    
6920
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
6921
                                  self.op.iallocator)
6922

    
6923
  def ExpandNames(self):
6924
    self._ExpandAndLockInstance()
6925

    
6926
    if self.op.iallocator is not None:
6927
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6928

    
6929
    elif self.op.remote_node is not None:
6930
      remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
6931
      self.op.remote_node = remote_node
6932

    
6933
      # Warning: do not remove the locking of the new secondary here
6934
      # unless DRBD8.AddChildren is changed to work in parallel;
6935
      # currently it doesn't since parallel invocations of
6936
      # FindUnusedMinor will conflict
6937
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
6938
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6939

    
6940
    else:
6941
      self.needed_locks[locking.LEVEL_NODE] = []
6942
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6943

    
6944
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
6945
                                   self.op.iallocator, self.op.remote_node,
6946
                                   self.op.disks, False, self.op.early_release)
6947

    
6948
    self.tasklets = [self.replacer]
6949

    
6950
  def DeclareLocks(self, level):
6951
    # If we're not already locking all nodes in the set we have to declare the
6952
    # instance's primary/secondary nodes.
6953
    if (level == locking.LEVEL_NODE and
6954
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
6955
      self._LockInstancesNodes()
6956

    
6957
  def BuildHooksEnv(self):
6958
    """Build hooks env.
6959

6960
    This runs on the master, the primary and all the secondaries.
6961

6962
    """
6963
    instance = self.replacer.instance
6964
    env = {
6965
      "MODE": self.op.mode,
6966
      "NEW_SECONDARY": self.op.remote_node,
6967
      "OLD_SECONDARY": instance.secondary_nodes[0],
6968
      }
6969
    env.update(_BuildInstanceHookEnvByObject(self, instance))
6970
    nl = [
6971
      self.cfg.GetMasterNode(),
6972
      instance.primary_node,
6973
      ]
6974
    if self.op.remote_node is not None:
6975
      nl.append(self.op.remote_node)
6976
    return env, nl, nl
6977

    
6978

    
6979
class LUEvacuateNode(LogicalUnit):
6980
  """Relocate the secondary instances from a node.
6981

6982
  """
6983
  HPATH = "node-evacuate"
6984
  HTYPE = constants.HTYPE_NODE
6985
  _OP_REQP = ["node_name"]
6986
  REQ_BGL = False
6987

    
6988
  def CheckArguments(self):
6989
    if not hasattr(self.op, "remote_node"):
6990
      self.op.remote_node = None
6991
    if not hasattr(self.op, "iallocator"):
6992
      self.op.iallocator = None
6993
    if not hasattr(self.op, "early_release"):
6994
      self.op.early_release = False
6995

    
6996
    TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
6997
                                  self.op.remote_node,
6998
                                  self.op.iallocator)
6999

    
7000
  def ExpandNames(self):
7001
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7002

    
7003
    self.needed_locks = {}
7004

    
7005
    # Declare node locks
7006
    if self.op.iallocator is not None:
7007
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7008

    
7009
    elif self.op.remote_node is not None:
7010
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7011

    
7012
      # Warning: do not remove the locking of the new secondary here
7013
      # unless DRBD8.AddChildren is changed to work in parallel;
7014
      # currently it doesn't since parallel invocations of
7015
      # FindUnusedMinor will conflict
7016
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
7017
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7018

    
7019
    else:
7020
      raise errors.OpPrereqError("Invalid parameters", errors.ECODE_INVAL)
7021

    
7022
    # Create tasklets for replacing disks for all secondary instances on this
7023
    # node
7024
    names = []
7025
    tasklets = []
7026

    
7027
    for inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
7028
      logging.debug("Replacing disks for instance %s", inst.name)
7029
      names.append(inst.name)
7030

    
7031
      replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
7032
                                self.op.iallocator, self.op.remote_node, [],
7033
                                True, self.op.early_release)
7034
      tasklets.append(replacer)
7035

    
7036
    self.tasklets = tasklets
7037
    self.instance_names = names
7038

    
7039
    # Declare instance locks
7040
    self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names
7041

    
7042
  def DeclareLocks(self, level):
7043
    # If we're not already locking all nodes in the set we have to declare the
7044
    # instance's primary/secondary nodes.
7045
    if (level == locking.LEVEL_NODE and
7046
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7047
      self._LockInstancesNodes()
7048

    
7049
  def BuildHooksEnv(self):
7050
    """Build hooks env.
7051

7052
    This runs on the master, the primary and all the secondaries.
7053

7054
    """
7055
    env = {
7056
      "NODE_NAME": self.op.node_name,
7057
      }
7058

    
7059
    nl = [self.cfg.GetMasterNode()]
7060

    
7061
    if self.op.remote_node is not None:
7062
      env["NEW_SECONDARY"] = self.op.remote_node
7063
      nl.append(self.op.remote_node)
7064

    
7065
    return (env, nl, nl)
7066

    
7067

    
7068
class TLReplaceDisks(Tasklet):
7069
  """Replaces disks for an instance.
7070

7071
  Note: Locking is not within the scope of this class.
7072

7073
  """
7074
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7075
               disks, delay_iallocator, early_release):
7076
    """Initializes this class.
7077

7078
    """
7079
    Tasklet.__init__(self, lu)
7080

    
7081
    # Parameters
7082
    self.instance_name = instance_name
7083
    self.mode = mode
7084
    self.iallocator_name = iallocator_name
7085
    self.remote_node = remote_node
7086
    self.disks = disks
7087
    self.delay_iallocator = delay_iallocator
7088
    self.early_release = early_release
7089

    
7090
    # Runtime data
7091
    self.instance = None
7092
    self.new_node = None
7093
    self.target_node = None
7094
    self.other_node = None
7095
    self.remote_node_info = None
7096
    self.node_secondary_ip = None
7097

    
7098
  @staticmethod
7099
  def CheckArguments(mode, remote_node, iallocator):
7100
    """Helper function for users of this class.
7101

7102
    """
7103
    # check for valid parameter combination
7104
    if mode == constants.REPLACE_DISK_CHG:
7105
      if remote_node is None and iallocator is None:
7106
        raise errors.OpPrereqError("When changing the secondary either an"
7107
                                   " iallocator script must be used or the"
7108
                                   " new node given", errors.ECODE_INVAL)
7109

    
7110
      if remote_node is not None and iallocator is not None:
7111
        raise errors.OpPrereqError("Give either the iallocator or the new"
7112
                                   " secondary, not both", errors.ECODE_INVAL)
7113

    
7114
    elif remote_node is not None or iallocator is not None:
7115
      # Not replacing the secondary
7116
      raise errors.OpPrereqError("The iallocator and new node options can"
7117
                                 " only be used when changing the"
7118
                                 " secondary node", errors.ECODE_INVAL)
7119

    
7120
  @staticmethod
7121
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7122
    """Compute a new secondary node using an IAllocator.
7123

7124
    """
7125
    ial = IAllocator(lu.cfg, lu.rpc,
7126
                     mode=constants.IALLOCATOR_MODE_RELOC,
7127
                     name=instance_name,
7128
                     relocate_from=relocate_from)
7129

    
7130
    ial.Run(iallocator_name)
7131

    
7132
    if not ial.success:
7133
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7134
                                 " %s" % (iallocator_name, ial.info),
7135
                                 errors.ECODE_NORES)
7136

    
7137
    if len(ial.result) != ial.required_nodes:
7138
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7139
                                 " of nodes (%s), required %s" %
7140
                                 (iallocator_name,
7141
                                  len(ial.result), ial.required_nodes),
7142
                                 errors.ECODE_FAULT)
7143

    
7144
    remote_node_name = ial.result[0]
7145

    
7146
    lu.LogInfo("Selected new secondary for instance '%s': %s",
7147
               instance_name, remote_node_name)
7148

    
7149
    return remote_node_name
7150

    
7151
  def _FindFaultyDisks(self, node_name):
7152
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7153
                                    node_name, True)
7154

    
7155
  def CheckPrereq(self):
7156
    """Check prerequisites.
7157

7158
    This checks that the instance is in the cluster.
7159

7160
    """
7161
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7162
    assert instance is not None, \
7163
      "Cannot retrieve locked instance %s" % self.instance_name
7164

    
7165
    if instance.disk_template != constants.DT_DRBD8:
7166
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7167
                                 " instances", errors.ECODE_INVAL)
7168

    
7169
    if len(instance.secondary_nodes) != 1:
7170
      raise errors.OpPrereqError("The instance has a strange layout,"
7171
                                 " expected one secondary but found %d" %
7172
                                 len(instance.secondary_nodes),
7173
                                 errors.ECODE_FAULT)
7174

    
7175
    if not self.delay_iallocator:
7176
      self._CheckPrereq2()
7177

    
7178
  def _CheckPrereq2(self):
7179
    """Check prerequisites, second part.
7180

7181
    This function should always be part of CheckPrereq. It was separated and is
7182
    now called from Exec because during node evacuation iallocator was only
7183
    called with an unmodified cluster model, not taking planned changes into
7184
    account.
7185

7186
    """
7187
    instance = self.instance
7188
    secondary_node = instance.secondary_nodes[0]
7189

    
7190
    if self.iallocator_name is None:
7191
      remote_node = self.remote_node
7192
    else:
7193
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7194
                                       instance.name, instance.secondary_nodes)
7195

    
7196
    if remote_node is not None:
7197
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7198
      assert self.remote_node_info is not None, \
7199
        "Cannot retrieve locked node %s" % remote_node
7200
    else:
7201
      self.remote_node_info = None
7202

    
7203
    if remote_node == self.instance.primary_node:
7204
      raise errors.OpPrereqError("The specified node is the primary node of"
7205
                                 " the instance.", errors.ECODE_INVAL)
7206

    
7207
    if remote_node == secondary_node:
7208
      raise errors.OpPrereqError("The specified node is already the"
7209
                                 " secondary node of the instance.",
7210
                                 errors.ECODE_INVAL)
7211

    
7212
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7213
                                    constants.REPLACE_DISK_CHG):
7214
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
7215
                                 errors.ECODE_INVAL)
7216

    
7217
    if self.mode == constants.REPLACE_DISK_AUTO:
7218
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
7219
      faulty_secondary = self._FindFaultyDisks(secondary_node)
7220

    
7221
      if faulty_primary and faulty_secondary:
7222
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7223
                                   " one node and can not be repaired"
7224
                                   " automatically" % self.instance_name,
7225
                                   errors.ECODE_STATE)
7226

    
7227
      if faulty_primary:
7228
        self.disks = faulty_primary
7229
        self.target_node = instance.primary_node
7230
        self.other_node = secondary_node
7231
        check_nodes = [self.target_node, self.other_node]
7232
      elif faulty_secondary:
7233
        self.disks = faulty_secondary
7234
        self.target_node = secondary_node
7235
        self.other_node = instance.primary_node
7236
        check_nodes = [self.target_node, self.other_node]
7237
      else:
7238
        self.disks = []
7239
        check_nodes = []
7240

    
7241
    else:
7242
      # Non-automatic modes
7243
      if self.mode == constants.REPLACE_DISK_PRI:
7244
        self.target_node = instance.primary_node
7245
        self.other_node = secondary_node
7246
        check_nodes = [self.target_node, self.other_node]
7247

    
7248
      elif self.mode == constants.REPLACE_DISK_SEC:
7249
        self.target_node = secondary_node
7250
        self.other_node = instance.primary_node
7251
        check_nodes = [self.target_node, self.other_node]
7252

    
7253
      elif self.mode == constants.REPLACE_DISK_CHG:
7254
        self.new_node = remote_node
7255
        self.other_node = instance.primary_node
7256
        self.target_node = secondary_node
7257
        check_nodes = [self.new_node, self.other_node]
7258

    
7259
        _CheckNodeNotDrained(self.lu, remote_node)
7260

    
7261
        old_node_info = self.cfg.GetNodeInfo(secondary_node)
7262
        assert old_node_info is not None
7263
        if old_node_info.offline and not self.early_release:
7264
          # doesn't make sense to delay the release
7265
          self.early_release = True
7266
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
7267
                          " early-release mode", secondary_node)
7268

    
7269
      else:
7270
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
7271
                                     self.mode)
7272

    
7273
      # If not specified all disks should be replaced
7274
      if not self.disks:
7275
        self.disks = range(len(self.instance.disks))
7276

    
7277
    for node in check_nodes:
7278
      _CheckNodeOnline(self.lu, node)
7279

    
7280
    # Check whether disks are valid
7281
    for disk_idx in self.disks:
7282
      instance.FindDisk(disk_idx)
7283

    
7284
    # Get secondary node IP addresses
7285
    node_2nd_ip = {}
7286

    
7287
    for node_name in [self.target_node, self.other_node, self.new_node]:
7288
      if node_name is not None:
7289
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
7290

    
7291
    self.node_secondary_ip = node_2nd_ip
7292

    
7293
  def Exec(self, feedback_fn):
7294
    """Execute disk replacement.
7295

7296
    This dispatches the disk replacement to the appropriate handler.
7297

7298
    """
7299
    if self.delay_iallocator:
7300
      self._CheckPrereq2()
7301

    
7302
    if not self.disks:
7303
      feedback_fn("No disks need replacement")
7304
      return
7305

    
7306
    feedback_fn("Replacing disk(s) %s for %s" %
7307
                (utils.CommaJoin(self.disks), self.instance.name))
7308

    
7309
    activate_disks = (not self.instance.admin_up)
7310

    
7311
    # Activate the instance disks if we're replacing them on a down instance
7312
    if activate_disks:
7313
      _StartInstanceDisks(self.lu, self.instance, True)
7314

    
7315
    try:
7316
      # Should we replace the secondary node?
7317
      if self.new_node is not None:
7318
        fn = self._ExecDrbd8Secondary
7319
      else:
7320
        fn = self._ExecDrbd8DiskOnly
7321

    
7322
      return fn(feedback_fn)
7323

    
7324
    finally:
7325
      # Deactivate the instance disks if we're replacing them on a
7326
      # down instance
7327
      if activate_disks:
7328
        _SafeShutdownInstanceDisks(self.lu, self.instance)
7329

    
7330
  def _CheckVolumeGroup(self, nodes):
7331
    self.lu.LogInfo("Checking volume groups")
7332

    
7333
    vgname = self.cfg.GetVGName()
7334

    
7335
    # Make sure volume group exists on all involved nodes
7336
    results = self.rpc.call_vg_list(nodes)
7337
    if not results:
7338
      raise errors.OpExecError("Can't list volume groups on the nodes")
7339

    
7340
    for node in nodes:
7341
      res = results[node]
7342
      res.Raise("Error checking node %s" % node)
7343
      if vgname not in res.payload:
7344
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
7345
                                 (vgname, node))
7346

    
7347
  def _CheckDisksExistence(self, nodes):
7348
    # Check disk existence
7349
    for idx, dev in enumerate(self.instance.disks):
7350
      if idx not in self.disks:
7351
        continue
7352

    
7353
      for node in nodes:
7354
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
7355
        self.cfg.SetDiskID(dev, node)
7356

    
7357
        result = self.rpc.call_blockdev_find(node, dev)
7358

    
7359
        msg = result.fail_msg
7360
        if msg or not result.payload:
7361
          if not msg:
7362
            msg = "disk not found"
7363
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7364
                                   (idx, node, msg))
7365

    
7366
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7367
    for idx, dev in enumerate(self.instance.disks):
7368
      if idx not in self.disks:
7369
        continue
7370

    
7371
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7372
                      (idx, node_name))
7373

    
7374
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
7375
                                   ldisk=ldisk):
7376
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7377
                                 " replace disks for instance %s" %
7378
                                 (node_name, self.instance.name))
7379

    
7380
  def _CreateNewStorage(self, node_name):
7381
    vgname = self.cfg.GetVGName()
7382
    iv_names = {}
7383

    
7384
    for idx, dev in enumerate(self.instance.disks):
7385
      if idx not in self.disks:
7386
        continue
7387

    
7388
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7389

    
7390
      self.cfg.SetDiskID(dev, node_name)
7391

    
7392
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7393
      names = _GenerateUniqueNames(self.lu, lv_names)
7394

    
7395
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7396
                             logical_id=(vgname, names[0]))
7397
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7398
                             logical_id=(vgname, names[1]))
7399

    
7400
      new_lvs = [lv_data, lv_meta]
7401
      old_lvs = dev.children
7402
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
7403

    
7404
      # we pass force_create=True to force the LVM creation
7405
      for new_lv in new_lvs:
7406
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7407
                        _GetInstanceInfoText(self.instance), False)
7408

    
7409
    return iv_names
7410

    
7411
  def _CheckDevices(self, node_name, iv_names):
7412
    for name, (dev, _, _) in iv_names.iteritems():
7413
      self.cfg.SetDiskID(dev, node_name)
7414

    
7415
      result = self.rpc.call_blockdev_find(node_name, dev)
7416

    
7417
      msg = result.fail_msg
7418
      if msg or not result.payload:
7419
        if not msg:
7420
          msg = "disk not found"
7421
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
7422
                                 (name, msg))
7423

    
7424
      if result.payload.is_degraded:
7425
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
7426

    
7427
  def _RemoveOldStorage(self, node_name, iv_names):
7428
    for name, (_, old_lvs, _) in iv_names.iteritems():
7429
      self.lu.LogInfo("Remove logical volumes for %s" % name)
7430

    
7431
      for lv in old_lvs:
7432
        self.cfg.SetDiskID(lv, node_name)
7433

    
7434
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7435
        if msg:
7436
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
7437
                             hint="remove unused LVs manually")
7438

    
7439
  def _ReleaseNodeLock(self, node_name):
7440
    """Releases the lock for a given node."""
7441
    self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7442

    
7443
  def _ExecDrbd8DiskOnly(self, feedback_fn):
7444
    """Replace a disk on the primary or secondary for DRBD 8.
7445

7446
    The algorithm for replace is quite complicated:
7447

7448
      1. for each disk to be replaced:
7449

7450
        1. create new LVs on the target node with unique names
7451
        1. detach old LVs from the drbd device
7452
        1. rename old LVs to name_replaced.<time_t>
7453
        1. rename new LVs to old LVs
7454
        1. attach the new LVs (with the old names now) to the drbd device
7455

7456
      1. wait for sync across all devices
7457

7458
      1. for each modified disk:
7459

7460
        1. remove old LVs (which have the name name_replaces.<time_t>)
7461

7462
    Failures are not very well handled.
7463

7464
    """
7465
    steps_total = 6
7466

    
7467
    # Step: check device activation
7468
    self.lu.LogStep(1, steps_total, "Check device existence")
7469
    self._CheckDisksExistence([self.other_node, self.target_node])
7470
    self._CheckVolumeGroup([self.target_node, self.other_node])
7471

    
7472
    # Step: check other node consistency
7473
    self.lu.LogStep(2, steps_total, "Check peer consistency")
7474
    self._CheckDisksConsistency(self.other_node,
7475
                                self.other_node == self.instance.primary_node,
7476
                                False)
7477

    
7478
    # Step: create new storage
7479
    self.lu.LogStep(3, steps_total, "Allocate new storage")
7480
    iv_names = self._CreateNewStorage(self.target_node)
7481

    
7482
    # Step: for each lv, detach+rename*2+attach
7483
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7484
    for dev, old_lvs, new_lvs in iv_names.itervalues():
7485
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
7486

    
7487
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
7488
                                                     old_lvs)
7489
      result.Raise("Can't detach drbd from local storage on node"
7490
                   " %s for device %s" % (self.target_node, dev.iv_name))
7491
      #dev.children = []
7492
      #cfg.Update(instance)
7493

    
7494
      # ok, we created the new LVs, so now we know we have the needed
7495
      # storage; as such, we proceed on the target node to rename
7496
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
7497
      # using the assumption that logical_id == physical_id (which in
7498
      # turn is the unique_id on that node)
7499

    
7500
      # FIXME(iustin): use a better name for the replaced LVs
7501
      temp_suffix = int(time.time())
7502
      ren_fn = lambda d, suff: (d.physical_id[0],
7503
                                d.physical_id[1] + "_replaced-%s" % suff)
7504

    
7505
      # Build the rename list based on what LVs exist on the node
7506
      rename_old_to_new = []
7507
      for to_ren in old_lvs:
7508
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
7509
        if not result.fail_msg and result.payload:
7510
          # device exists
7511
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
7512

    
7513
      self.lu.LogInfo("Renaming the old LVs on the target node")
7514
      result = self.rpc.call_blockdev_rename(self.target_node,
7515
                                             rename_old_to_new)
7516
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
7517

    
7518
      # Now we rename the new LVs to the old LVs
7519
      self.lu.LogInfo("Renaming the new LVs on the target node")
7520
      rename_new_to_old = [(new, old.physical_id)
7521
                           for old, new in zip(old_lvs, new_lvs)]
7522
      result = self.rpc.call_blockdev_rename(self.target_node,
7523
                                             rename_new_to_old)
7524
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
7525

    
7526
      for old, new in zip(old_lvs, new_lvs):
7527
        new.logical_id = old.logical_id
7528
        self.cfg.SetDiskID(new, self.target_node)
7529

    
7530
      for disk in old_lvs:
7531
        disk.logical_id = ren_fn(disk, temp_suffix)
7532
        self.cfg.SetDiskID(disk, self.target_node)
7533

    
7534
      # Now that the new lvs have the old name, we can add them to the device
7535
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
7536
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
7537
                                                  new_lvs)
7538
      msg = result.fail_msg
7539
      if msg:
7540
        for new_lv in new_lvs:
7541
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
7542
                                               new_lv).fail_msg
7543
          if msg2:
7544
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
7545
                               hint=("cleanup manually the unused logical"
7546
                                     "volumes"))
7547
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
7548

    
7549
      dev.children = new_lvs
7550

    
7551
      self.cfg.Update(self.instance, feedback_fn)
7552

    
7553
    cstep = 5
7554
    if self.early_release:
7555
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
7556
      cstep += 1
7557
      self._RemoveOldStorage(self.target_node, iv_names)
7558
      # WARNING: we release both node locks here, do not do other RPCs
7559
      # than WaitForSync to the primary node
7560
      self._ReleaseNodeLock([self.target_node, self.other_node])
7561

    
7562
    # Wait for sync
7563
    # This can fail as the old devices are degraded and _WaitForSync
7564
    # does a combined result over all disks, so we don't check its return value
7565
    self.lu.LogStep(cstep, steps_total, "Sync devices")
7566
    cstep += 1
7567
    _WaitForSync(self.lu, self.instance)
7568

    
7569
    # Check all devices manually
7570
    self._CheckDevices(self.instance.primary_node, iv_names)
7571

    
7572
    # Step: remove old storage
7573
    if not self.early_release:
7574
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
7575
      cstep += 1
7576
      self._RemoveOldStorage(self.target_node, iv_names)
7577

    
7578
  def _ExecDrbd8Secondary(self, feedback_fn):
7579
    """Replace the secondary node for DRBD 8.
7580

7581
    The algorithm for replace is quite complicated:
7582
      - for all disks of the instance:
7583
        - create new LVs on the new node with same names
7584
        - shutdown the drbd device on the old secondary
7585
        - disconnect the drbd network on the primary
7586
        - create the drbd device on the new secondary
7587
        - network attach the drbd on the primary, using an artifice:
7588
          the drbd code for Attach() will connect to the network if it
7589
          finds a device which is connected to the good local disks but
7590
          not network enabled
7591
      - wait for sync across all devices
7592
      - remove all disks from the old secondary
7593

7594
    Failures are not very well handled.
7595

7596
    """
7597
    steps_total = 6
7598

    
7599
    # Step: check device activation
7600
    self.lu.LogStep(1, steps_total, "Check device existence")
7601
    self._CheckDisksExistence([self.instance.primary_node])
7602
    self._CheckVolumeGroup([self.instance.primary_node])
7603

    
7604
    # Step: check other node consistency
7605
    self.lu.LogStep(2, steps_total, "Check peer consistency")
7606
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
7607

    
7608
    # Step: create new storage
7609
    self.lu.LogStep(3, steps_total, "Allocate new storage")
7610
    for idx, dev in enumerate(self.instance.disks):
7611
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
7612
                      (self.new_node, idx))
7613
      # we pass force_create=True to force LVM creation
7614
      for new_lv in dev.children:
7615
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
7616
                        _GetInstanceInfoText(self.instance), False)
7617

    
7618
    # Step 4: dbrd minors and drbd setups changes
7619
    # after this, we must manually remove the drbd minors on both the
7620
    # error and the success paths
7621
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7622
    minors = self.cfg.AllocateDRBDMinor([self.new_node
7623
                                         for dev in self.instance.disks],
7624
                                        self.instance.name)
7625
    logging.debug("Allocated minors %r", minors)
7626

    
7627
    iv_names = {}
7628
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
7629
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
7630
                      (self.new_node, idx))
7631
      # create new devices on new_node; note that we create two IDs:
7632
      # one without port, so the drbd will be activated without
7633
      # networking information on the new node at this stage, and one
7634
      # with network, for the latter activation in step 4
7635
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
7636
      if self.instance.primary_node == o_node1:
7637
        p_minor = o_minor1
7638
      else:
7639
        assert self.instance.primary_node == o_node2, "Three-node instance?"
7640
        p_minor = o_minor2
7641

    
7642
      new_alone_id = (self.instance.primary_node, self.new_node, None,
7643
                      p_minor, new_minor, o_secret)
7644
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
7645
                    p_minor, new_minor, o_secret)
7646

    
7647
      iv_names[idx] = (dev, dev.children, new_net_id)
7648
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
7649
                    new_net_id)
7650
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
7651
                              logical_id=new_alone_id,
7652
                              children=dev.children,
7653
                              size=dev.size)
7654
      try:
7655
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
7656
                              _GetInstanceInfoText(self.instance), False)
7657
      except errors.GenericError:
7658
        self.cfg.ReleaseDRBDMinors(self.instance.name)
7659
        raise
7660

    
7661
    # We have new devices, shutdown the drbd on the old secondary
7662
    for idx, dev in enumerate(self.instance.disks):
7663
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
7664
      self.cfg.SetDiskID(dev, self.target_node)
7665
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
7666
      if msg:
7667
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
7668
                           "node: %s" % (idx, msg),
7669
                           hint=("Please cleanup this device manually as"
7670
                                 " soon as possible"))
7671

    
7672
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
7673
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
7674
                                               self.node_secondary_ip,
7675
                                               self.instance.disks)\
7676
                                              [self.instance.primary_node]
7677

    
7678
    msg = result.fail_msg
7679
    if msg:
7680
      # detaches didn't succeed (unlikely)
7681
      self.cfg.ReleaseDRBDMinors(self.instance.name)
7682
      raise errors.OpExecError("Can't detach the disks from the network on"
7683
                               " old node: %s" % (msg,))
7684

    
7685
    # if we managed to detach at least one, we update all the disks of
7686
    # the instance to point to the new secondary
7687
    self.lu.LogInfo("Updating instance configuration")
7688
    for dev, _, new_logical_id in iv_names.itervalues():
7689
      dev.logical_id = new_logical_id
7690
      self.cfg.SetDiskID(dev, self.instance.primary_node)
7691

    
7692
    self.cfg.Update(self.instance, feedback_fn)
7693

    
7694
    # and now perform the drbd attach
7695
    self.lu.LogInfo("Attaching primary drbds to new secondary"
7696
                    " (standalone => connected)")
7697
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
7698
                                            self.new_node],
7699
                                           self.node_secondary_ip,
7700
                                           self.instance.disks,
7701
                                           self.instance.name,
7702
                                           False)
7703
    for to_node, to_result in result.items():
7704
      msg = to_result.fail_msg
7705
      if msg:
7706
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
7707
                           to_node, msg,
7708
                           hint=("please do a gnt-instance info to see the"
7709
                                 " status of disks"))
7710
    cstep = 5
7711
    if self.early_release:
7712
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
7713
      cstep += 1
7714
      self._RemoveOldStorage(self.target_node, iv_names)
7715
      # WARNING: we release all node locks here, do not do other RPCs
7716
      # than WaitForSync to the primary node
7717
      self._ReleaseNodeLock([self.instance.primary_node,
7718
                             self.target_node,
7719
                             self.new_node])
7720

    
7721
    # Wait for sync
7722
    # This can fail as the old devices are degraded and _WaitForSync
7723
    # does a combined result over all disks, so we don't check its return value
7724
    self.lu.LogStep(cstep, steps_total, "Sync devices")
7725
    cstep += 1
7726
    _WaitForSync(self.lu, self.instance)
7727

    
7728
    # Check all devices manually
7729
    self._CheckDevices(self.instance.primary_node, iv_names)
7730

    
7731
    # Step: remove old storage
7732
    if not self.early_release:
7733
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
7734
      self._RemoveOldStorage(self.target_node, iv_names)
7735

    
7736

    
7737
class LURepairNodeStorage(NoHooksLU):
7738
  """Repairs the volume group on a node.
7739

7740
  """
7741
  _OP_REQP = ["node_name"]
7742
  REQ_BGL = False
7743

    
7744
  def CheckArguments(self):
7745
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7746

    
7747
    _CheckStorageType(self.op.storage_type)
7748

    
7749
  def ExpandNames(self):
7750
    self.needed_locks = {
7751
      locking.LEVEL_NODE: [self.op.node_name],
7752
      }
7753

    
7754
  def _CheckFaultyDisks(self, instance, node_name):
7755
    """Ensure faulty disks abort the opcode or at least warn."""
7756
    try:
7757
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
7758
                                  node_name, True):
7759
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
7760
                                   " node '%s'" % (instance.name, node_name),
7761
                                   errors.ECODE_STATE)
7762
    except errors.OpPrereqError, err:
7763
      if self.op.ignore_consistency:
7764
        self.proc.LogWarning(str(err.args[0]))
7765
      else:
7766
        raise
7767

    
7768
  def CheckPrereq(self):
7769
    """Check prerequisites.
7770

7771
    """
7772
    storage_type = self.op.storage_type
7773

    
7774
    if (constants.SO_FIX_CONSISTENCY not in
7775
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
7776
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
7777
                                 " repaired" % storage_type,
7778
                                 errors.ECODE_INVAL)
7779

    
7780
    # Check whether any instance on this node has faulty disks
7781
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
7782
      if not inst.admin_up:
7783
        continue
7784
      check_nodes = set(inst.all_nodes)
7785
      check_nodes.discard(self.op.node_name)
7786
      for inst_node_name in check_nodes:
7787
        self._CheckFaultyDisks(inst, inst_node_name)
7788

    
7789
  def Exec(self, feedback_fn):
7790
    feedback_fn("Repairing storage unit '%s' on %s ..." %
7791
                (self.op.name, self.op.node_name))
7792

    
7793
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
7794
    result = self.rpc.call_storage_execute(self.op.node_name,
7795
                                           self.op.storage_type, st_args,
7796
                                           self.op.name,
7797
                                           constants.SO_FIX_CONSISTENCY)
7798
    result.Raise("Failed to repair storage unit '%s' on %s" %
7799
                 (self.op.name, self.op.node_name))
7800

    
7801

    
7802
class LUNodeEvacuationStrategy(NoHooksLU):
7803
  """Computes the node evacuation strategy.
7804

7805
  """
7806
  _OP_REQP = ["nodes"]
7807
  REQ_BGL = False
7808

    
7809
  def CheckArguments(self):
7810
    if not hasattr(self.op, "remote_node"):
7811
      self.op.remote_node = None
7812
    if not hasattr(self.op, "iallocator"):
7813
      self.op.iallocator = None
7814
    if self.op.remote_node is not None and self.op.iallocator is not None:
7815
      raise errors.OpPrereqError("Give either the iallocator or the new"
7816
                                 " secondary, not both", errors.ECODE_INVAL)
7817

    
7818
  def ExpandNames(self):
7819
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
7820
    self.needed_locks = locks = {}
7821
    if self.op.remote_node is None:
7822
      locks[locking.LEVEL_NODE] = locking.ALL_SET
7823
    else:
7824
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7825
      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
7826

    
7827
  def CheckPrereq(self):
7828
    pass
7829

    
7830
  def Exec(self, feedback_fn):
7831
    if self.op.remote_node is not None:
7832
      instances = []
7833
      for node in self.op.nodes:
7834
        instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
7835
      result = []
7836
      for i in instances:
7837
        if i.primary_node == self.op.remote_node:
7838
          raise errors.OpPrereqError("Node %s is the primary node of"
7839
                                     " instance %s, cannot use it as"
7840
                                     " secondary" %
7841
                                     (self.op.remote_node, i.name),
7842
                                     errors.ECODE_INVAL)
7843
        result.append([i.name, self.op.remote_node])
7844
    else:
7845
      ial = IAllocator(self.cfg, self.rpc,
7846
                       mode=constants.IALLOCATOR_MODE_MEVAC,
7847
                       evac_nodes=self.op.nodes)
7848
      ial.Run(self.op.iallocator, validate=True)
7849
      if not ial.success:
7850
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
7851
                                 errors.ECODE_NORES)
7852
      result = ial.result
7853
    return result
7854

    
7855

    
7856
class LUGrowDisk(LogicalUnit):
7857
  """Grow a disk of an instance.
7858

7859
  """
7860
  HPATH = "disk-grow"
7861
  HTYPE = constants.HTYPE_INSTANCE
7862
  _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
7863
  REQ_BGL = False
7864

    
7865
  def ExpandNames(self):
7866
    self._ExpandAndLockInstance()
7867
    self.needed_locks[locking.LEVEL_NODE] = []
7868
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7869

    
7870
  def DeclareLocks(self, level):
7871
    if level == locking.LEVEL_NODE:
7872
      self._LockInstancesNodes()
7873

    
7874
  def BuildHooksEnv(self):
7875
    """Build hooks env.
7876

7877
    This runs on the master, the primary and all the secondaries.
7878

7879
    """
7880
    env = {
7881
      "DISK": self.op.disk,
7882
      "AMOUNT": self.op.amount,
7883
      }
7884
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7885
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7886
    return env, nl, nl
7887

    
7888
  def CheckPrereq(self):
7889
    """Check prerequisites.
7890

7891
    This checks that the instance is in the cluster.
7892

7893
    """
7894
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7895
    assert instance is not None, \
7896
      "Cannot retrieve locked instance %s" % self.op.instance_name
7897
    nodenames = list(instance.all_nodes)
7898
    for node in nodenames:
7899
      _CheckNodeOnline(self, node)
7900

    
7901

    
7902
    self.instance = instance
7903

    
7904
    if instance.disk_template not in constants.DTS_GROWABLE:
7905
      raise errors.OpPrereqError("Instance's disk layout does not support"
7906
                                 " growing.", errors.ECODE_INVAL)
7907

    
7908
    self.disk = instance.FindDisk(self.op.disk)
7909

    
7910
    if instance.disk_template != constants.DT_FILE:
7911
      # TODO: check the free disk space for file, when that feature will be
7912
      # supported
7913
      _CheckNodesFreeDisk(self, nodenames, self.op.amount)
7914

    
7915
  def Exec(self, feedback_fn):
7916
    """Execute disk grow.
7917

7918
    """
7919
    instance = self.instance
7920
    disk = self.disk
7921
    for node in instance.all_nodes:
7922
      self.cfg.SetDiskID(disk, node)
7923
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
7924
      result.Raise("Grow request failed to node %s" % node)
7925

    
7926
      # TODO: Rewrite code to work properly
7927
      # DRBD goes into sync mode for a short amount of time after executing the
7928
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
7929
      # calling "resize" in sync mode fails. Sleeping for a short amount of
7930
      # time is a work-around.
7931
      time.sleep(5)
7932

    
7933
    disk.RecordGrow(self.op.amount)
7934
    self.cfg.Update(instance, feedback_fn)
7935
    if self.op.wait_for_sync:
7936
      disk_abort = not _WaitForSync(self, instance)
7937
      if disk_abort:
7938
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
7939
                             " status.\nPlease check the instance.")
7940

    
7941

    
7942
class LUQueryInstanceData(NoHooksLU):
7943
  """Query runtime instance data.
7944

7945
  """
7946
  _OP_REQP = ["instances", "static"]
7947
  REQ_BGL = False
7948

    
7949
  def ExpandNames(self):
7950
    self.needed_locks = {}
7951
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
7952

    
7953
    if not isinstance(self.op.instances, list):
7954
      raise errors.OpPrereqError("Invalid argument type 'instances'",
7955
                                 errors.ECODE_INVAL)
7956

    
7957
    if self.op.instances:
7958
      self.wanted_names = []
7959
      for name in self.op.instances:
7960
        full_name = _ExpandInstanceName(self.cfg, name)
7961
        self.wanted_names.append(full_name)
7962
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
7963
    else:
7964
      self.wanted_names = None
7965
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
7966

    
7967
    self.needed_locks[locking.LEVEL_NODE] = []
7968
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7969

    
7970
  def DeclareLocks(self, level):
7971
    if level == locking.LEVEL_NODE:
7972
      self._LockInstancesNodes()
7973

    
7974
  def CheckPrereq(self):
7975
    """Check prerequisites.
7976

7977
    This only checks the optional instance list against the existing names.
7978

7979
    """
7980
    if self.wanted_names is None:
7981
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
7982

    
7983
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
7984
                             in self.wanted_names]
7985
    return
7986

    
7987
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
7988
    """Returns the status of a block device
7989

7990
    """
7991
    if self.op.static or not node:
7992
      return None
7993

    
7994
    self.cfg.SetDiskID(dev, node)
7995

    
7996
    result = self.rpc.call_blockdev_find(node, dev)
7997
    if result.offline:
7998
      return None
7999

    
8000
    result.Raise("Can't compute disk status for %s" % instance_name)
8001

    
8002
    status = result.payload
8003
    if status is None:
8004
      return None
8005

    
8006
    return (status.dev_path, status.major, status.minor,
8007
            status.sync_percent, status.estimated_time,
8008
            status.is_degraded, status.ldisk_status)
8009

    
8010
  def _ComputeDiskStatus(self, instance, snode, dev):
8011
    """Compute block device status.
8012

8013
    """
8014
    if dev.dev_type in constants.LDS_DRBD:
8015
      # we change the snode then (otherwise we use the one passed in)
8016
      if dev.logical_id[0] == instance.primary_node:
8017
        snode = dev.logical_id[1]
8018
      else:
8019
        snode = dev.logical_id[0]
8020

    
8021
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8022
                                              instance.name, dev)
8023
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8024

    
8025
    if dev.children:
8026
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
8027
                      for child in dev.children]
8028
    else:
8029
      dev_children = []
8030

    
8031
    data = {
8032
      "iv_name": dev.iv_name,
8033
      "dev_type": dev.dev_type,
8034
      "logical_id": dev.logical_id,
8035
      "physical_id": dev.physical_id,
8036
      "pstatus": dev_pstatus,
8037
      "sstatus": dev_sstatus,
8038
      "children": dev_children,
8039
      "mode": dev.mode,
8040
      "size": dev.size,
8041
      }
8042

    
8043
    return data
8044

    
8045
  def Exec(self, feedback_fn):
8046
    """Gather and return data"""
8047
    result = {}
8048

    
8049
    cluster = self.cfg.GetClusterInfo()
8050

    
8051
    for instance in self.wanted_instances:
8052
      if not self.op.static:
8053
        remote_info = self.rpc.call_instance_info(instance.primary_node,
8054
                                                  instance.name,
8055
                                                  instance.hypervisor)
8056
        remote_info.Raise("Error checking node %s" % instance.primary_node)
8057
        remote_info = remote_info.payload
8058
        if remote_info and "state" in remote_info:
8059
          remote_state = "up"
8060
        else:
8061
          remote_state = "down"
8062
      else:
8063
        remote_state = None
8064
      if instance.admin_up:
8065
        config_state = "up"
8066
      else:
8067
        config_state = "down"
8068

    
8069
      disks = [self._ComputeDiskStatus(instance, None, device)
8070
               for device in instance.disks]
8071

    
8072
      idict = {
8073
        "name": instance.name,
8074
        "config_state": config_state,
8075
        "run_state": remote_state,
8076
        "pnode": instance.primary_node,
8077
        "snodes": instance.secondary_nodes,
8078
        "os": instance.os,
8079
        # this happens to be the same format used for hooks
8080
        "nics": _NICListToTuple(self, instance.nics),
8081
        "disk_template": instance.disk_template,
8082
        "disks": disks,
8083
        "hypervisor": instance.hypervisor,
8084
        "network_port": instance.network_port,
8085
        "hv_instance": instance.hvparams,
8086
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
8087
        "be_instance": instance.beparams,
8088
        "be_actual": cluster.FillBE(instance),
8089
        "serial_no": instance.serial_no,
8090
        "mtime": instance.mtime,
8091
        "ctime": instance.ctime,
8092
        "uuid": instance.uuid,
8093
        }
8094

    
8095
      result[instance.name] = idict
8096

    
8097
    return result
8098

    
8099

    
8100
class LUSetInstanceParams(LogicalUnit):
8101
  """Modifies an instances's parameters.
8102

8103
  """
8104
  HPATH = "instance-modify"
8105
  HTYPE = constants.HTYPE_INSTANCE
8106
  _OP_REQP = ["instance_name"]
8107
  REQ_BGL = False
8108

    
8109
  def CheckArguments(self):
8110
    if not hasattr(self.op, 'nics'):
8111
      self.op.nics = []
8112
    if not hasattr(self.op, 'disks'):
8113
      self.op.disks = []
8114
    if not hasattr(self.op, 'beparams'):
8115
      self.op.beparams = {}
8116
    if not hasattr(self.op, 'hvparams'):
8117
      self.op.hvparams = {}
8118
    if not hasattr(self.op, "disk_template"):
8119
      self.op.disk_template = None
8120
    if not hasattr(self.op, "remote_node"):
8121
      self.op.remote_node = None
8122
    if not hasattr(self.op, "os_name"):
8123
      self.op.os_name = None
8124
    if not hasattr(self.op, "force_variant"):
8125
      self.op.force_variant = False
8126
    self.op.force = getattr(self.op, "force", False)
8127
    if not (self.op.nics or self.op.disks or self.op.disk_template or
8128
            self.op.hvparams or self.op.beparams or self.op.os_name):
8129
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8130

    
8131
    if self.op.hvparams:
8132
      _CheckGlobalHvParams(self.op.hvparams)
8133

    
8134
    # Disk validation
8135
    disk_addremove = 0
8136
    for disk_op, disk_dict in self.op.disks:
8137
      if disk_op == constants.DDM_REMOVE:
8138
        disk_addremove += 1
8139
        continue
8140
      elif disk_op == constants.DDM_ADD:
8141
        disk_addremove += 1
8142
      else:
8143
        if not isinstance(disk_op, int):
8144
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8145
        if not isinstance(disk_dict, dict):
8146
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8147
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8148

    
8149
      if disk_op == constants.DDM_ADD:
8150
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8151
        if mode not in constants.DISK_ACCESS_SET:
8152
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8153
                                     errors.ECODE_INVAL)
8154
        size = disk_dict.get('size', None)
8155
        if size is None:
8156
          raise errors.OpPrereqError("Required disk parameter size missing",
8157
                                     errors.ECODE_INVAL)
8158
        try:
8159
          size = int(size)
8160
        except (TypeError, ValueError), err:
8161
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8162
                                     str(err), errors.ECODE_INVAL)
8163
        disk_dict['size'] = size
8164
      else:
8165
        # modification of disk
8166
        if 'size' in disk_dict:
8167
          raise errors.OpPrereqError("Disk size change not possible, use"
8168
                                     " grow-disk", errors.ECODE_INVAL)
8169

    
8170
    if disk_addremove > 1:
8171
      raise errors.OpPrereqError("Only one disk add or remove operation"
8172
                                 " supported at a time", errors.ECODE_INVAL)
8173

    
8174
    if self.op.disks and self.op.disk_template is not None:
8175
      raise errors.OpPrereqError("Disk template conversion and other disk"
8176
                                 " changes not supported at the same time",
8177
                                 errors.ECODE_INVAL)
8178

    
8179
    if self.op.disk_template:
8180
      _CheckDiskTemplate(self.op.disk_template)
8181
      if (self.op.disk_template in constants.DTS_NET_MIRROR and
8182
          self.op.remote_node is None):
8183
        raise errors.OpPrereqError("Changing the disk template to a mirrored"
8184
                                   " one requires specifying a secondary node",
8185
                                   errors.ECODE_INVAL)
8186

    
8187
    # NIC validation
8188
    nic_addremove = 0
8189
    for nic_op, nic_dict in self.op.nics:
8190
      if nic_op == constants.DDM_REMOVE:
8191
        nic_addremove += 1
8192
        continue
8193
      elif nic_op == constants.DDM_ADD:
8194
        nic_addremove += 1
8195
      else:
8196
        if not isinstance(nic_op, int):
8197
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8198
        if not isinstance(nic_dict, dict):
8199
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8200
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8201

    
8202
      # nic_dict should be a dict
8203
      nic_ip = nic_dict.get('ip', None)
8204
      if nic_ip is not None:
8205
        if nic_ip.lower() == constants.VALUE_NONE:
8206
          nic_dict['ip'] = None
8207
        else:
8208
          if not utils.IsValidIP(nic_ip):
8209
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8210
                                       errors.ECODE_INVAL)
8211

    
8212
      nic_bridge = nic_dict.get('bridge', None)
8213
      nic_link = nic_dict.get('link', None)
8214
      if nic_bridge and nic_link:
8215
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8216
                                   " at the same time", errors.ECODE_INVAL)
8217
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8218
        nic_dict['bridge'] = None
8219
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8220
        nic_dict['link'] = None
8221

    
8222
      if nic_op == constants.DDM_ADD:
8223
        nic_mac = nic_dict.get('mac', None)
8224
        if nic_mac is None:
8225
          nic_dict['mac'] = constants.VALUE_AUTO
8226

    
8227
      if 'mac' in nic_dict:
8228
        nic_mac = nic_dict['mac']
8229
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8230
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8231

    
8232
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8233
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8234
                                     " modifying an existing nic",
8235
                                     errors.ECODE_INVAL)
8236

    
8237
    if nic_addremove > 1:
8238
      raise errors.OpPrereqError("Only one NIC add or remove operation"
8239
                                 " supported at a time", errors.ECODE_INVAL)
8240

    
8241
  def ExpandNames(self):
8242
    self._ExpandAndLockInstance()
8243
    self.needed_locks[locking.LEVEL_NODE] = []
8244
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8245

    
8246
  def DeclareLocks(self, level):
8247
    if level == locking.LEVEL_NODE:
8248
      self._LockInstancesNodes()
8249
      if self.op.disk_template and self.op.remote_node:
8250
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8251
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
8252

    
8253
  def BuildHooksEnv(self):
8254
    """Build hooks env.
8255

8256
    This runs on the master, primary and secondaries.
8257

8258
    """
8259
    args = dict()
8260
    if constants.BE_MEMORY in self.be_new:
8261
      args['memory'] = self.be_new[constants.BE_MEMORY]
8262
    if constants.BE_VCPUS in self.be_new:
8263
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
8264
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
8265
    # information at all.
8266
    if self.op.nics:
8267
      args['nics'] = []
8268
      nic_override = dict(self.op.nics)
8269
      c_nicparams = self.cluster.nicparams[constants.PP_DEFAULT]
8270
      for idx, nic in enumerate(self.instance.nics):
8271
        if idx in nic_override:
8272
          this_nic_override = nic_override[idx]
8273
        else:
8274
          this_nic_override = {}
8275
        if 'ip' in this_nic_override:
8276
          ip = this_nic_override['ip']
8277
        else:
8278
          ip = nic.ip
8279
        if 'mac' in this_nic_override:
8280
          mac = this_nic_override['mac']
8281
        else:
8282
          mac = nic.mac
8283
        if idx in self.nic_pnew:
8284
          nicparams = self.nic_pnew[idx]
8285
        else:
8286
          nicparams = objects.FillDict(c_nicparams, nic.nicparams)
8287
        mode = nicparams[constants.NIC_MODE]
8288
        link = nicparams[constants.NIC_LINK]
8289
        args['nics'].append((ip, mac, mode, link))
8290
      if constants.DDM_ADD in nic_override:
8291
        ip = nic_override[constants.DDM_ADD].get('ip', None)
8292
        mac = nic_override[constants.DDM_ADD]['mac']
8293
        nicparams = self.nic_pnew[constants.DDM_ADD]
8294
        mode = nicparams[constants.NIC_MODE]
8295
        link = nicparams[constants.NIC_LINK]
8296
        args['nics'].append((ip, mac, mode, link))
8297
      elif constants.DDM_REMOVE in nic_override:
8298
        del args['nics'][-1]
8299

    
8300
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
8301
    if self.op.disk_template:
8302
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
8303
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8304
    return env, nl, nl
8305

    
8306
  @staticmethod
8307
  def _GetUpdatedParams(old_params, update_dict,
8308
                        default_values, parameter_types):
8309
    """Return the new params dict for the given params.
8310

8311
    @type old_params: dict
8312
    @param old_params: old parameters
8313
    @type update_dict: dict
8314
    @param update_dict: dict containing new parameter values,
8315
                        or constants.VALUE_DEFAULT to reset the
8316
                        parameter to its default value
8317
    @type default_values: dict
8318
    @param default_values: default values for the filled parameters
8319
    @type parameter_types: dict
8320
    @param parameter_types: dict mapping target dict keys to types
8321
                            in constants.ENFORCEABLE_TYPES
8322
    @rtype: (dict, dict)
8323
    @return: (new_parameters, filled_parameters)
8324

8325
    """
8326
    params_copy = copy.deepcopy(old_params)
8327
    for key, val in update_dict.iteritems():
8328
      if val == constants.VALUE_DEFAULT:
8329
        try:
8330
          del params_copy[key]
8331
        except KeyError:
8332
          pass
8333
      else:
8334
        params_copy[key] = val
8335
    utils.ForceDictType(params_copy, parameter_types)
8336
    params_filled = objects.FillDict(default_values, params_copy)
8337
    return (params_copy, params_filled)
8338
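  # Illustrative sketch of _GetUpdatedParams above (hypothetical values):
  # constants.VALUE_DEFAULT drops a key so that it falls back to the defaults
  # in the filled dict, e.g.
  #   old_params  = {"kernel_args": "ro", "root_path": "/dev/sda1"}
  #   update_dict = {"root_path": constants.VALUE_DEFAULT}
  #   -> new dict:    {"kernel_args": "ro"}
  #   -> filled dict: default_values overridden by the new dict only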

    
8339
  def CheckPrereq(self):
8340
    """Check prerequisites.
8341

8342
    This checks the requested changes against the current instance and
    cluster configuration.
8343

8344
    """
8345
    self.force = self.op.force
8346

    
8347
    # checking the new params on the primary/secondary nodes
8348

    
8349
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8350
    cluster = self.cluster = self.cfg.GetClusterInfo()
8351
    assert self.instance is not None, \
8352
      "Cannot retrieve locked instance %s" % self.op.instance_name
8353
    pnode = instance.primary_node
8354
    nodelist = list(instance.all_nodes)
8355

    
8356
    if self.op.disk_template:
8357
      if instance.disk_template == self.op.disk_template:
8358
        raise errors.OpPrereqError("Instance already has disk template %s" %
8359
                                   instance.disk_template, errors.ECODE_INVAL)
8360

    
8361
      if (instance.disk_template,
8362
          self.op.disk_template) not in self._DISK_CONVERSIONS:
8363
        raise errors.OpPrereqError("Unsupported disk template conversion from"
8364
                                   " %s to %s" % (instance.disk_template,
8365
                                                  self.op.disk_template),
8366
                                   errors.ECODE_INVAL)
8367
      if self.op.disk_template in constants.DTS_NET_MIRROR:
8368
        _CheckNodeOnline(self, self.op.remote_node)
8369
        _CheckNodeNotDrained(self, self.op.remote_node)
8370
        disks = [{"size": d.size} for d in instance.disks]
8371
        required = _ComputeDiskSize(self.op.disk_template, disks)
8372
        _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8373
        _CheckInstanceDown(self, instance, "cannot change disk template")
8374

    
8375
    # hvparams processing
8376
    if self.op.hvparams:
8377
      i_hvdict, hv_new = self._GetUpdatedParams(
8378
                             instance.hvparams, self.op.hvparams,
8379
                             cluster.hvparams[instance.hypervisor],
8380
                             constants.HVS_PARAMETER_TYPES)
8381
      # local check
8382
      hypervisor.GetHypervisor(
8383
        instance.hypervisor).CheckParameterSyntax(hv_new)
8384
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8385
      self.hv_new = hv_new # the new actual values
8386
      self.hv_inst = i_hvdict # the new dict (without defaults)
8387
    else:
8388
      self.hv_new = self.hv_inst = {}
8389

    
8390
    # beparams processing
8391
    if self.op.beparams:
8392
      i_bedict, be_new = self._GetUpdatedParams(
8393
                             instance.beparams, self.op.beparams,
8394
                             cluster.beparams[constants.PP_DEFAULT],
8395
                             constants.BES_PARAMETER_TYPES)
8396
      self.be_new = be_new # the new actual values
8397
      self.be_inst = i_bedict # the new dict (without defaults)
8398
    else:
8399
      self.be_new = self.be_inst = {}
8400

    
8401
    self.warn = []
8402

    
8403
    if constants.BE_MEMORY in self.op.beparams and not self.force:
8404
      mem_check_list = [pnode]
8405
      if be_new[constants.BE_AUTO_BALANCE]:
8406
        # either we changed auto_balance to yes or it was from before
8407
        mem_check_list.extend(instance.secondary_nodes)
8408
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
8409
                                                  instance.hypervisor)
8410
      nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8411
                                         instance.hypervisor)
8412
      pninfo = nodeinfo[pnode]
8413
      msg = pninfo.fail_msg
8414
      if msg:
8415
        # Assume the primary node is unreachable and go ahead
8416
        self.warn.append("Can't get info from primary node %s: %s" %
8417
                         (pnode,  msg))
8418
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
8419
        self.warn.append("Node data from primary node %s doesn't contain"
8420
                         " free memory information" % pnode)
8421
      elif instance_info.fail_msg:
8422
        self.warn.append("Can't get instance runtime information: %s" %
8423
                        instance_info.fail_msg)
8424
      else:
8425
        if instance_info.payload:
8426
          current_mem = int(instance_info.payload['memory'])
8427
        else:
8428
          # Assume instance not running
8429
          # (there is a slight race condition here, but it's not very probable,
8430
          # and we have no other way to check)
8431
          current_mem = 0
8432
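        # Worked example with hypothetical numbers: raising BE_MEMORY to
        # 2048 MB while the instance currently uses 512 MB and the node
        # reports 1024 MB free gives miss_mem = 2048 - 512 - 1024 = 512 MB,
        # which is > 0 and therefore aborts the change below.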
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8433
                    pninfo.payload['memory_free'])
8434
        if miss_mem > 0:
8435
          raise errors.OpPrereqError("This change will prevent the instance"
8436
                                     " from starting, due to %d MB of memory"
8437
                                     " missing on its primary node" % miss_mem,
8438
                                     errors.ECODE_NORES)
8439

    
8440
      if be_new[constants.BE_AUTO_BALANCE]:
8441
        for node, nres in nodeinfo.items():
8442
          if node not in instance.secondary_nodes:
8443
            continue
8444
          msg = nres.fail_msg
8445
          if msg:
8446
            self.warn.append("Can't get info from secondary node %s: %s" %
8447
                             (node, msg))
8448
          elif not isinstance(nres.payload.get('memory_free', None), int):
8449
            self.warn.append("Secondary node %s didn't return free"
8450
                             " memory information" % node)
8451
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8452
            self.warn.append("Not enough memory to failover instance to"
8453
                             " secondary node %s" % node)
8454

    
8455
    # NIC processing
8456
    self.nic_pnew = {}
8457
    self.nic_pinst = {}
8458
    for nic_op, nic_dict in self.op.nics:
8459
      if nic_op == constants.DDM_REMOVE:
8460
        if not instance.nics:
8461
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
8462
                                     errors.ECODE_INVAL)
8463
        continue
8464
      if nic_op != constants.DDM_ADD:
8465
        # an existing nic
8466
        if not instance.nics:
8467
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
8468
                                     " no NICs" % nic_op,
8469
                                     errors.ECODE_INVAL)
8470
        if nic_op < 0 or nic_op >= len(instance.nics):
8471
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
8472
                                     " are 0 to %d" %
8473
                                     (nic_op, len(instance.nics) - 1),
8474
                                     errors.ECODE_INVAL)
8475
        old_nic_params = instance.nics[nic_op].nicparams
8476
        old_nic_ip = instance.nics[nic_op].ip
8477
      else:
8478
        old_nic_params = {}
8479
        old_nic_ip = None
8480

    
8481
      update_params_dict = dict([(key, nic_dict[key])
8482
                                 for key in constants.NICS_PARAMETERS
8483
                                 if key in nic_dict])
8484

    
8485
      if 'bridge' in nic_dict:
8486
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
8487

    
8488
      new_nic_params, new_filled_nic_params = \
8489
          self._GetUpdatedParams(old_nic_params, update_params_dict,
8490
                                 cluster.nicparams[constants.PP_DEFAULT],
8491
                                 constants.NICS_PARAMETER_TYPES)
8492
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
8493
      self.nic_pinst[nic_op] = new_nic_params
8494
      self.nic_pnew[nic_op] = new_filled_nic_params
8495
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
8496

    
8497
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
8498
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
8499
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
8500
        if msg:
8501
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
8502
          if self.force:
8503
            self.warn.append(msg)
8504
          else:
8505
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
8506
      if new_nic_mode == constants.NIC_MODE_ROUTED:
8507
        if 'ip' in nic_dict:
8508
          nic_ip = nic_dict['ip']
8509
        else:
8510
          nic_ip = old_nic_ip
8511
        if nic_ip is None:
8512
          raise errors.OpPrereqError('Cannot set the nic ip to None'
8513
                                     ' on a routed nic', errors.ECODE_INVAL)
8514
      if 'mac' in nic_dict:
8515
        nic_mac = nic_dict['mac']
8516
        if nic_mac is None:
8517
          raise errors.OpPrereqError('Cannot set the nic mac to None',
8518
                                     errors.ECODE_INVAL)
8519
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8520
          # otherwise generate the mac
8521
          nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
8522
        else:
8523
          # or validate/reserve the current one
8524
          try:
8525
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
8526
          except errors.ReservationError:
8527
            raise errors.OpPrereqError("MAC address %s already in use"
8528
                                       " in cluster" % nic_mac,
8529
                                       errors.ECODE_NOTUNIQUE)
8530

    
8531
    # DISK processing
8532
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
8533
      raise errors.OpPrereqError("Disk operations not supported for"
8534
                                 " diskless instances",
8535
                                 errors.ECODE_INVAL)
8536
    for disk_op, _ in self.op.disks:
8537
      if disk_op == constants.DDM_REMOVE:
8538
        if len(instance.disks) == 1:
8539
          raise errors.OpPrereqError("Cannot remove the last disk of"
8540
                                     " an instance", errors.ECODE_INVAL)
8541
        _CheckInstanceDown(self, instance, "cannot remove disks")
8542

    
8543
      if (disk_op == constants.DDM_ADD and
8544
          len(instance.disks) >= constants.MAX_DISKS):
8545
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
8546
                                   " add more" % constants.MAX_DISKS,
8547
                                   errors.ECODE_STATE)
8548
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
8549
        # an existing disk
8550
        if disk_op < 0 or disk_op >= len(instance.disks):
8551
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
8552
                                     " are 0 to %d" %
8553
                                     (disk_op, len(instance.disks) - 1),
8554
                                     errors.ECODE_INVAL)
8555

    
8556
    # OS change
8557
    if self.op.os_name and not self.op.force:
8558
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8559
                      self.op.force_variant)
8560

    
8561
    return
8562

    
8563
  def _ConvertPlainToDrbd(self, feedback_fn):
8564
    """Converts an instance from plain to drbd.
8565

8566
    """
8567
    feedback_fn("Converting template to drbd")
8568
    instance = self.instance
8569
    pnode = instance.primary_node
8570
    snode = self.op.remote_node
8571

    
8572
    # create a fake disk info for _GenerateDiskTemplate
8573
    disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
8574
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
8575
                                      instance.name, pnode, [snode],
8576
                                      disk_info, None, None, 0)
8577
    info = _GetInstanceInfoText(instance)
8578
    feedback_fn("Creating aditional volumes...")
8579
    # first, create the missing data and meta devices
8580
    for disk in new_disks:
8581
      # unfortunately this is... not too nice
8582
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
8583
                            info, True)
8584
      for child in disk.children:
8585
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
8586
    # at this stage, all new LVs have been created, we can rename the
8587
    # old ones
8588
    feedback_fn("Renaming original volumes...")
8589
    rename_list = [(o, n.children[0].logical_id)
8590
                   for (o, n) in zip(instance.disks, new_disks)]
8591
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
8592
    result.Raise("Failed to rename original LVs")
8593

    
8594
    feedback_fn("Initializing DRBD devices...")
8595
    # all child devices are in place, we can now create the DRBD devices
8596
    for disk in new_disks:
8597
      for node in [pnode, snode]:
8598
        f_create = node == pnode
8599
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
8600

    
8601
    # at this point, the instance has been modified
8602
    instance.disk_template = constants.DT_DRBD8
8603
    instance.disks = new_disks
8604
    self.cfg.Update(instance, feedback_fn)
8605

    
8606
    # disks are created, waiting for sync
8607
    disk_abort = not _WaitForSync(self, instance)
8608
    if disk_abort:
8609
      raise errors.OpExecError("There are some degraded disks for"
8610
                               " this instance, please cleanup manually")
8611

    
8612
  def _ConvertDrbdToPlain(self, feedback_fn):
8613
    """Converts an instance from drbd to plain.
8614

8615
    """
8616
    instance = self.instance
8617
    assert len(instance.secondary_nodes) == 1
8618
    pnode = instance.primary_node
8619
    snode = instance.secondary_nodes[0]
8620
    feedback_fn("Converting template to plain")
8621

    
8622
    old_disks = instance.disks
8623
    new_disks = [d.children[0] for d in old_disks]
8624

    
8625
    # copy over size and mode
8626
    for parent, child in zip(old_disks, new_disks):
8627
      child.size = parent.size
8628
      child.mode = parent.mode
8629

    
8630
    # update instance structure
8631
    instance.disks = new_disks
8632
    instance.disk_template = constants.DT_PLAIN
8633
    self.cfg.Update(instance, feedback_fn)
8634

    
8635
    feedback_fn("Removing volumes on the secondary node...")
8636
    for disk in old_disks:
8637
      self.cfg.SetDiskID(disk, snode)
8638
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
8639
      if msg:
8640
        self.LogWarning("Could not remove block device %s on node %s,"
8641
                        " continuing anyway: %s", disk.iv_name, snode, msg)
8642

    
8643
    feedback_fn("Removing unneeded volumes on the primary node...")
8644
    for idx, disk in enumerate(old_disks):
8645
      meta = disk.children[1]
8646
      self.cfg.SetDiskID(meta, pnode)
8647
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
8648
      if msg:
8649
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
8650
                        " continuing anyway: %s", idx, pnode, msg)
8651

    
8652

    
8653
  def Exec(self, feedback_fn):
8654
    """Modifies an instance.
8655

8656
    All parameters take effect only at the next restart of the instance.
8657

8658
    """
8659
    # Process here the warnings from CheckPrereq, as we don't have a
8660
    # feedback_fn there.
8661
    for warn in self.warn:
8662
      feedback_fn("WARNING: %s" % warn)
8663

    
8664
    result = []
8665
    instance = self.instance
8666
    # disk changes
8667
    for disk_op, disk_dict in self.op.disks:
8668
      if disk_op == constants.DDM_REMOVE:
8669
        # remove the last disk
8670
        device = instance.disks.pop()
8671
        device_idx = len(instance.disks)
8672
        for node, disk in device.ComputeNodeTree(instance.primary_node):
8673
          self.cfg.SetDiskID(disk, node)
8674
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
8675
          if msg:
8676
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
8677
                            " continuing anyway", device_idx, node, msg)
8678
        result.append(("disk/%d" % device_idx, "remove"))
8679
      elif disk_op == constants.DDM_ADD:
8680
        # add a new disk
8681
        if instance.disk_template == constants.DT_FILE:
8682
          file_driver, file_path = instance.disks[0].logical_id
8683
          file_path = os.path.dirname(file_path)
8684
        else:
8685
          file_driver = file_path = None
8686
        disk_idx_base = len(instance.disks)
8687
        new_disk = _GenerateDiskTemplate(self,
8688
                                         instance.disk_template,
8689
                                         instance.name, instance.primary_node,
8690
                                         instance.secondary_nodes,
8691
                                         [disk_dict],
8692
                                         file_path,
8693
                                         file_driver,
8694
                                         disk_idx_base)[0]
8695
        instance.disks.append(new_disk)
8696
        info = _GetInstanceInfoText(instance)
8697

    
8698
        logging.info("Creating volume %s for instance %s",
8699
                     new_disk.iv_name, instance.name)
8700
        # Note: this needs to be kept in sync with _CreateDisks
8701
        #HARDCODE
8702
        for node in instance.all_nodes:
8703
          f_create = node == instance.primary_node
8704
          try:
8705
            _CreateBlockDev(self, node, instance, new_disk,
8706
                            f_create, info, f_create)
8707
          except errors.OpExecError, err:
8708
            self.LogWarning("Failed to create volume %s (%s) on"
8709
                            " node %s: %s",
8710
                            new_disk.iv_name, new_disk, node, err)
8711
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
8712
                       (new_disk.size, new_disk.mode)))
8713
      else:
8714
        # change a given disk
8715
        instance.disks[disk_op].mode = disk_dict['mode']
8716
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
8717

    
8718
    if self.op.disk_template:
8719
      r_shut = _ShutdownInstanceDisks(self, instance)
8720
      if not r_shut:
8721
        raise errors.OpExecError("Cannot shutdow instance disks, unable to"
8722
                                 " proceed with disk template conversion")
8723
      mode = (instance.disk_template, self.op.disk_template)
8724
      try:
8725
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
8726
      except:
8727
        self.cfg.ReleaseDRBDMinors(instance.name)
8728
        raise
8729
      result.append(("disk_template", self.op.disk_template))
8730

    
8731
    # NIC changes
8732
    for nic_op, nic_dict in self.op.nics:
8733
      if nic_op == constants.DDM_REMOVE:
8734
        # remove the last nic
8735
        del instance.nics[-1]
8736
        result.append(("nic.%d" % len(instance.nics), "remove"))
8737
      elif nic_op == constants.DDM_ADD:
8738
        # mac and bridge should be set by now
8739
        mac = nic_dict['mac']
8740
        ip = nic_dict.get('ip', None)
8741
        nicparams = self.nic_pinst[constants.DDM_ADD]
8742
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
8743
        instance.nics.append(new_nic)
8744
        result.append(("nic.%d" % (len(instance.nics) - 1),
8745
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
8746
                       (new_nic.mac, new_nic.ip,
8747
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
8748
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
8749
                       )))
8750
      else:
8751
        for key in 'mac', 'ip':
8752
          if key in nic_dict:
8753
            setattr(instance.nics[nic_op], key, nic_dict[key])
8754
        if nic_op in self.nic_pinst:
8755
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
8756
        for key, val in nic_dict.iteritems():
8757
          result.append(("nic.%s/%d" % (key, nic_op), val))
8758

    
8759
    # hvparams changes
8760
    if self.op.hvparams:
8761
      instance.hvparams = self.hv_inst
8762
      for key, val in self.op.hvparams.iteritems():
8763
        result.append(("hv/%s" % key, val))
8764

    
8765
    # beparams changes
8766
    if self.op.beparams:
8767
      instance.beparams = self.be_inst
8768
      for key, val in self.op.beparams.iteritems():
8769
        result.append(("be/%s" % key, val))
8770

    
8771
    # OS change
8772
    if self.op.os_name:
8773
      instance.os = self.op.os_name
8774

    
8775
    self.cfg.Update(instance, feedback_fn)
8776

    
8777
    return result
8778

    
8779
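  # Dispatch table used in Exec above: (current template, requested template)
  # maps to the conversion routine, which is called with self passed
  # explicitly; any other combination is rejected in CheckPrereq.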
  _DISK_CONVERSIONS = {
8780
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
8781
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
8782
    }
8783

    
8784
class LUQueryExports(NoHooksLU):
8785
  """Query the exports list
8786

8787
  """
8788
  _OP_REQP = ['nodes']
8789
  REQ_BGL = False
8790

    
8791
  def ExpandNames(self):
8792
    self.needed_locks = {}
8793
    self.share_locks[locking.LEVEL_NODE] = 1
8794
    if not self.op.nodes:
8795
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8796
    else:
8797
      self.needed_locks[locking.LEVEL_NODE] = \
8798
        _GetWantedNodes(self, self.op.nodes)
8799

    
8800
  def CheckPrereq(self):
8801
    """Check prerequisites.
8802

8803
    """
8804
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
8805

    
8806
  def Exec(self, feedback_fn):
8807
    """Compute the list of all the exported system images.
8808

8809
    @rtype: dict
8810
    @return: a dictionary with the structure node->(export-list)
8811
        where export-list is a list of the instances exported on
8812
        that node.
8813

8814
    """
8815
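    # A successful run might return, e.g. (hypothetical names):
    #   {"node1.example.com": ["instance1.example.com"],
    #    "node2.example.com": False}
    # where False marks a node whose export list could not be fetched.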
    rpcresult = self.rpc.call_export_list(self.nodes)
8816
    result = {}
8817
    for node in rpcresult:
8818
      if rpcresult[node].fail_msg:
8819
        result[node] = False
8820
      else:
8821
        result[node] = rpcresult[node].payload
8822

    
8823
    return result
8824

    
8825

    
8826
class LUExportInstance(LogicalUnit):
8827
  """Export an instance to an image in the cluster.
8828

8829
  """
8830
  HPATH = "instance-export"
8831
  HTYPE = constants.HTYPE_INSTANCE
8832
  _OP_REQP = ["instance_name", "target_node", "shutdown"]
8833
  REQ_BGL = False
8834

    
8835
  def CheckArguments(self):
8836
    """Check the arguments.
8837

8838
    """
8839
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
8840
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
8841

    
8842
  def ExpandNames(self):
8843
    self._ExpandAndLockInstance()
8844
    # FIXME: lock only instance primary and destination node
8845
    #
8846
    # Sad but true, for now we have to lock all nodes, as we don't know where
8847
    # the previous export might be, and in this LU we search for it and
8848
    # remove it from its current node. In the future we could fix this by:
8849
    #  - making a tasklet to search (share-lock all), then create the new one,
8850
    #    then one to remove, after
8851
    #  - removing the removal operation altogether
8852
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8853

    
8854
  def DeclareLocks(self, level):
8855
    """Last minute lock declaration."""
8856
    # All nodes are locked anyway, so nothing to do here.
8857

    
8858
  def BuildHooksEnv(self):
8859
    """Build hooks env.
8860

8861
    This will run on the master, primary node and target node.
8862

8863
    """
8864
    env = {
8865
      "EXPORT_NODE": self.op.target_node,
8866
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
8867
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
8868
      }
8869
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8870
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node,
8871
          self.op.target_node]
8872
    return env, nl, nl
8873

    
8874
  def CheckPrereq(self):
8875
    """Check prerequisites.
8876

8877
    This checks that the instance and node names are valid.
8878

8879
    """
8880
    instance_name = self.op.instance_name
8881
    self.instance = self.cfg.GetInstanceInfo(instance_name)
8882
    assert self.instance is not None, \
8883
          "Cannot retrieve locked instance %s" % self.op.instance_name
8884
    _CheckNodeOnline(self, self.instance.primary_node)
8885

    
8886
    self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
8887
    self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
8888
    assert self.dst_node is not None
8889

    
8890
    _CheckNodeOnline(self, self.dst_node.name)
8891
    _CheckNodeNotDrained(self, self.dst_node.name)
8892

    
8893
    # instance disk type verification
8894
    for disk in self.instance.disks:
8895
      if disk.dev_type == constants.LD_FILE:
8896
        raise errors.OpPrereqError("Export not supported for instances with"
8897
                                   " file-based disks", errors.ECODE_INVAL)
8898

    
8899
  def _CreateSnapshots(self, feedback_fn):
8900
    """Creates an LVM snapshot for every disk of the instance.
8901

8902
    @return: List of snapshots as L{objects.Disk} instances
8903

8904
    """
8905
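    # Note: the returned list holds objects.Disk instances for successful
    # snapshots and False entries for disks whose snapshot failed; Exec
    # below copes with both.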
    instance = self.instance
8906
    src_node = instance.primary_node
8907

    
8908
    vgname = self.cfg.GetVGName()
8909

    
8910
    snap_disks = []
8911

    
8912
    for idx, disk in enumerate(instance.disks):
8913
      feedback_fn("Creating a snapshot of disk/%s on node %s" %
8914
                  (idx, src_node))
8915

    
8916
      # result.payload will be a snapshot of an lvm leaf of the one we
8917
      # passed
8918
      result = self.rpc.call_blockdev_snapshot(src_node, disk)
8919
      msg = result.fail_msg
8920
      if msg:
8921
        self.LogWarning("Could not snapshot disk/%s on node %s: %s",
8922
                        idx, src_node, msg)
8923
        snap_disks.append(False)
8924
      else:
8925
        disk_id = (vgname, result.payload)
8926
        new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
8927
                               logical_id=disk_id, physical_id=disk_id,
8928
                               iv_name=disk.iv_name)
8929
        snap_disks.append(new_dev)
8930

    
8931
    return snap_disks
8932

    
8933
  def _RemoveSnapshot(self, feedback_fn, snap_disks, disk_index):
8934
    """Removes an LVM snapshot.
8935

8936
    @type snap_disks: list
8937
    @param snap_disks: The list of all snapshots as returned by
8938
                       L{_CreateSnapshots}
8939
    @type disk_index: number
8940
    @param disk_index: Index of the snapshot to be removed
8941
    @rtype: bool
8942
    @return: Whether removal was successful or not
8943

8944
    """
8945
    disk = snap_disks[disk_index]
8946
    if disk:
8947
      src_node = self.instance.primary_node
8948

    
8949
      feedback_fn("Removing snapshot of disk/%s on node %s" %
8950
                  (disk_index, src_node))
8951

    
8952
      result = self.rpc.call_blockdev_remove(src_node, disk)
8953
      if not result.fail_msg:
8954
        return True
8955

    
8956
      self.LogWarning("Could not remove snapshot for disk/%d from node"
8957
                      " %s: %s", disk_index, src_node, result.fail_msg)
8958

    
8959
    return False
8960

    
8961
  def _CleanupExports(self, feedback_fn):
8962
    """Removes exports of current instance from all other nodes.
8963

8964
    If an instance in a cluster with nodes A..D was exported to node C, its
8965
    exports will be removed from the nodes A, B and D.
8966

8967
    """
8968
    nodelist = self.cfg.GetNodeList()
8969
    nodelist.remove(self.dst_node.name)
8970

    
8971
    # on one-node clusters nodelist will be empty after the removal
8972
    # if we proceeded, the backup would be removed because OpQueryExports
8973
    # substitutes an empty list with the full cluster node list.
8974
    iname = self.instance.name
8975
    if nodelist:
8976
      feedback_fn("Removing old exports for instance %s" % iname)
8977
      exportlist = self.rpc.call_export_list(nodelist)
8978
      for node in exportlist:
8979
        if exportlist[node].fail_msg:
8980
          continue
8981
        if iname in exportlist[node].payload:
8982
          msg = self.rpc.call_export_remove(node, iname).fail_msg
8983
          if msg:
8984
            self.LogWarning("Could not remove older export for instance %s"
8985
                            " on node %s: %s", iname, node, msg)
8986

    
8987
  def Exec(self, feedback_fn):
8988
    """Export an instance to an image in the cluster.
8989

8990
    """
8991
    instance = self.instance
8992
    dst_node = self.dst_node
8993
    src_node = instance.primary_node
8994

    
8995
    if self.op.shutdown:
8996
      # shutdown the instance, but not the disks
8997
      feedback_fn("Shutting down instance %s" % instance.name)
8998
      result = self.rpc.call_instance_shutdown(src_node, instance,
8999
                                               self.shutdown_timeout)
9000
      result.Raise("Could not shutdown instance %s on"
9001
                   " node %s" % (instance.name, src_node))
9002

    
9003
    # set the disks ID correctly since call_instance_start needs the
9004
    # correct drbd minor to create the symlinks
9005
    for disk in instance.disks:
9006
      self.cfg.SetDiskID(disk, src_node)
9007

    
9008
    activate_disks = (not instance.admin_up)
9009

    
9010
    if activate_disks:
9011
      # Activate the instance disks if we're exporting a stopped instance
9012
      feedback_fn("Activating disks for %s" % instance.name)
9013
      _StartInstanceDisks(self, instance, None)
9014

    
9015
    try:
9016
      # per-disk results
9017
      dresults = []
9018
      removed_snaps = [False] * len(instance.disks)
9019

    
9020
      snap_disks = None
9021
      try:
9022
        try:
9023
          snap_disks = self._CreateSnapshots(feedback_fn)
9024
        finally:
9025
          if self.op.shutdown and instance.admin_up:
9026
            feedback_fn("Starting instance %s" % instance.name)
9027
            result = self.rpc.call_instance_start(src_node, instance,
9028
                                                  None, None)
9029
            msg = result.fail_msg
9030
            if msg:
9031
              _ShutdownInstanceDisks(self, instance)
9032
              raise errors.OpExecError("Could not start instance: %s" % msg)
9033

    
9034
        assert len(snap_disks) == len(instance.disks)
9035
        assert len(removed_snaps) == len(instance.disks)
9036

    
9037
        # TODO: check for size
9038

    
9039
        cluster_name = self.cfg.GetClusterName()
9040
        for idx, dev in enumerate(snap_disks):
9041
          feedback_fn("Exporting snapshot %s from %s to %s" %
9042
                      (idx, src_node, dst_node.name))
9043
          if dev:
9044
            # FIXME: pass debug from opcode to backend
9045
            result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
9046
                                                   instance, cluster_name,
9047
                                                   idx, self.op.debug_level)
9048
            msg = result.fail_msg
9049
            if msg:
9050
              self.LogWarning("Could not export disk/%s from node %s to"
9051
                              " node %s: %s", idx, src_node, dst_node.name, msg)
9052
              dresults.append(False)
9053
            else:
9054
              dresults.append(True)
9055

    
9056
            # Remove snapshot
9057
            if self._RemoveSnapshot(feedback_fn, snap_disks, idx):
9058
              removed_snaps[idx] = True
9059
          else:
9060
            dresults.append(False)
9061

    
9062
        assert len(dresults) == len(instance.disks)
9063

    
9064
        # Check for backwards compatibility
9065
        assert compat.all(isinstance(i, bool) for i in dresults), \
9066
               "Not all results are boolean: %r" % dresults
9067

    
9068
        feedback_fn("Finalizing export on %s" % dst_node.name)
9069
        result = self.rpc.call_finalize_export(dst_node.name, instance,
9070
                                               snap_disks)
9071
        msg = result.fail_msg
9072
        fin_resu = not msg
9073
        if msg:
9074
          self.LogWarning("Could not finalize export for instance %s"
9075
                          " on node %s: %s", instance.name, dst_node.name, msg)
9076

    
9077
      finally:
9078
        # Remove all snapshots
9079
        assert len(removed_snaps) == len(instance.disks)
9080
        for idx, removed in enumerate(removed_snaps):
9081
          if not removed:
9082
            self._RemoveSnapshot(feedback_fn, snap_disks, idx)
9083

    
9084
    finally:
9085
      if activate_disks:
9086
        feedback_fn("Deactivating disks for %s" % instance.name)
9087
        _ShutdownInstanceDisks(self, instance)
9088

    
9089
    self._CleanupExports(feedback_fn)
9090

    
9091
    return fin_resu, dresults
9092

    
9093

    
9094
class LURemoveExport(NoHooksLU):
9095
  """Remove exports related to the named instance.
9096

9097
  """
9098
  _OP_REQP = ["instance_name"]
9099
  REQ_BGL = False
9100

    
9101
  def ExpandNames(self):
9102
    self.needed_locks = {}
9103
    # We need all nodes to be locked in order for RemoveExport to work, but we
9104
    # don't need to lock the instance itself, as nothing will happen to it (and
9105
    # we can remove exports also for a removed instance)
9106
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9107

    
9108
  def CheckPrereq(self):
9109
    """Check prerequisites.
9110
    """
9111
    pass
9112

    
9113
  def Exec(self, feedback_fn):
9114
    """Remove any export.
9115

9116
    """
9117
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
9118
    # If the instance was not found we'll try with the name that was passed in.
9119
    # This will only work if it was an FQDN, though.
9120
    fqdn_warn = False
9121
    if not instance_name:
9122
      fqdn_warn = True
9123
      instance_name = self.op.instance_name
9124

    
9125
    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
9126
    exportlist = self.rpc.call_export_list(locked_nodes)
9127
    found = False
9128
    for node in exportlist:
9129
      msg = exportlist[node].fail_msg
9130
      if msg:
9131
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
9132
        continue
9133
      if instance_name in exportlist[node].payload:
9134
        found = True
9135
        result = self.rpc.call_export_remove(node, instance_name)
9136
        msg = result.fail_msg
9137
        if msg:
9138
          logging.error("Could not remove export for instance %s"
9139
                        " on node %s: %s", instance_name, node, msg)
9140

    
9141
    if fqdn_warn and not found:
9142
      feedback_fn("Export not found. If trying to remove an export belonging"
9143
                  " to a deleted instance please use its Fully Qualified"
9144
                  " Domain Name.")
9145

    
9146

    
9147
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
9148
  """Generic tags LU.
9149

9150
  This is an abstract class which is the parent of all the other tags LUs.
9151

9152
  """
9153

    
9154
  def ExpandNames(self):
9155
    self.needed_locks = {}
9156
    if self.op.kind == constants.TAG_NODE:
9157
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
9158
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
9159
    elif self.op.kind == constants.TAG_INSTANCE:
9160
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
9161
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
9162

    
9163
  def CheckPrereq(self):
9164
    """Check prerequisites.
9165

9166
    """
9167
    if self.op.kind == constants.TAG_CLUSTER:
9168
      self.target = self.cfg.GetClusterInfo()
9169
    elif self.op.kind == constants.TAG_NODE:
9170
      self.target = self.cfg.GetNodeInfo(self.op.name)
9171
    elif self.op.kind == constants.TAG_INSTANCE:
9172
      self.target = self.cfg.GetInstanceInfo(self.op.name)
9173
    else:
9174
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
9175
                                 str(self.op.kind), errors.ECODE_INVAL)
9176

    
9177

    
9178
class LUGetTags(TagsLU):
9179
  """Returns the tags of a given object.
9180

9181
  """
9182
  _OP_REQP = ["kind", "name"]
9183
  REQ_BGL = False
9184

    
9185
  def Exec(self, feedback_fn):
9186
    """Returns the tag list.
9187

9188
    """
9189
    return list(self.target.GetTags())
9190

    
9191

    
9192
class LUSearchTags(NoHooksLU):
9193
  """Searches the tags for a given pattern.
9194

9195
  """
9196
  _OP_REQP = ["pattern"]
9197
  REQ_BGL = False
9198

    
9199
  def ExpandNames(self):
9200
    self.needed_locks = {}
9201

    
9202
  def CheckPrereq(self):
9203
    """Check prerequisites.
9204

9205
    This checks the pattern passed for validity by compiling it.
9206

9207
    """
9208
    try:
9209
      self.re = re.compile(self.op.pattern)
9210
    except re.error, err:
9211
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
9212
                                 (self.op.pattern, err), errors.ECODE_INVAL)
9213

    
9214
  def Exec(self, feedback_fn):
9215
    """Returns the tag list.
9216

9217
    """
9218
    cfg = self.cfg
9219
    tgts = [("/cluster", cfg.GetClusterInfo())]
9220
    ilist = cfg.GetAllInstancesInfo().values()
9221
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
9222
    nlist = cfg.GetAllNodesInfo().values()
9223
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
9224
    results = []
9225
    for path, target in tgts:
9226
      for tag in target.GetTags():
9227
        if self.re.search(tag):
9228
          results.append((path, tag))
9229
    return results
9230

    
9231

    
9232
class LUAddTags(TagsLU):
9233
  """Sets a tag on a given object.
9234

9235
  """
9236
  _OP_REQP = ["kind", "name", "tags"]
9237
  REQ_BGL = False
9238

    
9239
  def CheckPrereq(self):
9240
    """Check prerequisites.
9241

9242
    This checks the type and length of the tag name and value.
9243

9244
    """
9245
    TagsLU.CheckPrereq(self)
9246
    for tag in self.op.tags:
9247
      objects.TaggableObject.ValidateTag(tag)
9248

    
9249
  def Exec(self, feedback_fn):
9250
    """Sets the tag.
9251

9252
    """
9253
    try:
9254
      for tag in self.op.tags:
9255
        self.target.AddTag(tag)
9256
    except errors.TagError, err:
9257
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
9258
    self.cfg.Update(self.target, feedback_fn)
9259

    
9260

    
9261
class LUDelTags(TagsLU):
9262
  """Delete a list of tags from a given object.
9263

9264
  """
9265
  _OP_REQP = ["kind", "name", "tags"]
9266
  REQ_BGL = False
9267

    
9268
  def CheckPrereq(self):
9269
    """Check prerequisites.
9270

9271
    This checks that we have the given tag.
9272

9273
    """
9274
    TagsLU.CheckPrereq(self)
9275
    for tag in self.op.tags:
9276
      objects.TaggableObject.ValidateTag(tag)
9277
    del_tags = frozenset(self.op.tags)
9278
    cur_tags = self.target.GetTags()
9279
    if not del_tags <= cur_tags:
9280
      diff_tags = del_tags - cur_tags
9281
      diff_names = ["'%s'" % tag for tag in diff_tags]
9282
      diff_names.sort()
9283
      raise errors.OpPrereqError("Tag(s) %s not found" %
9284
                                 (",".join(diff_names)), errors.ECODE_NOENT)
9285

    
9286
  def Exec(self, feedback_fn):
9287
    """Remove the tag from the object.
9288

9289
    """
9290
    for tag in self.op.tags:
9291
      self.target.RemoveTag(tag)
9292
    self.cfg.Update(self.target, feedback_fn)
9293

    
9294

    
9295
class LUTestDelay(NoHooksLU):
9296
  """Sleep for a specified amount of time.
9297

9298
  This LU sleeps on the master and/or nodes for a specified amount of
9299
  time.
9300

9301
  """
9302
  _OP_REQP = ["duration", "on_master", "on_nodes"]
9303
  REQ_BGL = False
9304

    
9305
  def ExpandNames(self):
9306
    """Expand names and set required locks.
9307

9308
    This expands the node list, if any.
9309

9310
    """
9311
    self.needed_locks = {}
9312
    if self.op.on_nodes:
9313
      # _GetWantedNodes can be used here, but is not always appropriate to use
9314
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
9315
      # more information.
9316
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
9317
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
9318

    
9319
  def CheckPrereq(self):
9320
    """Check prerequisites.
9321

9322
    """
9323

    
9324
  def Exec(self, feedback_fn):
9325
    """Do the actual sleep.
9326

9327
    """
9328
    if self.op.on_master:
9329
      if not utils.TestDelay(self.op.duration):
9330
        raise errors.OpExecError("Error during master delay test")
9331
    if self.op.on_nodes:
9332
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
9333
      for node, node_result in result.items():
9334
        node_result.Raise("Failure during rpc call to node %s" % node)
9335

    
9336

    
9337
class IAllocator(object):
9338
  """IAllocator framework.
9339

9340
  An IAllocator instance has four sets of attributes:
9341
    - cfg that is needed to query the cluster
9342
    - input data (all members of the _KEYS class attribute are required)
9343
    - four buffer attributes (in|out_data|text), that represent the
9344
      input (to the external script) in text and data structure format,
9345
      and the output from it, again in two formats
9346
    - the result variables from the script (success, info, result) for
9347
      easy usage
9348

9349
  """
9350
  # pylint: disable-msg=R0902
9351
  # lots of instance attributes
9352
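  # Minimal usage sketch (hypothetical values), mirroring how the LUs in this
  # module drive this class:
  #   ial = IAllocator(self.cfg, self.rpc,
  #                    mode=constants.IALLOCATOR_MODE_RELOC,
  #                    name="instance1.example.com",
  #                    relocate_from=["node2.example.com"])
  #   ial.Run(self.op.iallocator)  # name of the iallocator script to use
  #   if not ial.success:
  #     raise errors.OpPrereqError("Can't compute nodes: %s" % ial.info, ...)
  #   new_nodes = ial.result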
  _ALLO_KEYS = [
9353
    "name", "mem_size", "disks", "disk_template",
9354
    "os", "tags", "nics", "vcpus", "hypervisor",
9355
    ]
9356
  _RELO_KEYS = [
9357
    "name", "relocate_from",
9358
    ]
9359
  _EVAC_KEYS = [
9360
    "evac_nodes",
9361
    ]
9362

    
9363
  def __init__(self, cfg, rpc, mode, **kwargs):
9364
    self.cfg = cfg
9365
    self.rpc = rpc
9366
    # init buffer variables
9367
    self.in_text = self.out_text = self.in_data = self.out_data = None
9368
    # init all input fields so that pylint is happy
9369
    self.mode = mode
9370
    self.mem_size = self.disks = self.disk_template = None
9371
    self.os = self.tags = self.nics = self.vcpus = None
9372
    self.hypervisor = None
9373
    self.relocate_from = None
9374
    self.name = None
9375
    self.evac_nodes = None
9376
    # computed fields
9377
    self.required_nodes = None
9378
    # init result fields
9379
    self.success = self.info = self.result = None
9380
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
9381
      keyset = self._ALLO_KEYS
9382
      fn = self._AddNewInstance
9383
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
9384
      keyset = self._RELO_KEYS
9385
      fn = self._AddRelocateInstance
9386
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
9387
      keyset = self._EVAC_KEYS
9388
      fn = self._AddEvacuateNodes
9389
    else:
9390
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
9391
                                   " IAllocator" % self.mode)
9392
    for key in kwargs:
9393
      if key not in keyset:
9394
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
9395
                                     " IAllocator" % key)
9396
      setattr(self, key, kwargs[key])
9397

    
9398
    for key in keyset:
9399
      if key not in kwargs:
9400
        raise errors.ProgrammerError("Missing input parameter '%s' to"
9401
                                     " IAllocator" % key)
9402
    self._BuildInputData(fn)
9403

    
9404
  def _ComputeClusterData(self):
9405
    """Compute the generic allocator input data.
9406

9407
    This is the data that is independent of the actual operation.
9408

9409
    """
9410
    cfg = self.cfg
9411
    cluster_info = cfg.GetClusterInfo()
9412
    # cluster data
9413
    data = {
9414
      "version": constants.IALLOCATOR_VERSION,
9415
      "cluster_name": cfg.GetClusterName(),
9416
      "cluster_tags": list(cluster_info.GetTags()),
9417
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
9418
      # we don't have job IDs
9419
      }
9420
    iinfo = cfg.GetAllInstancesInfo().values()
9421
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
9422

    
9423
    # node data
9424
    node_results = {}
9425
    node_list = cfg.GetNodeList()
9426

    
9427
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
9428
      hypervisor_name = self.hypervisor
9429
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
9430
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
9431
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
9432
      hypervisor_name = cluster_info.enabled_hypervisors[0]
9433

    
9434
    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
9435
                                        hypervisor_name)
9436
    node_iinfo = \
9437
      self.rpc.call_all_instances_info(node_list,
9438
                                       cluster_info.enabled_hypervisors)
9439
    for nname, nresult in node_data.items():
9440
      # first fill in static (config-based) values
9441
      ninfo = cfg.GetNodeInfo(nname)
9442
      pnr = {
9443
        "tags": list(ninfo.GetTags()),
9444
        "primary_ip": ninfo.primary_ip,
9445
        "secondary_ip": ninfo.secondary_ip,
9446
        "offline": ninfo.offline,
9447
        "drained": ninfo.drained,
9448
        "master_candidate": ninfo.master_candidate,
9449
        }
9450

    
9451
      if not (ninfo.offline or ninfo.drained):
9452
        nresult.Raise("Can't get data for node %s" % nname)
9453
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
9454
                                nname)
9455
        remote_info = nresult.payload
9456

    
9457
        for attr in ['memory_total', 'memory_free', 'memory_dom0',
9458
                     'vg_size', 'vg_free', 'cpu_total']:
9459
          if attr not in remote_info:
9460
            raise errors.OpExecError("Node '%s' didn't return attribute"
9461
                                     " '%s'" % (nname, attr))
9462
          if not isinstance(remote_info[attr], int):
9463
            raise errors.OpExecError("Node '%s' returned invalid value"
9464
                                     " for '%s': %s" %
9465
                                     (nname, attr, remote_info[attr]))
9466
        # compute memory used by primary instances
9467
        i_p_mem = i_p_up_mem = 0
9468
        for iinfo, beinfo in i_list:
9469
          if iinfo.primary_node == nname:
9470
            i_p_mem += beinfo[constants.BE_MEMORY]
9471
            if iinfo.name not in node_iinfo[nname].payload:
9472
              i_used_mem = 0
9473
            else:
9474
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
9475
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
9476
            remote_info['memory_free'] -= max(0, i_mem_diff)
9477

    
9478
            if iinfo.admin_up:
9479
              i_p_up_mem += beinfo[constants.BE_MEMORY]
9480

    
9481
        # compute memory used by instances
9482
        pnr_dyn = {
9483
          "total_memory": remote_info['memory_total'],
9484
          "reserved_memory": remote_info['memory_dom0'],
9485
          "free_memory": remote_info['memory_free'],
9486
          "total_disk": remote_info['vg_size'],
9487
          "free_disk": remote_info['vg_free'],
9488
          "total_cpus": remote_info['cpu_total'],
9489
          "i_pri_memory": i_p_mem,
9490
          "i_pri_up_memory": i_p_up_mem,
9491
          }
9492
        pnr.update(pnr_dyn)
9493

    
9494
      node_results[nname] = pnr
9495
    data["nodes"] = node_results
9496

    
9497
    # instance data
9498
    instance_data = {}
9499
    for iinfo, beinfo in i_list:
9500
      nic_data = []
9501
      for nic in iinfo.nics:
9502
        filled_params = objects.FillDict(
9503
            cluster_info.nicparams[constants.PP_DEFAULT],
9504
            nic.nicparams)
9505
        nic_dict = {"mac": nic.mac,
9506
                    "ip": nic.ip,
9507
                    "mode": filled_params[constants.NIC_MODE],
9508
                    "link": filled_params[constants.NIC_LINK],
9509
                   }
9510
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
9511
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
9512
        nic_data.append(nic_dict)
9513
      pir = {
9514
        "tags": list(iinfo.GetTags()),
9515
        "admin_up": iinfo.admin_up,
9516
        "vcpus": beinfo[constants.BE_VCPUS],
9517
        "memory": beinfo[constants.BE_MEMORY],
9518
        "os": iinfo.os,
9519
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
9520
        "nics": nic_data,
9521
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
9522
        "disk_template": iinfo.disk_template,
9523
        "hypervisor": iinfo.hypervisor,
9524
        }
9525
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
9526
                                                 pir["disks"])
9527
      instance_data[iinfo.name] = pir
9528

    
9529
    data["instances"] = instance_data
9530

    
9531
    self.in_data = data
9532
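    # At this point self.in_data holds the mode-independent skeleton, roughly:
    #   {"version": ..., "cluster_name": ..., "cluster_tags": [...],
    #    "enabled_hypervisors": [...], "nodes": {...}, "instances": {...}}
    # the mode-specific "request" key is added later by _BuildInputData.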

    
9533
  def _AddNewInstance(self):
9534
    """Add new instance data to allocator structure.
9535

9536
    This in combination with _AllocatorGetClusterData will create the
9537
    correct structure needed as input for the allocator.
9538

9539
    The checks for the completeness of the opcode must have already been
9540
    done.
9541

9542
    """
9543
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)
9544

    
9545
    if self.disk_template in constants.DTS_NET_MIRROR:
9546
      self.required_nodes = 2
9547
    else:
9548
      self.required_nodes = 1
9549
    request = {
9550
      "name": self.name,
9551
      "disk_template": self.disk_template,
9552
      "tags": self.tags,
9553
      "os": self.os,
9554
      "vcpus": self.vcpus,
9555
      "memory": self.mem_size,
9556
      "disks": self.disks,
9557
      "disk_space_total": disk_space,
9558
      "nics": self.nics,
9559
      "required_nodes": self.required_nodes,
9560
      }
9561
    return request
9562

    
9563
  def _AddRelocateInstance(self):
9564
    """Add relocate instance data to allocator structure.
9565

9566
    This in combination with _ComputeClusterData will create the
9567
    correct structure needed as input for the allocator.
9568

9569
    The checks for the completeness of the opcode must have already been
9570
    done.
9571

9572
    """
9573
    instance = self.cfg.GetInstanceInfo(self.name)
9574
    if instance is None:
9575
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
9576
                                   " IAllocator" % self.name)
9577

    
9578
    if instance.disk_template not in constants.DTS_NET_MIRROR:
9579
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
9580
                                 errors.ECODE_INVAL)
9581

    
9582
    if len(instance.secondary_nodes) != 1:
9583
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
9584
                                 errors.ECODE_STATE)
9585

    
9586
    self.required_nodes = 1
9587
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
9588
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
9589

    
9590
    request = {
9591
      "name": self.name,
9592
      "disk_space_total": disk_space,
9593
      "required_nodes": self.required_nodes,
9594
      "relocate_from": self.relocate_from,
9595
      }
9596
    return request
9597

    
9598
  def _AddEvacuateNodes(self):
9599
    """Add evacuate nodes data to allocator structure.
9600

9601
    """
9602
    request = {
9603
      "evac_nodes": self.evac_nodes
9604
      }
9605
    return request
9606

    
9607
  def _BuildInputData(self, fn):
9608
    """Build input data structures.
9609

9610
    """
9611
    self._ComputeClusterData()
9612

    
9613
    request = fn()
9614
    request["type"] = self.mode
9615
    self.in_data["request"] = request
9616

    
9617
    self.in_text = serializer.Dump(self.in_data)
9618

    
9619
  def Run(self, name, validate=True, call_fn=None):
9620
    """Run an instance allocator and return the results.
9621

9622
    """
9623
    if call_fn is None:
9624
      call_fn = self.rpc.call_iallocator_runner
9625

    
9626
    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
9627
    result.Raise("Failure while running the iallocator script")
9628

    
9629
    self.out_text = result.payload
9630
    if validate:
9631
      self._ValidateResult()
9632

    
9633
  def _ValidateResult(self):
9634
    """Process the allocator results.
9635

9636
    This will process and if successful save the result in
9637
    self.out_data and the other parameters.
9638

9639
    """
9640
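    # A well-formed reply, once deserialized, is expected to look like, e.g.
    # (hypothetical values):
    #   {"success": True, "info": "allocation successful",
    #    "result": ["node1.example.com", "node2.example.com"]}
    # older scripts may still return "nodes" instead of "result"; this is
    # handled below.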
    try:
9641
      rdict = serializer.Load(self.out_text)
9642
    except Exception, err:
9643
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
9644

    
9645
    if not isinstance(rdict, dict):
9646
      raise errors.OpExecError("Can't parse iallocator results: not a dict")
9647

    
9648
    # TODO: remove backwards compatibility in later versions
9649
    if "nodes" in rdict and "result" not in rdict:
9650
      rdict["result"] = rdict["nodes"]
9651
      del rdict["nodes"]
9652

    
9653
    for key in "success", "info", "result":
9654
      if key not in rdict:
9655
        raise errors.OpExecError("Can't parse iallocator results:"
9656
                                 " missing key '%s'" % key)
9657
      setattr(self, key, rdict[key])
9658

    
9659
    if not isinstance(rdict["result"], list):
9660
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
9661
                               " is not a list")
9662
    self.out_data = rdict
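
  # NOTE (illustrative only, not part of the original code): the minimal
  # well-formed allocator reply accepted above is a serialized dict such as
  #   {"success": true, "info": "allocation successful",
  #    "result": ["node1.example.com", "node3.example.com"]}
  # where "result" must be a list; a legacy "nodes" key is transparently
  # renamed to "result" for older allocator scripts.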


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  _OP_REQP = ["direction", "mode", "name"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the test direction and mode.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["name", "mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      for row in self.op.nics:
        if (not isinstance(row, dict) or
            "mac" not in row or
            "ip" not in row or
            "bridge" not in row):
          raise errors.OpPrereqError("Invalid contents of the 'nics'"
                                     " parameter", errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      if not hasattr(self.op, "name"):
        raise errors.OpPrereqError("Missing attribute 'name' on opcode input",
                                   errors.ECODE_INVAL)
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if not hasattr(self.op, "allocator") or self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)
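
  # NOTE (illustrative only, not part of the original code): in the
  # allocation mode the opcode is expected to carry, among other attributes,
  # entries shaped like (all values made up)
  #   nics  = [{"mac": "aa:00:00:35:be:01", "ip": None, "bridge": "xen-br0"}]
  #   disks = [{"size": 1024, "mode": "w"}]
  # which is exactly what the checks above enforce.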

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      raise errors.ProgrammerError("Uncaught mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result
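
  # NOTE (illustrative only, not part of the original code): with direction
  # IALLOCATOR_DIR_IN this LU only returns the generated allocator input text
  # without running anything, while IALLOCATOR_DIR_OUT runs the named
  # allocator and returns its raw, unvalidated output (validate=False above),
  # which makes this LU suitable for debugging allocator scripts.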