#
#

# Copyright (C) 2006, 2007, 2008 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable-msg=W0201

# W0201 since most LU attributes are defined in CheckPrereq or similar
# functions

import os
import os.path
import time
import re
import platform
import logging
import copy
import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd

import ganeti.masterd.instance # pylint: disable-msg=W0611


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  _OP_REQP = []
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    self.__ssh = None
    # logging
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    for attr_name in self._OP_REQP:
      attr_val = getattr(op, attr_name, None)
      if attr_val is None:
        raise errors.OpPrereqError("Required parameter '%s' missing" %
                                   attr_name, errors.ECODE_INVAL)

    self.CheckArguments()

  def __GetSSH(self):
    """Returns the SshRunner object.

    """
    if not self.__ssh:
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
    return self.__ssh

  ssh = property(fget=__GetSSH)

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as a purely lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods need not worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.tld'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level.

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    This method should return a three-element tuple consisting of: a dict
    containing the environment that will be used for running the
    specific hook for this LU, a list of node names on which the hook
    should run before the execution, and a list of node names on which
    the hook should run after the execution.

    The keys of the dict must not be prefixed with 'GANETI_' as this will
    be handled in the hooks runner. Also note additional keys will be
    added by the hooks runner. If the LU doesn't define any
    environment, an empty dict (and not None) should be returned.

    Empty node lists should be returned as empty lists (and not None).

    Note that if the HPATH for a LU class is None, this function will
    not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks.  By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the "unused argument" and "could
    # be a function" warnings
    # pylint: disable-msg=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]


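# The class below is a minimal, hypothetical sketch added for illustration
# only; it is not part of the upstream cmdlib.py and is not used anywhere.
# It shows how a concurrent LU can combine _ExpandAndLockInstance with
# DeclareLocks/_LockInstancesNodes, as described in the docstrings above.
# The class name and the "instance_name" opcode field are assumptions.
class _ExampleInstanceLU(LogicalUnit):
  """Hypothetical LU illustrating the instance/node locking pattern."""
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    # expands self.op.instance_name and declares the instance-level lock
    self._ExpandAndLockInstance()
    # node locks are computed later, once the instance lock is held
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)

  def Exec(self, feedback_fn):
    feedback_fn("Would operate on instance %s" % self.instance.name)

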
class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This just raises an error.

    """
    assert False, "BuildHooksEnv called for NoHooksLUs"


class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    self.lu = lu

    # Shortcuts
    self.cfg = lu.cfg
    self.rpc = lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError


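# A short, hypothetical sketch added for illustration only (not part of the
# upstream module): an LU can delegate its work to tasklets by filling
# self.tasklets in ExpandNames; the LogicalUnit.CheckPrereq and Exec defined
# above then iterate over them. The class names and the "instances" opcode
# field are assumptions.
class _ExampleTasklet(Tasklet):
  """Hypothetical tasklet operating on a single instance."""
  def __init__(self, lu, instance_name):
    Tasklet.__init__(self, lu)
    self.instance_name = instance_name

  def CheckPrereq(self):
    self.instance = self.cfg.GetInstanceInfo(self.instance_name)

  def Exec(self, feedback_fn):
    feedback_fn("Would process instance %s" % self.instance.name)


class _ExampleTaskletLU(NoHooksLU):
  """Hypothetical LU consisting entirely of tasklets."""
  _OP_REQP = ["instances"]

  def ExpandNames(self):
    self.needed_locks = {}  # running under the BGL, no further locks needed
    self.tasklets = [_ExampleTasklet(self, name)
                     for name in self.op.instances]

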
def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.OpPrereqError: if the nodes parameter is not a list
  @raise errors.ProgrammerError: if the nodes parameter is empty

  """
  if not isinstance(nodes, list):
    raise errors.OpPrereqError("Invalid argument type 'nodes'",
                               errors.ECODE_INVAL)

  if not nodes:
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
      " non-empty list of nodes whose name is to be expanded.")

  wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
  return utils.NiceSort(wanted)


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names, or an empty list for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if not isinstance(instances, list):
    raise errors.OpPrereqError("Invalid argument type 'instances'",
                               errors.ECODE_INVAL)

  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckBooleanOpField(op, name):
  """Validates boolean opcode parameters.

  This will ensure that an opcode parameter is either a boolean value,
  or None (but that it always exists).

  """
  val = getattr(op, name, None)
  if not (val is None or isinstance(val, bool)):
    raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
                               (name, str(val)), errors.ECODE_INVAL)
  setattr(op, name, val)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is offline

  """
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("Can't use offline node %s" % node,
                               errors.ECODE_INVAL)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_INVAL)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node does not support the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _RequireFileStorage():
  """Checks that file storage is enabled.

  @raise errors.OpPrereqError: when file storage is disabled

  """
  if not constants.ENABLE_FILE_STORAGE:
    raise errors.OpPrereqError("File storage disabled at configure time",
                               errors.ECODE_INVAL)


def _CheckDiskTemplate(template):
  """Ensure a given disk template is valid.

  """
  if template not in constants.DISK_TEMPLATES:
    msg = ("Invalid disk template name '%s', valid templates are: %s" %
           (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
  if template == constants.DT_FILE:
    _RequireFileStorage()


def _CheckStorageType(storage_type):
  """Ensure a given storage type is valid.

  """
  if storage_type not in constants.VALID_STORAGE_TYPES:
    raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
                               errors.ECODE_INVAL)
  if storage_type == constants.ST_FILE:
    _RequireFileStorage()


def _CheckInstanceDown(lu, instance, reason):
  """Ensure that an instance is not running."""
  if instance.admin_up:
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
                               (instance.name, reason), errors.ECODE_STATE)

  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
              prereq=True, ecode=errors.ECODE_ENVIRON)

  if instance.name in ins_l.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, reason), errors.ECODE_STATE)


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instances."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name):
  """Builds instance related env variables for hooks.

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env


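# Worked example for exposition only (not part of the upstream module): for a
# hypothetical running instance "inst1.example.tld" with one bridged NIC and a
# single 10240 MB disk, _BuildInstanceHookEnv returns keys along the lines of
# OP_TARGET/INSTANCE_NAME=inst1.example.tld, INSTANCE_STATUS=up,
# INSTANCE_NIC_COUNT=1, INSTANCE_NIC0_MODE=bridged, INSTANCE_NIC0_BRIDGE set to
# the NIC's link, INSTANCE_DISK_COUNT=1 and INSTANCE_DISK0_SIZE=10240, plus one
# INSTANCE_BE_*/INSTANCE_HV_* entry per backend and hypervisor parameter; the
# GANETI_ prefix is added later by the hooks runner (see BuildHooksEnv above).

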
def _NICListToTuple(lu, nics):
735
  """Build a list of nic information tuples.
736

737
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
738
  value in LUQueryInstanceData.
739

740
  @type lu:  L{LogicalUnit}
741
  @param lu: the logical unit on whose behalf we execute
742
  @type nics: list of L{objects.NIC}
743
  @param nics: list of nics to convert to hooks tuples
744

745
  """
746
  hooks_nics = []
747
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]
748
  for nic in nics:
749
    ip = nic.ip
750
    mac = nic.mac
751
    filled_params = objects.FillDict(c_nicparams, nic.nicparams)
752
    mode = filled_params[constants.NIC_MODE]
753
    link = filled_params[constants.NIC_LINK]
754
    hooks_nics.append((ip, mac, mode, link))
755
  return hooks_nics
756

    
757

    
758
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
759
  """Builds instance related env variables for hooks from an object.
760

761
  @type lu: L{LogicalUnit}
762
  @param lu: the logical unit on whose behalf we execute
763
  @type instance: L{objects.Instance}
764
  @param instance: the instance for which we should build the
765
      environment
766
  @type override: dict
767
  @param override: dictionary with key/values that will override
768
      our values
769
  @rtype: dict
770
  @return: the hook environment dictionary
771

772
  """
773
  cluster = lu.cfg.GetClusterInfo()
774
  bep = cluster.FillBE(instance)
775
  hvp = cluster.FillHV(instance)
776
  args = {
777
    'name': instance.name,
778
    'primary_node': instance.primary_node,
779
    'secondary_nodes': instance.secondary_nodes,
780
    'os_type': instance.os,
781
    'status': instance.admin_up,
782
    'memory': bep[constants.BE_MEMORY],
783
    'vcpus': bep[constants.BE_VCPUS],
784
    'nics': _NICListToTuple(lu, instance.nics),
785
    'disk_template': instance.disk_template,
786
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
787
    'bep': bep,
788
    'hvp': hvp,
789
    'hypervisor_name': instance.hypervisor,
790
  }
791
  if override:
792
    args.update(override)
793
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
794

    
795

    
796
def _AdjustCandidatePool(lu, exceptions):
797
  """Adjust the candidate pool after node operations.
798

799
  """
800
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
801
  if mod_list:
802
    lu.LogInfo("Promoted nodes to master candidate role: %s",
803
               utils.CommaJoin(node.name for node in mod_list))
804
    for name in mod_list:
805
      lu.context.ReaddNode(name)
806
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
807
  if mc_now > mc_max:
808
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
809
               (mc_now, mc_max))
810

    
811

    
812
def _DecideSelfPromotion(lu, exceptions=None):
813
  """Decide whether I should promote myself as a master candidate.
814

815
  """
816
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
817
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
818
  # the new node will increase mc_max with one, so:
819
  mc_should = min(mc_should + 1, cp_size)
820
  return mc_now < mc_should
821

    
822

    
823
def _CheckNicsBridgesExist(lu, target_nics, target_node,
824
                               profile=constants.PP_DEFAULT):
825
  """Check that the brigdes needed by a list of nics exist.
826

827
  """
828
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[profile]
829
  paramslist = [objects.FillDict(c_nicparams, nic.nicparams)
830
                for nic in target_nics]
831
  brlist = [params[constants.NIC_LINK] for params in paramslist
832
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
833
  if brlist:
834
    result = lu.rpc.call_bridges_exist(target_node, brlist)
835
    result.Raise("Error checking bridges on destination node '%s'" %
836
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
837

    
838

    
839
def _CheckInstanceBridgesExist(lu, instance, node=None):
840
  """Check that the brigdes needed by an instance exist.
841

842
  """
843
  if node is None:
844
    node = instance.primary_node
845
  _CheckNicsBridgesExist(lu, instance.nics, node)
846

    
847

    
848
def _CheckOSVariant(os_obj, name):
849
  """Check whether an OS name conforms to the os variants specification.
850

851
  @type os_obj: L{objects.OS}
852
  @param os_obj: OS object to check
853
  @type name: string
854
  @param name: OS name passed by the user, to check for validity
855

856
  """
857
  if not os_obj.supported_variants:
858
    return
859
  try:
860
    variant = name.split("+", 1)[1]
861
  except IndexError:
862
    raise errors.OpPrereqError("OS name must include a variant",
863
                               errors.ECODE_INVAL)
864

    
865
  if variant not in os_obj.supported_variants:
866
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
867

    
868

    
869
def _GetNodeInstancesInner(cfg, fn):
870
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
871

    
872

    
873
def _GetNodeInstances(cfg, node_name):
874
  """Returns a list of all primary and secondary instances on a node.
875

876
  """
877

    
878
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
879

    
880

    
881
def _GetNodePrimaryInstances(cfg, node_name):
882
  """Returns primary instances on a node.
883

884
  """
885
  return _GetNodeInstancesInner(cfg,
886
                                lambda inst: node_name == inst.primary_node)
887

    
888

    
889
def _GetNodeSecondaryInstances(cfg, node_name):
890
  """Returns secondary instances on a node.
891

892
  """
893
  return _GetNodeInstancesInner(cfg,
894
                                lambda inst: node_name in inst.secondary_nodes)
895

    
896

    
897
def _GetStorageTypeArgs(cfg, storage_type):
898
  """Returns the arguments for a storage type.
899

900
  """
901
  # Special case for file storage
902
  if storage_type == constants.ST_FILE:
903
    # storage.FileStorage wants a list of storage directories
904
    return [[cfg.GetFileStorageDir()]]
905

    
906
  return []
907

    
908

    
909
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
910
  faulty = []
911

    
912
  for dev in instance.disks:
913
    cfg.SetDiskID(dev, node_name)
914

    
915
  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
916
  result.Raise("Failed to get disk status from node %s" % node_name,
917
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
918

    
919
  for idx, bdev_status in enumerate(result.payload):
920
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
921
      faulty.append(idx)
922

    
923
  return faulty
924

    
925

    
926
class LUPostInitCluster(LogicalUnit):
927
  """Logical unit for running hooks after cluster initialization.
928

929
  """
930
  HPATH = "cluster-init"
931
  HTYPE = constants.HTYPE_CLUSTER
932
  _OP_REQP = []
933

    
934
  def BuildHooksEnv(self):
935
    """Build hooks env.
936

937
    """
938
    env = {"OP_TARGET": self.cfg.GetClusterName()}
939
    mn = self.cfg.GetMasterNode()
940
    return env, [], [mn]
941

    
942
  def CheckPrereq(self):
943
    """No prerequisites to check.
944

945
    """
946
    return True
947

    
948
  def Exec(self, feedback_fn):
949
    """Nothing to do.
950

951
    """
952
    return True
953

    
954

    
955
class LUDestroyCluster(LogicalUnit):
956
  """Logical unit for destroying the cluster.
957

958
  """
959
  HPATH = "cluster-destroy"
960
  HTYPE = constants.HTYPE_CLUSTER
961
  _OP_REQP = []
962

    
963
  def BuildHooksEnv(self):
964
    """Build hooks env.
965

966
    """
967
    env = {"OP_TARGET": self.cfg.GetClusterName()}
968
    return env, [], []
969

    
970
  def CheckPrereq(self):
971
    """Check prerequisites.
972

973
    This checks whether the cluster is empty.
974

975
    Any errors are signaled by raising errors.OpPrereqError.
976

977
    """
978
    master = self.cfg.GetMasterNode()
979

    
980
    nodelist = self.cfg.GetNodeList()
981
    if len(nodelist) != 1 or nodelist[0] != master:
982
      raise errors.OpPrereqError("There are still %d node(s) in"
983
                                 " this cluster." % (len(nodelist) - 1),
984
                                 errors.ECODE_INVAL)
985
    instancelist = self.cfg.GetInstanceList()
986
    if instancelist:
987
      raise errors.OpPrereqError("There are still %d instance(s) in"
988
                                 " this cluster." % len(instancelist),
989
                                 errors.ECODE_INVAL)
990

    
991
  def Exec(self, feedback_fn):
992
    """Destroys the cluster.
993

994
    """
995
    master = self.cfg.GetMasterNode()
996
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
997

    
998
    # Run post hooks on master node before it's removed
999
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
1000
    try:
1001
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
1002
    except:
1003
      # pylint: disable-msg=W0702
1004
      self.LogWarning("Errors occurred running hooks on %s" % master)
1005

    
1006
    result = self.rpc.call_node_stop_master(master, False)
1007
    result.Raise("Could not disable the master role")
1008

    
1009
    if modify_ssh_setup:
1010
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
1011
      utils.CreateBackup(priv_key)
1012
      utils.CreateBackup(pub_key)
1013

    
1014
    return master
1015

    
1016

    
1017
def _VerifyCertificate(filename):
1018
  """Verifies a certificate for LUVerifyCluster.
1019

1020
  @type filename: string
1021
  @param filename: Path to PEM file
1022

1023
  """
1024
  try:
1025
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1026
                                           utils.ReadFile(filename))
1027
  except Exception, err: # pylint: disable-msg=W0703
1028
    return (LUVerifyCluster.ETYPE_ERROR,
1029
            "Failed to load X509 certificate %s: %s" % (filename, err))
1030

    
1031
  (errcode, msg) = \
1032
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1033
                                constants.SSL_CERT_EXPIRATION_ERROR)
1034

    
1035
  if msg:
1036
    fnamemsg = "While verifying %s: %s" % (filename, msg)
1037
  else:
1038
    fnamemsg = None
1039

    
1040
  if errcode is None:
1041
    return (None, fnamemsg)
1042
  elif errcode == utils.CERT_WARNING:
1043
    return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
1044
  elif errcode == utils.CERT_ERROR:
1045
    return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)
1046

    
1047
  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1048

    
1049

    
1050
class LUVerifyCluster(LogicalUnit):
1051
  """Verifies the cluster status.
1052

1053
  """
1054
  HPATH = "cluster-verify"
1055
  HTYPE = constants.HTYPE_CLUSTER
1056
  _OP_REQP = ["skip_checks", "verbose", "error_codes", "debug_simulate_errors"]
1057
  REQ_BGL = False
1058

    
1059
  TCLUSTER = "cluster"
1060
  TNODE = "node"
1061
  TINSTANCE = "instance"
1062

    
1063
  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1064
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1065
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1066
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1067
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1068
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1069
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1070
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1071
  ENODEDRBD = (TNODE, "ENODEDRBD")
1072
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1073
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
1074
  ENODEHV = (TNODE, "ENODEHV")
1075
  ENODELVM = (TNODE, "ENODELVM")
1076
  ENODEN1 = (TNODE, "ENODEN1")
1077
  ENODENET = (TNODE, "ENODENET")
1078
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1079
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1080
  ENODERPC = (TNODE, "ENODERPC")
1081
  ENODESSH = (TNODE, "ENODESSH")
1082
  ENODEVERSION = (TNODE, "ENODEVERSION")
1083
  ENODESETUP = (TNODE, "ENODESETUP")
1084
  ENODETIME = (TNODE, "ENODETIME")
1085

    
1086
  ETYPE_FIELD = "code"
1087
  ETYPE_ERROR = "ERROR"
1088
  ETYPE_WARNING = "WARNING"
1089

    
1090
  class NodeImage(object):
1091
    """A class representing the logical and physical status of a node.
1092

1093
    @ivar volumes: a structure as returned from
1094
        L{ganeti.backend.GetVolumeList} (runtime)
1095
    @ivar instances: a list of running instances (runtime)
1096
    @ivar pinst: list of configured primary instances (config)
1097
    @ivar sinst: list of configured secondary instances (config)
1098
    @ivar sbp: diction of {secondary-node: list of instances} of all peers
1099
        of this node (config)
1100
    @ivar mfree: free memory, as reported by hypervisor (runtime)
1101
    @ivar dfree: free disk, as reported by the node (runtime)
1102
    @ivar offline: the offline status (config)
1103
    @type rpc_fail: boolean
1104
    @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1105
        not whether the individual keys were correct) (runtime)
1106
    @type lvm_fail: boolean
1107
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1108
    @type hyp_fail: boolean
1109
    @ivar hyp_fail: whether the RPC call didn't return the instance list
1110
    @type ghost: boolean
1111
    @ivar ghost: whether this is a known node or not (config)
1112

1113
    """
1114
    def __init__(self, offline=False):
1115
      self.volumes = {}
1116
      self.instances = []
1117
      self.pinst = []
1118
      self.sinst = []
1119
      self.sbp = {}
1120
      self.mfree = 0
1121
      self.dfree = 0
1122
      self.offline = offline
1123
      self.rpc_fail = False
1124
      self.lvm_fail = False
1125
      self.hyp_fail = False
1126
      self.ghost = False
1127

    
1128
  def ExpandNames(self):
1129
    self.needed_locks = {
1130
      locking.LEVEL_NODE: locking.ALL_SET,
1131
      locking.LEVEL_INSTANCE: locking.ALL_SET,
1132
    }
1133
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1134

    
1135
  def _Error(self, ecode, item, msg, *args, **kwargs):
1136
    """Format an error message.
1137

1138
    Based on the opcode's error_codes parameter, either format a
1139
    parseable error code, or a simpler error string.
1140

1141
    This must be called only from Exec and functions called from Exec.
1142

1143
    """
1144
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1145
    itype, etxt = ecode
1146
    # first complete the msg
1147
    if args:
1148
      msg = msg % args
1149
    # then format the whole message
1150
    if self.op.error_codes:
1151
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1152
    else:
1153
      if item:
1154
        item = " " + item
1155
      else:
1156
        item = ""
1157
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1158
    # and finally report it via the feedback_fn
1159
    self._feedback_fn("  - %s" % msg)
1160

    
1161
  def _ErrorIf(self, cond, *args, **kwargs):
1162
    """Log an error message if the passed condition is True.
1163

1164
    """
1165
    cond = bool(cond) or self.op.debug_simulate_errors
1166
    if cond:
1167
      self._Error(*args, **kwargs)
1168
    # do not mark the operation as failed for WARN cases only
1169
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1170
      self.bad = self.bad or cond
1171

    
1172
  def _VerifyNode(self, ninfo, nresult):
1173
    """Run multiple tests against a node.
1174

1175
    Test list:
1176

1177
      - compares ganeti version
1178
      - checks vg existence and size > 20G
1179
      - checks config file checksum
1180
      - checks ssh to other nodes
1181

1182
    @type ninfo: L{objects.Node}
1183
    @param ninfo: the node to check
1184
    @param nresult: the results from the node
1185
    @rtype: boolean
1186
    @return: whether overall this call was successful (and we can expect
1187
         reasonable values in the respose)
1188

1189
    """
1190
    node = ninfo.name
1191
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1192

    
1193
    # main result, nresult should be a non-empty dict
1194
    test = not nresult or not isinstance(nresult, dict)
1195
    _ErrorIf(test, self.ENODERPC, node,
1196
                  "unable to verify node: no data returned")
1197
    if test:
1198
      return False
1199

    
1200
    # compares ganeti version
1201
    local_version = constants.PROTOCOL_VERSION
1202
    remote_version = nresult.get("version", None)
1203
    test = not (remote_version and
1204
                isinstance(remote_version, (list, tuple)) and
1205
                len(remote_version) == 2)
1206
    _ErrorIf(test, self.ENODERPC, node,
1207
             "connection to node returned invalid data")
1208
    if test:
1209
      return False
1210

    
1211
    test = local_version != remote_version[0]
1212
    _ErrorIf(test, self.ENODEVERSION, node,
1213
             "incompatible protocol versions: master %s,"
1214
             " node %s", local_version, remote_version[0])
1215
    if test:
1216
      return False
1217

    
1218
    # node seems compatible, we can actually try to look into its results
1219

    
1220
    # full package version
1221
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1222
                  self.ENODEVERSION, node,
1223
                  "software version mismatch: master %s, node %s",
1224
                  constants.RELEASE_VERSION, remote_version[1],
1225
                  code=self.ETYPE_WARNING)
1226

    
1227
    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1228
    if isinstance(hyp_result, dict):
1229
      for hv_name, hv_result in hyp_result.iteritems():
1230
        test = hv_result is not None
1231
        _ErrorIf(test, self.ENODEHV, node,
1232
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1233

    
1234

    
1235
    test = nresult.get(constants.NV_NODESETUP,
1236
                           ["Missing NODESETUP results"])
1237
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1238
             "; ".join(test))
1239

    
1240
    return True
1241

    
1242
  def _VerifyNodeTime(self, ninfo, nresult,
1243
                      nvinfo_starttime, nvinfo_endtime):
1244
    """Check the node time.
1245

1246
    @type ninfo: L{objects.Node}
1247
    @param ninfo: the node to check
1248
    @param nresult: the remote results for the node
1249
    @param nvinfo_starttime: the start time of the RPC call
1250
    @param nvinfo_endtime: the end time of the RPC call
1251

1252
    """
1253
    node = ninfo.name
1254
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1255

    
1256
    ntime = nresult.get(constants.NV_TIME, None)
1257
    try:
1258
      ntime_merged = utils.MergeTime(ntime)
1259
    except (ValueError, TypeError):
1260
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1261
      return
1262

    
1263
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1264
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1265
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1266
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1267
    else:
1268
      ntime_diff = None
1269

    
1270
    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1271
             "Node time diverges by at least %s from master node time",
1272
             ntime_diff)
1273

    
1274
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1275
    """Check the node time.
1276

1277
    @type ninfo: L{objects.Node}
1278
    @param ninfo: the node to check
1279
    @param nresult: the remote results for the node
1280
    @param vg_name: the configured VG name
1281

1282
    """
1283
    if vg_name is None:
1284
      return
1285

    
1286
    node = ninfo.name
1287
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1288

    
1289
    # checks vg existence and size > 20G
1290
    vglist = nresult.get(constants.NV_VGLIST, None)
1291
    test = not vglist
1292
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1293
    if not test:
1294
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1295
                                            constants.MIN_VG_SIZE)
1296
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1297

    
1298
    # check pv names
1299
    pvlist = nresult.get(constants.NV_PVLIST, None)
1300
    test = pvlist is None
1301
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1302
    if not test:
1303
      # check that ':' is not present in PV names, since it's a
1304
      # special character for lvcreate (denotes the range of PEs to
1305
      # use on the PV)
1306
      for _, pvname, owner_vg in pvlist:
1307
        test = ":" in pvname
1308
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1309
                 " '%s' of VG '%s'", pvname, owner_vg)
1310

    
1311
  def _VerifyNodeNetwork(self, ninfo, nresult):
1312
    """Check the node time.
1313

1314
    @type ninfo: L{objects.Node}
1315
    @param ninfo: the node to check
1316
    @param nresult: the remote results for the node
1317

1318
    """
1319
    node = ninfo.name
1320
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1321

    
1322
    test = constants.NV_NODELIST not in nresult
1323
    _ErrorIf(test, self.ENODESSH, node,
1324
             "node hasn't returned node ssh connectivity data")
1325
    if not test:
1326
      if nresult[constants.NV_NODELIST]:
1327
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1328
          _ErrorIf(True, self.ENODESSH, node,
1329
                   "ssh communication with node '%s': %s", a_node, a_msg)
1330

    
1331
    test = constants.NV_NODENETTEST not in nresult
1332
    _ErrorIf(test, self.ENODENET, node,
1333
             "node hasn't returned node tcp connectivity data")
1334
    if not test:
1335
      if nresult[constants.NV_NODENETTEST]:
1336
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1337
        for anode in nlist:
1338
          _ErrorIf(True, self.ENODENET, node,
1339
                   "tcp communication with node '%s': %s",
1340
                   anode, nresult[constants.NV_NODENETTEST][anode])
1341

    
1342
  def _VerifyInstance(self, instance, instanceconfig, node_image):
1343
    """Verify an instance.
1344

1345
    This function checks to see if the required block devices are
1346
    available on the instance's node.
1347

1348
    """
1349
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1350
    node_current = instanceconfig.primary_node
1351

    
1352
    node_vol_should = {}
1353
    instanceconfig.MapLVsByNode(node_vol_should)
1354

    
1355
    for node in node_vol_should:
1356
      n_img = node_image[node]
1357
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1358
        # ignore missing volumes on offline or broken nodes
1359
        continue
1360
      for volume in node_vol_should[node]:
1361
        test = volume not in n_img.volumes
1362
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1363
                 "volume %s missing on node %s", volume, node)
1364

    
1365
    if instanceconfig.admin_up:
1366
      pri_img = node_image[node_current]
1367
      test = instance not in pri_img.instances and not pri_img.offline
1368
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
1369
               "instance not running on its primary node %s",
1370
               node_current)
1371

    
1372
    for node, n_img in node_image.items():
1373
      if (not node == node_current):
1374
        test = instance in n_img.instances
1375
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1376
                 "instance should not run on node %s", node)
1377

    
1378
  def _VerifyOrphanVolumes(self, node_vol_should, node_image):
1379
    """Verify if there are any unknown volumes in the cluster.
1380

1381
    The .os, .swap and backup volumes are ignored. All other volumes are
1382
    reported as unknown.
1383

1384
    """
1385
    for node, n_img in node_image.items():
1386
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1387
        # skip non-healthy nodes
1388
        continue
1389
      for volume in n_img.volumes:
1390
        test = (node not in node_vol_should or
1391
                volume not in node_vol_should[node])
1392
        self._ErrorIf(test, self.ENODEORPHANLV, node,
1393
                      "volume %s is unknown", volume)
1394

    
1395
  def _VerifyOrphanInstances(self, instancelist, node_image):
1396
    """Verify the list of running instances.
1397

1398
    This checks what instances are running but unknown to the cluster.
1399

1400
    """
1401
    for node, n_img in node_image.items():
1402
      for o_inst in n_img.instances:
1403
        test = o_inst not in instancelist
1404
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1405
                      "instance %s on node %s should not exist", o_inst, node)
1406

    
1407
  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1408
    """Verify N+1 Memory Resilience.
1409

1410
    Check that if one single node dies we can still start all the
1411
    instances it was primary for.
1412

1413
    """
1414
    for node, n_img in node_image.items():
1415
      # This code checks that every node which is now listed as
1416
      # secondary has enough memory to host all instances it is
1417
      # supposed to should a single other node in the cluster fail.
1418
      # FIXME: not ready for failover to an arbitrary node
1419
      # FIXME: does not support file-backed instances
1420
      # WARNING: we currently take into account down instances as well
1421
      # as up ones, considering that even if they're down someone
1422
      # might want to start them even in the event of a node failure.
1423
      for prinode, instances in n_img.sbp.items():
1424
        needed_mem = 0
1425
        for instance in instances:
1426
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1427
          if bep[constants.BE_AUTO_BALANCE]:
1428
            needed_mem += bep[constants.BE_MEMORY]
1429
        test = n_img.mfree < needed_mem
1430
        self._ErrorIf(test, self.ENODEN1, node,
1431
                      "not enough memory on to accommodate"
1432
                      " failovers should peer node %s fail", prinode)
1433

    
1434
  def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1435
                       master_files):
1436
    """Verifies and computes the node required file checksums.
1437

1438
    @type ninfo: L{objects.Node}
1439
    @param ninfo: the node to check
1440
    @param nresult: the remote results for the node
1441
    @param file_list: required list of files
1442
    @param local_cksum: dictionary of local files and their checksums
1443
    @param master_files: list of files that only masters should have
1444

1445
    """
1446
    node = ninfo.name
1447
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1448

    
1449
    remote_cksum = nresult.get(constants.NV_FILELIST, None)
1450
    test = not isinstance(remote_cksum, dict)
1451
    _ErrorIf(test, self.ENODEFILECHECK, node,
1452
             "node hasn't returned file checksum data")
1453
    if test:
1454
      return
1455

    
1456
    for file_name in file_list:
1457
      node_is_mc = ninfo.master_candidate
1458
      must_have = (file_name not in master_files) or node_is_mc
1459
      # missing
1460
      test1 = file_name not in remote_cksum
1461
      # invalid checksum
1462
      test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1463
      # existing and good
1464
      test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1465
      _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1466
               "file '%s' missing", file_name)
1467
      _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1468
               "file '%s' has wrong checksum", file_name)
1469
      # not candidate and this is not a must-have file
1470
      _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1471
               "file '%s' should not exist on non master"
1472
               " candidates (and the file is outdated)", file_name)
1473
      # all good, except non-master/non-must have combination
1474
      _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1475
               "file '%s' should not exist"
1476
               " on non master candidates", file_name)
1477

    
1478

  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_map):
    """Verifies the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # compute the DRBD minors
    node_drbd = {}
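    # drbd_map[node] maps minor numbers to instance names, for example
    # (illustrative) {0: "instance1.example.com", 1: "instance2.example.com"}.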
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, self.ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
      # ghost instance should not be running, but otherwise we
      # don't give double warnings (both ghost instance and
      # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name, instance.admin_up)

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, self.ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return

    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, self.ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, self.ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)

  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
    """Verifies and updates the node volume data.

    This function will update a L{NodeImage}'s internal structures
    with data from the remote call.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    if vg_name is None:
      pass
    elif isinstance(lvdata, basestring):
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
    else:
      nimg.volumes = lvdata
      nimg.lvm_fail = False

  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
    """Verifies and updates the node instance list.

    If the listing was successful, then updates this node's instance
    list. Otherwise, it marks the RPC call as failed for the instance
    list key.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    idata = nresult.get(constants.NV_INSTANCELIST, None)
    test = not isinstance(idata, list)
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
    if test:
      nimg.hyp_fail = True
    else:
      nimg.instances = idata

  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
    """Verifies and computes a node information map

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
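
    # Expected payload shapes (values are illustrative): hv_info should be a
    # dict containing at least "memory_free", e.g. {"memory_free": 4096}, and
    # nresult[constants.NV_VGLIST] a dict mapping VG names to their free
    # space, e.g. {"xenvg": 409600}; anything else is flagged below.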
    # try to read free memory (from the hypervisor)
    hv_info = nresult.get(constants.NV_HVINFO, None)
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
    if not test:
      try:
        nimg.mfree = int(hv_info["memory_free"])
      except (ValueError, TypeError):
        _ErrorIf(True, self.ENODERPC, node,
                 "node returned invalid nodeinfo, check hypervisor")

    # FIXME: devise a free space model for file based instances as well
    if vg_name is not None:
      test = (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST])
      _ErrorIf(test, self.ENODELVM, node,
               "node didn't return data for the volume group '%s'"
               " - it is either missing or broken", vg_name)
      if not test:
        try:
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
        except (ValueError, TypeError):
          _ErrorIf(True, self.ENODERPC, node,
                   "node returned invalid LVM info, check LVM status")

  def CheckPrereq(self):
    """Check prerequisites.

    Transform the list of checks we're going to skip into a set and check that
    all its members are valid.

    """
    self.skip_set = frozenset(self.op.skip_checks)
    if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
      raise errors.OpPrereqError("Invalid checks to be skipped specified",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks are run only in the post phase; their failure is
    logged in the verify output and makes the verification fail.

    """
    all_nodes = self.cfg.GetNodeList()
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
      }
    for node in self.cfg.GetAllNodesInfo().values():
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())

    return env, [], all_nodes

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    self.bad = False
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn
    feedback_fn("* Verifying global settings")
    for msg in self.cfg.VerifyConfig():
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)

    # Check the cluster certificates
    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)

    vg_name = self.cfg.GetVGName()
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
                        for iname in instancelist)
    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = 0 # Count of offline nodes
    n_drained = 0 # Count of nodes being drained
    node_vol_should = {}

    # FIXME: verify OS list
    # do local checksums
    master_files = [constants.CLUSTER_CONF_FILE]

    file_names = ssconf.SimpleStore().GetFileList()
    file_names.extend(constants.ALL_CERT_FILES)
    file_names.extend(master_files)

    local_checksums = utils.FingerprintFiles(file_names)

    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
    node_verify_param = {
      constants.NV_FILELIST: file_names,
      constants.NV_NODELIST: [node.name for node in nodeinfo
                              if not node.offline],
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
                                  node.secondary_ip) for node in nodeinfo
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
      }

    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]
      node_verify_param[constants.NV_DRBDLIST] = None

    # Build our expected cluster state
    node_image = dict((node.name, self.NodeImage(offline=node.offline))
                      for node in nodeinfo)

    for instance in instancelist:
      inst_config = instanceinfo[instance]

      for nname in inst_config.all_nodes:
        if nname not in node_image:
          # ghost node
          gnode = self.NodeImage()
          gnode.ghost = True
          node_image[nname] = gnode

      inst_config.MapLVsByNode(node_vol_should)

      pnode = inst_config.primary_node
      node_image[pnode].pinst.append(instance)

      for snode in inst_config.secondary_nodes:
        nimg = node_image[snode]
        nimg.sinst.append(instance)
        if pnode not in nimg.sbp:
          nimg.sbp[pnode] = []
        nimg.sbp[pnode].append(instance)
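
    # Note: node_image[snode].sbp now maps each primary node name to the list
    # of instances that use snode as their secondary; the N+1 memory check
    # consumes exactly this structure.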

    # At this point, we have the in-memory data structures complete,
    # except for the runtime information, which we'll gather next

    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()

    cluster = self.cfg.GetClusterInfo()
    master_node = self.cfg.GetMasterNode()
    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Verifying node status")
    for node_i in nodeinfo:
      node = node_i.name
      nimg = node_image[node]

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline += 1
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained += 1
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
      if msg:
        nimg.rpc_fail = True
        continue

      nresult = all_nvinfo[node].payload

      nimg.call_ok = self._VerifyNode(node_i, nresult)
      self._VerifyNodeNetwork(node_i, nresult)
      self._VerifyNodeLVM(node_i, nresult, vg_name)
      self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
                            master_files)
      self._VerifyNodeDrbd(node_i, nresult, instanceinfo, all_drbd_map)
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)

      self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
      self._UpdateNodeInstances(node_i, nresult, nimg)
      self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)

    feedback_fn("* Verifying instance status")
    for instance in instancelist:
      if verbose:
        feedback_fn("* Verifying instance %s" % instance)
      inst_config = instanceinfo[instance]
      self._VerifyInstance(instance, inst_config, node_image)
      inst_nodes_offline = []

      pnode = inst_config.primary_node
      pnode_img = node_image[pnode]
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
               self.ENODERPC, pnode, "instance %s, connection to"
               " primary node failed", instance)

      if pnode_img.offline:
        inst_nodes_offline.append(pnode)

      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
      if not inst_config.secondary_nodes:
        i_non_redundant.append(instance)
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
               instance, "instance has multiple secondary nodes: %s",
               utils.CommaJoin(inst_config.secondary_nodes),
               code=self.ETYPE_WARNING)

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        s_img = node_image[snode]
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
                 "instance %s, connection to secondary node failed", instance)

        if s_img.offline:
          inst_nodes_offline.append(snode)

      # warn that the instance lives on offline nodes
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
               "instance lives on offline node(s) %s",
               utils.CommaJoin(inst_nodes_offline))
      # ... or ghost nodes
      for node in inst_config.all_nodes:
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
                 "instance lives on ghost node %s", node)

    feedback_fn("* Verifying orphan volumes")
    self._VerifyOrphanVolumes(node_vol_should, node_image)

    feedback_fn("* Verifying orphan instances")
    self._VerifyOrphanInstances(instancelist, node_image)

    if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
      feedback_fn("* Verifying N+1 Memory redundancy")
      self._VerifyNPlusOneMemory(node_image, instanceinfo)

    feedback_fn("* Other Notes")
    if i_non_redundant:
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))

    if i_non_a_balanced:
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
                  % len(i_non_a_balanced))

    if n_offline:
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)

    if n_drained:
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)

    return not self.bad

  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, and are only interested in
    # their results
    if phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      indent_re = re.compile('^', re.M)
      feedback_fn("* Hooks Results")
      assert hooks_results, "invalid result from hooks"

      for node_name in hooks_results:
        res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
                      "Communication failure in hooks execution: %s", msg)
        if res.offline or msg:
          # No need to investigate payload if node is offline or gave an error.
          # override manually lu_result here as _ErrorIf only
          # overrides self.bad
          lu_result = 1
          continue
        for script, hkr, output in res.payload:
          test = hkr == constants.HKR_FAIL
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
                        "Script %s failed, output:", script)
          if test:
            output = indent_re.sub('      ', output)
            feedback_fn("%s" % output)
            lu_result = 0

      return lu_result


class LUVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def CheckPrereq(self):
    """Check prerequisites.

    This has no prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
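    # Illustrative result shape (names are hypothetical):
    #   ({"node3": "rpc error"}, ["inst2"], {"inst5": [("node1", "lv-name")]})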
    result = res_nodes, res_instances, res_missing = {}, [], {}

    vg_name = self.cfg.GetVGName()
    nodes = utils.NiceSort(self.cfg.GetNodeList())
    instances = [self.cfg.GetInstanceInfo(name)
                 for name in self.cfg.GetInstanceList()]

    nv_dict = {}
    for inst in instances:
      inst_lvs = {}
      if (not inst.admin_up or
          inst.disk_template not in constants.DTS_NET_MIRROR):
        continue
      inst.MapLVsByNode(inst_lvs)
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
      for node, vol_list in inst_lvs.iteritems():
        for vol in vol_list:
          nv_dict[(node, vol)] = inst

    if not nv_dict:
      return result

    node_lvs = self.rpc.call_lv_list(nodes, vg_name)

    for node in nodes:
      # node_volume
      node_res = node_lvs[node]
      if node_res.offline:
        continue
      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg
        continue

      lvs = node_res.payload
      for lv_name, (_, _, lv_online) in lvs.items():
        inst = nv_dict.pop((node, lv_name), None)
        if (not lv_online and inst is not None
            and inst.name not in res_instances):
          res_instances.append(inst.name)

    # any leftover items in nv_dict are missing LVs, let's arrange the
    # data better
    for key, inst in nv_dict.iteritems():
      if inst.name not in res_missing:
        res_missing[inst.name] = []
      res_missing[inst.name].append(key)

    return result


class LURepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  """
  _OP_REQP = ["instances"]
  REQ_BGL = False

  def ExpandNames(self):
    if not isinstance(self.op.instances, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'",
                                 errors.ECODE_INVAL)

    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = _ExpandInstanceName(self.cfg, name)
        self.wanted_names.append(full_name)
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object

    """
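    # Illustrative example (made-up numbers): a DRBD8 disk recorded at
    # 10240 MiB whose data child is recorded at 10230 MiB has the child's
    # size raised to 10240 in the configuration.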
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))

    changed = []
    for node, dskl in per_node_disks.items():
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsizes(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsizes call to node"
                        " %s, ignoring", node)
        continue
      if len(result.data) != len(dskl):
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, result.data):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
        size = size >> 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))
    return changed


class LURenameCluster(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }
    mn = self.cfg.GetMasterNode()
    all_nodes = self.cfg.GetNodeList()
    return env, [mn], all_nodes

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    hostname = utils.GetHostInfo(self.op.name)

    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)
    if new_ip != old_ip:
      if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network. Aborting." %
                                   new_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    clustername = self.op.name
    ip = self.ip

    # shutdown the master IP
    master = self.cfg.GetMasterNode()
    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetNodeList()
      try:
        node_list.remove(master)
      except ValueError:
        pass
      result = self.rpc.call_upload_file(node_list,
                                         constants.SSH_KNOWN_HOSTS_FILE)
      for to_node, to_result in result.iteritems():
        msg = to_result.fail_msg
        if msg:
          msg = ("Copy of file %s to node %s failed: %s" %
                 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
          self.proc.LogWarning(msg)

    finally:
      result = self.rpc.call_node_start_master(master, False, False)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)


def _RecursiveCheckIfLVMBased(disk):
  """Check if the given disk or its children are lvm-based.

  @type disk: L{objects.Disk}
  @param disk: the disk to check
  @rtype: boolean
  @return: boolean indicating whether a LD_LV dev_type was found or not

  """
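  # Illustrative note: a DRBD8 disk is not LD_LV itself, but its data and
  # metadata children usually are, so the recursion below returns True for it.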
  if disk.children:
    for chdisk in disk.children:
      if _RecursiveCheckIfLVMBased(chdisk):
        return True
  return disk.dev_type == constants.LD_LV


class LUSetClusterParams(LogicalUnit):
  """Change the parameters of the cluster.

  """
  HPATH = "cluster-modify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []
  REQ_BGL = False

  def CheckArguments(self):
    """Check parameters

    """
    for attr in ["candidate_pool_size",
                 "uid_pool", "add_uids", "remove_uids"]:
      if not hasattr(self.op, attr):
        setattr(self.op, attr, None)

    if self.op.candidate_pool_size is not None:
      try:
        self.op.candidate_pool_size = int(self.op.candidate_pool_size)
      except (ValueError, TypeError), err:
        raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
                                   str(err), errors.ECODE_INVAL)
      if self.op.candidate_pool_size < 1:
        raise errors.OpPrereqError("At least one master candidate needed",
                                   errors.ECODE_INVAL)

    _CheckBooleanOpField(self.op, "maintain_node_health")

    if self.op.uid_pool:
      uidpool.CheckUidPool(self.op.uid_pool)

    if self.op.add_uids:
      uidpool.CheckUidPool(self.op.add_uids)

    if self.op.remove_uids:
      uidpool.CheckUidPool(self.op.remove_uids)

  def ExpandNames(self):
    # FIXME: in the future maybe other cluster params won't require checking on
    # all nodes to be modified.
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
      }
    mn = self.cfg.GetMasterNode()
    return env, [mn], [mn]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the given params don't conflict and
    if the given volume group is valid.

    """
    if self.op.vg_name is not None and not self.op.vg_name:
      instances = self.cfg.GetAllInstancesInfo().values()
      for inst in instances:
        for disk in inst.disks:
          if _RecursiveCheckIfLVMBased(disk):
            raise errors.OpPrereqError("Cannot disable lvm storage while"
                                       " lvm-based instances exist",
                                       errors.ECODE_INVAL)

    node_list = self.acquired_locks[locking.LEVEL_NODE]

    # if vg_name not None, checks given volume group on all nodes
    if self.op.vg_name:
      vglist = self.rpc.call_vg_list(node_list)
      for node in node_list:
        msg = vglist[node].fail_msg
        if msg:
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
          continue
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              self.op.vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus), errors.ECODE_ENVIRON)

    self.cluster = cluster = self.cfg.GetClusterInfo()
    # validate params changes
    if self.op.beparams:
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
      self.new_beparams = objects.FillDict(
        cluster.beparams[constants.PP_DEFAULT], self.op.beparams)

    if self.op.nicparams:
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
      self.new_nicparams = objects.FillDict(
        cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
      nic_errors = []

      # check all instances for consistency
      for instance in self.cfg.GetAllInstancesInfo().values():
        for nic_idx, nic in enumerate(instance.nics):
          params_copy = copy.deepcopy(nic.nicparams)
          params_filled = objects.FillDict(self.new_nicparams, params_copy)

          # check parameter syntax
          try:
            objects.NIC.CheckParameterSyntax(params_filled)
          except errors.ConfigurationError, err:
            nic_errors.append("Instance %s, nic/%d: %s" %
                              (instance.name, nic_idx, err))

          # if we're moving instances to routed, check that they have an ip
          target_mode = params_filled[constants.NIC_MODE]
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip" %
                              (instance.name, nic_idx))
      if nic_errors:
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
                                   "\n".join(nic_errors))

    # hypervisor list/parameters
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
    if self.op.hvparams:
      if not isinstance(self.op.hvparams, dict):
        raise errors.OpPrereqError("Invalid 'hvparams' parameter on input",
                                   errors.ECODE_INVAL)
      for hv_name, hv_dict in self.op.hvparams.items():
        if hv_name not in self.new_hvparams:
          self.new_hvparams[hv_name] = hv_dict
        else:
          self.new_hvparams[hv_name].update(hv_dict)

    # os hypervisor parameters
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
    if self.op.os_hvp:
      if not isinstance(self.op.os_hvp, dict):
        raise errors.OpPrereqError("Invalid 'os_hvp' parameter on input",
                                   errors.ECODE_INVAL)
      for os_name, hvs in self.op.os_hvp.items():
        if not isinstance(hvs, dict):
          raise errors.OpPrereqError(("Invalid 'os_hvp' parameter on"
                                      " input"), errors.ECODE_INVAL)
        if os_name not in self.new_os_hvp:
          self.new_os_hvp[os_name] = hvs
        else:
          for hv_name, hv_dict in hvs.items():
            if hv_name not in self.new_os_hvp[os_name]:
              self.new_os_hvp[os_name][hv_name] = hv_dict
            else:
              self.new_os_hvp[os_name][hv_name].update(hv_dict)

    # changes to the hypervisor list
    if self.op.enabled_hypervisors is not None:
      self.hv_list = self.op.enabled_hypervisors
      if not self.hv_list:
        raise errors.OpPrereqError("Enabled hypervisors list must contain at"
                                   " least one member",
                                   errors.ECODE_INVAL)
      invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
      if invalid_hvs:
        raise errors.OpPrereqError("Enabled hypervisors contains invalid"
                                   " entries: %s" %
                                   utils.CommaJoin(invalid_hvs),
                                   errors.ECODE_INVAL)
      for hv in self.hv_list:
        # if the hypervisor doesn't already exist in the cluster
        # hvparams, we initialize it to empty, and then (in both
        # cases) we make sure to fill the defaults, as we might not
        # have a complete defaults list if the hypervisor wasn't
        # enabled before
        if hv not in new_hvp:
          new_hvp[hv] = {}
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
    else:
      self.hv_list = cluster.enabled_hypervisors

    if self.op.hvparams or self.op.enabled_hypervisors is not None:
      # either the enabled list has changed, or the parameters have, validate
      for hv_name, hv_params in self.new_hvparams.items():
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
            (self.op.enabled_hypervisors and
             hv_name in self.op.enabled_hypervisors)):
          # either this is a new hypervisor, or its parameters have changed
          hv_class = hypervisor.GetHypervisor(hv_name)
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          hv_class.CheckParameterSyntax(hv_params)
          _CheckHVParams(self, node_list, hv_name, hv_params)

    if self.op.os_hvp:
      # no need to check any newly-enabled hypervisors, since the
      # defaults have already been checked in the above code-block
      for os_name, os_hvp in self.new_os_hvp.items():
        for hv_name, hv_params in os_hvp.items():
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          # we need to fill in the new os_hvp on top of the actual hv_p
          cluster_defaults = self.new_hvparams.get(hv_name, {})
          new_osp = objects.FillDict(cluster_defaults, hv_params)
          hv_class = hypervisor.GetHypervisor(hv_name)
          hv_class.CheckParameterSyntax(new_osp)
          _CheckHVParams(self, node_list, hv_name, new_osp)

  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    """
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
    if self.op.enabled_hypervisors is not None:
      self.cluster.hvparams = self.new_hvparams
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
    if self.op.beparams:
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
    if self.op.nicparams:
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams

    if self.op.candidate_pool_size is not None:
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
      # we need to update the pool size here, otherwise the save will fail
      _AdjustCandidatePool(self, [])

    if self.op.maintain_node_health is not None:
      self.cluster.maintain_node_health = self.op.maintain_node_health

    if self.op.add_uids is not None:
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)

    if self.op.remove_uids is not None:
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)

    if self.op.uid_pool is not None:
      self.cluster.uid_pool = self.op.uid_pool

    self.cfg.Update(self.cluster, feedback_fn)


def _RedistributeAncillaryFiles(lu, additional_nodes=None):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to

  """
  # 1. Gather target nodes
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
  dist_nodes = lu.cfg.GetOnlineNodeList()
  if additional_nodes is not None:
    dist_nodes.extend(additional_nodes)
  if myself.name in dist_nodes:
    dist_nodes.remove(myself.name)

  # 2. Gather files to distribute
  dist_files = set([constants.ETC_HOSTS,
                    constants.SSH_KNOWN_HOSTS_FILE,
                    constants.RAPI_CERT_FILE,
                    constants.RAPI_USERS_FILE,
                    constants.CONFD_HMAC_KEY,
                   ])

  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
  for hv_name in enabled_hypervisors:
    hv_class = hypervisor.GetHypervisor(hv_name)
    dist_files.update(hv_class.GetAncillaryFiles())

  # 3. Perform the files upload
  for fname in dist_files:
    if os.path.exists(fname):
      result = lu.rpc.call_upload_file(dist_nodes, fname)
      for to_node, to_result in result.items():
        msg = to_result.fail_msg
        if msg:
          msg = ("Copy of file %s to node %s failed: %s" %
                 (fname, to_node, msg))
          lu.proc.LogWarning(msg)


class LURedistributeConfig(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def CheckPrereq(self):
    """Check prerequisites.

    """

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)


def _WaitForSync(lu, instance, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  """
  if not instance.disks:
    return True

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in instance.disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
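  # Polling cadence (as implemented below): RPC failures are retried up to
  # ten times with a six-second sleep; once all devices report done but some
  # are still degraded, up to ten extra one-second polls are made before
  # giving up on the degraded state; otherwise we sleep min(60, max_time)
  # between polls.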
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                      node, instance.disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = "%d estimated seconds remaining" % mstat.estimated_time
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (instance.disks[i].iv_name, mstat.sync_percent,
                         rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded


def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result


class LUDiagnoseOS(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  _OP_REQP = ["output_fields", "names"]
  REQ_BGL = False
  _FIELDS_STATIC = utils.FieldSet()
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants")
  # Fields that need calculation of global os validity
  _FIELDS_NEEDVALID = frozenset(["valid", "variants"])

  def ExpandNames(self):
    if self.op.names:
      raise errors.OpPrereqError("Selective OS query not supported",
                                 errors.ECODE_INVAL)

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    # Lock all nodes, in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    self.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.

    """

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another map, with
        nodes as keys and tuples of (path, status, diagnose) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, ""),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "")]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for name, path, status, diagnose, variants in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        all_os[name][node_name].append((path, status, diagnose, variants))
    return all_os

  def Exec(self, feedback_fn):
    """Compute the list of OSes.

    """
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
    node_data = self.rpc.call_os_diagnose(valid_nodes)
    pol = self._DiagnoseByOS(node_data)
    output = []
    calc_valid = self._FIELDS_NEEDVALID.intersection(self.op.output_fields)
    calc_variants = "variants" in self.op.output_fields

    for os_name, os_data in pol.items():
      row = []
      if calc_valid:
        valid = True
        variants = None
        for osl in os_data.values():
          valid = valid and osl and osl[0][1]
          if not valid:
            variants = None
            break
          if calc_variants:
            node_variants = osl[0][3]
            if variants is None:
              variants = node_variants
            else:
              variants = [v for v in variants if v in node_variants]

      for field in self.op.output_fields:
        if field == "name":
          val = os_name
        elif field == "valid":
          val = valid
        elif field == "node_status":
          # this is just a copy of the dict
          val = {}
          for node_name, nos_list in os_data.items():
            val[node_name] = nos_list
        elif field == "variants":
          val = variants
        else:
          raise errors.ParameterError(field)
        row.append(val)
      output.append(row)

    return output


class LURemoveNode(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      logging.warning("Node %s which is about to be removed not found"
                      " in the all nodes list", self.op.node_name)
    return env, all_nodes, all_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    instance_list = self.cfg.GetInstanceList()

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node,"
                                 " you need to failover first.",
                                 errors.ECODE_INVAL)

    for instance_name in instance_list:
      instance = self.cfg.GetInstanceInfo(instance_name)
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first." % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)
class LUQueryNodes(NoHooksLU):
2855
  """Logical unit for querying nodes.
2856

2857
  """
2858
  # pylint: disable-msg=W0142
2859
  _OP_REQP = ["output_fields", "names", "use_locking"]
2860
  REQ_BGL = False
2861

    
2862
  _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
2863
                    "master_candidate", "offline", "drained"]
2864

    
2865
  _FIELDS_DYNAMIC = utils.FieldSet(
2866
    "dtotal", "dfree",
2867
    "mtotal", "mnode", "mfree",
2868
    "bootid",
2869
    "ctotal", "cnodes", "csockets",
2870
    )
2871

    
2872
  _FIELDS_STATIC = utils.FieldSet(*[
2873
    "pinst_cnt", "sinst_cnt",
2874
    "pinst_list", "sinst_list",
2875
    "pip", "sip", "tags",
2876
    "master",
2877
    "role"] + _SIMPLE_FIELDS
2878
    )
2879

    
2880
  def ExpandNames(self):
2881
    _CheckOutputFields(static=self._FIELDS_STATIC,
2882
                       dynamic=self._FIELDS_DYNAMIC,
2883
                       selected=self.op.output_fields)
2884

    
2885
    self.needed_locks = {}
2886
    self.share_locks[locking.LEVEL_NODE] = 1
2887

    
2888
    if self.op.names:
2889
      self.wanted = _GetWantedNodes(self, self.op.names)
2890
    else:
2891
      self.wanted = locking.ALL_SET
2892

    
2893
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
2894
    self.do_locking = self.do_node_query and self.op.use_locking
2895
    if self.do_locking:
2896
      # if we don't request only static fields, we need to lock the nodes
2897
      self.needed_locks[locking.LEVEL_NODE] = self.wanted
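    # Worked example of the locking decision above: output_fields such as
    # ["name", "pinst_cnt"] are all static, so do_node_query is False and no
    # node locks are taken; a dynamic field such as "mfree" makes
    # do_node_query True and, if use_locking is also set, the wanted nodes
    # are locked (shared) before the live RPC query in Exec.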
2898

    
2899
  def CheckPrereq(self):
2900
    """Check prerequisites.
2901

2902
    """
2903
    # The validation of the node list is done in _GetWantedNodes,
2904
    # if non-empty; if empty, there's no validation to do
2905
    pass
2906

    
2907
  def Exec(self, feedback_fn):
2908
    """Computes the list of nodes and their attributes.
2909

2910
    """
2911
    all_info = self.cfg.GetAllNodesInfo()
2912
    if self.do_locking:
2913
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
2914
    elif self.wanted != locking.ALL_SET:
2915
      nodenames = self.wanted
2916
      missing = set(nodenames).difference(all_info.keys())
2917
      if missing:
2918
        raise errors.OpExecError(
2919
          "Some nodes were removed before retrieving their data: %s" % missing)
2920
    else:
2921
      nodenames = all_info.keys()
2922

    
2923
    nodenames = utils.NiceSort(nodenames)
2924
    nodelist = [all_info[name] for name in nodenames]
2925

    
2926
    # begin data gathering
2927

    
2928
    if self.do_node_query:
2929
      live_data = {}
2930
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
2931
                                          self.cfg.GetHypervisorType())
2932
      for name in nodenames:
2933
        nodeinfo = node_data[name]
2934
        if not nodeinfo.fail_msg and nodeinfo.payload:
2935
          nodeinfo = nodeinfo.payload
2936
          fn = utils.TryConvert
2937
          live_data[name] = {
2938
            "mtotal": fn(int, nodeinfo.get('memory_total', None)),
2939
            "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
2940
            "mfree": fn(int, nodeinfo.get('memory_free', None)),
2941
            "dtotal": fn(int, nodeinfo.get('vg_size', None)),
2942
            "dfree": fn(int, nodeinfo.get('vg_free', None)),
2943
            "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
2944
            "bootid": nodeinfo.get('bootid', None),
2945
            "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
2946
            "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
2947
            }
2948
        else:
2949
          live_data[name] = {}
2950
    else:
2951
      live_data = dict.fromkeys(nodenames, {})
2952

    
2953
    node_to_primary = dict([(name, set()) for name in nodenames])
2954
    node_to_secondary = dict([(name, set()) for name in nodenames])
2955

    
2956
    inst_fields = frozenset(("pinst_cnt", "pinst_list",
2957
                             "sinst_cnt", "sinst_list"))
2958
    if inst_fields & frozenset(self.op.output_fields):
2959
      inst_data = self.cfg.GetAllInstancesInfo()
2960

    
2961
      for inst in inst_data.values():
2962
        if inst.primary_node in node_to_primary:
2963
          node_to_primary[inst.primary_node].add(inst.name)
2964
        for secnode in inst.secondary_nodes:
2965
          if secnode in node_to_secondary:
2966
            node_to_secondary[secnode].add(inst.name)
2967

    
2968
    master_node = self.cfg.GetMasterNode()
2969

    
2970
    # end data gathering
2971

    
2972
    output = []
2973
    for node in nodelist:
2974
      node_output = []
2975
      for field in self.op.output_fields:
2976
        if field in self._SIMPLE_FIELDS:
2977
          val = getattr(node, field)
2978
        elif field == "pinst_list":
2979
          val = list(node_to_primary[node.name])
2980
        elif field == "sinst_list":
2981
          val = list(node_to_secondary[node.name])
2982
        elif field == "pinst_cnt":
2983
          val = len(node_to_primary[node.name])
2984
        elif field == "sinst_cnt":
2985
          val = len(node_to_secondary[node.name])
2986
        elif field == "pip":
2987
          val = node.primary_ip
2988
        elif field == "sip":
2989
          val = node.secondary_ip
2990
        elif field == "tags":
2991
          val = list(node.GetTags())
2992
        elif field == "master":
2993
          val = node.name == master_node
2994
        elif self._FIELDS_DYNAMIC.Matches(field):
2995
          val = live_data[node.name].get(field, None)
2996
        elif field == "role":
2997
          if node.name == master_node:
2998
            val = "M"
2999
          elif node.master_candidate:
3000
            val = "C"
3001
          elif node.drained:
3002
            val = "D"
3003
          elif node.offline:
3004
            val = "O"
3005
          else:
3006
            val = "R"
3007
        else:
3008
          raise errors.ParameterError(field)
3009
        node_output.append(val)
3010
      output.append(node_output)
3011

    
3012
    return output
3013

    
3014

    
3015
class LUQueryNodeVolumes(NoHooksLU):
3016
  """Logical unit for getting volumes on node(s).
3017

3018
  """
3019
  _OP_REQP = ["nodes", "output_fields"]
3020
  REQ_BGL = False
3021
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3022
  _FIELDS_STATIC = utils.FieldSet("node")
3023

    
3024
  def ExpandNames(self):
3025
    _CheckOutputFields(static=self._FIELDS_STATIC,
3026
                       dynamic=self._FIELDS_DYNAMIC,
3027
                       selected=self.op.output_fields)
3028

    
3029
    self.needed_locks = {}
3030
    self.share_locks[locking.LEVEL_NODE] = 1
3031
    if not self.op.nodes:
3032
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3033
    else:
3034
      self.needed_locks[locking.LEVEL_NODE] = \
3035
        _GetWantedNodes(self, self.op.nodes)
3036

    
3037
  def CheckPrereq(self):
3038
    """Check prerequisites.
3039

3040
    This checks that the fields required are valid output fields.
3041

3042
    """
3043
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3044

    
3045
  def Exec(self, feedback_fn):
3046
    """Computes the list of nodes and their attributes.
3047

3048
    """
3049
    nodenames = self.nodes
3050
    volumes = self.rpc.call_node_volumes(nodenames)
3051

    
3052
    ilist = [self.cfg.GetInstanceInfo(iname) for iname
3053
             in self.cfg.GetInstanceList()]
3054

    
3055
    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3056

    
3057
    output = []
3058
    for node in nodenames:
3059
      nresult = volumes[node]
3060
      if nresult.offline:
3061
        continue
3062
      msg = nresult.fail_msg
3063
      if msg:
3064
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3065
        continue
3066

    
3067
      node_vols = nresult.payload[:]
3068
      node_vols.sort(key=lambda vol: vol['dev'])
3069

    
3070
      for vol in node_vols:
3071
        node_output = []
3072
        for field in self.op.output_fields:
3073
          if field == "node":
3074
            val = node
3075
          elif field == "phys":
3076
            val = vol['dev']
3077
          elif field == "vg":
3078
            val = vol['vg']
3079
          elif field == "name":
3080
            val = vol['name']
3081
          elif field == "size":
3082
            val = int(float(vol['size']))
3083
          elif field == "instance":
3084
            for inst in ilist:
3085
              if node not in lv_by_node[inst]:
3086
                continue
3087
              if vol['name'] in lv_by_node[inst][node]:
3088
                val = inst.name
3089
                break
3090
            else:
3091
              val = '-'
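            # The for/else above relies on Python's loop-else: the else branch
            # runs only when no break occurred, i.e. when no instance owns
            # this LV on this node, in which case "-" is reported.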
3092
          else:
3093
            raise errors.ParameterError(field)
3094
          node_output.append(str(val))
3095

    
3096
        output.append(node_output)
3097

    
3098
    return output
3099

    
3100

    
3101
class LUQueryNodeStorage(NoHooksLU):
3102
  """Logical unit for getting information on storage units on node(s).
3103

3104
  """
3105
  _OP_REQP = ["nodes", "storage_type", "output_fields"]
3106
  REQ_BGL = False
3107
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3108

    
3109
  def CheckArguments(self):
3110
    _CheckStorageType(self.op.storage_type)
3111

    
3112
    _CheckOutputFields(static=self._FIELDS_STATIC,
3113
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3114
                       selected=self.op.output_fields)
3115

    
3116
  def ExpandNames(self):
3117
    self.needed_locks = {}
3118
    self.share_locks[locking.LEVEL_NODE] = 1
3119

    
3120
    if self.op.nodes:
3121
      self.needed_locks[locking.LEVEL_NODE] = \
3122
        _GetWantedNodes(self, self.op.nodes)
3123
    else:
3124
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3125

    
3126
  def CheckPrereq(self):
3127
    """Check prerequisites.
3128

3129
    This checks that the fields required are valid output fields.
3130

3131
    """
3132
    self.op.name = getattr(self.op, "name", None)
3133

    
3134
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3135

    
3136
  def Exec(self, feedback_fn):
3137
    """Computes the list of nodes and their attributes.
3138

3139
    """
3140
    # Always get name to sort by
3141
    if constants.SF_NAME in self.op.output_fields:
3142
      fields = self.op.output_fields[:]
3143
    else:
3144
      fields = [constants.SF_NAME] + self.op.output_fields
3145

    
3146
    # Never ask for node or type as it's only known to the LU
3147
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
3148
      while extra in fields:
3149
        fields.remove(extra)
3150

    
3151
    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3152
    name_idx = field_idx[constants.SF_NAME]
3153

    
3154
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3155
    data = self.rpc.call_storage_list(self.nodes,
3156
                                      self.op.storage_type, st_args,
3157
                                      self.op.name, fields)
3158

    
3159
    result = []
3160

    
3161
    for node in utils.NiceSort(self.nodes):
3162
      nresult = data[node]
3163
      if nresult.offline:
3164
        continue
3165

    
3166
      msg = nresult.fail_msg
3167
      if msg:
3168
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3169
        continue
3170

    
3171
      rows = dict([(row[name_idx], row) for row in nresult.payload])
3172

    
3173
      for name in utils.NiceSort(rows.keys()):
3174
        row = rows[name]
3175

    
3176
        out = []
3177

    
3178
        for field in self.op.output_fields:
3179
          if field == constants.SF_NODE:
3180
            val = node
3181
          elif field == constants.SF_TYPE:
3182
            val = self.op.storage_type
3183
          elif field in field_idx:
3184
            val = row[field_idx[field]]
3185
          else:
3186
            raise errors.ParameterError(field)
3187

    
3188
          out.append(val)
3189

    
3190
        result.append(out)
3191

    
3192
    return result
3193

    
3194

    
3195
class LUModifyNodeStorage(NoHooksLU):
3196
  """Logical unit for modifying a storage volume on a node.
3197

3198
  """
3199
  _OP_REQP = ["node_name", "storage_type", "name", "changes"]
3200
  REQ_BGL = False
3201

    
3202
  def CheckArguments(self):
3203
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3204

    
3205
    _CheckStorageType(self.op.storage_type)
3206

    
3207
  def ExpandNames(self):
3208
    self.needed_locks = {
3209
      locking.LEVEL_NODE: self.op.node_name,
3210
      }
3211

    
3212
  def CheckPrereq(self):
3213
    """Check prerequisites.
3214

3215
    """
3216
    storage_type = self.op.storage_type
3217

    
3218
    try:
3219
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
3220
    except KeyError:
3221
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
3222
                                 " modified" % storage_type,
3223
                                 errors.ECODE_INVAL)
3224

    
3225
    diff = set(self.op.changes.keys()) - modifiable
3226
    if diff:
3227
      raise errors.OpPrereqError("The following fields can not be modified for"
3228
                                 " storage units of type '%s': %r" %
3229
                                 (storage_type, list(diff)),
3230
                                 errors.ECODE_INVAL)
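    # Illustrative sketch of a valid request, assuming LVM physical volumes
    # only allow their allocatability to be changed (the exact contents of
    # MODIFIABLE_STORAGE_FIELDS are an assumption here):
    #
    #   changes = {constants.SF_ALLOCATABLE: False}
    #
    # which is what "gnt-node modify-storage --allocatable=no" would build;
    # any other key would be rejected by the diff check above.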
3231

    
3232
  def Exec(self, feedback_fn):
3233
    """Computes the list of nodes and their attributes.
3234

3235
    """
3236
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3237
    result = self.rpc.call_storage_modify(self.op.node_name,
3238
                                          self.op.storage_type, st_args,
3239
                                          self.op.name, self.op.changes)
3240
    result.Raise("Failed to modify storage unit '%s' on %s" %
3241
                 (self.op.name, self.op.node_name))
3242

    
3243

    
3244
class LUAddNode(LogicalUnit):
3245
  """Logical unit for adding node to the cluster.
3246

3247
  """
3248
  HPATH = "node-add"
3249
  HTYPE = constants.HTYPE_NODE
3250
  _OP_REQP = ["node_name"]
3251

    
3252
  def CheckArguments(self):
3253
    # validate/normalize the node name
3254
    self.op.node_name = utils.HostInfo.NormalizeName(self.op.node_name)
3255

    
3256
  def BuildHooksEnv(self):
3257
    """Build hooks env.
3258

3259
    This will run on all nodes before, and on all nodes + the new node after.
3260

3261
    """
3262
    env = {
3263
      "OP_TARGET": self.op.node_name,
3264
      "NODE_NAME": self.op.node_name,
3265
      "NODE_PIP": self.op.primary_ip,
3266
      "NODE_SIP": self.op.secondary_ip,
3267
      }
3268
    nodes_0 = self.cfg.GetNodeList()
3269
    nodes_1 = nodes_0 + [self.op.node_name, ]
3270
    return env, nodes_0, nodes_1
3271

    
3272
  def CheckPrereq(self):
3273
    """Check prerequisites.
3274

3275
    This checks:
3276
     - the new node is not already in the config
3277
     - it is resolvable
3278
     - its parameters (single/dual homed) matches the cluster
3279

3280
    Any errors are signaled by raising errors.OpPrereqError.
3281

3282
    """
3283
    node_name = self.op.node_name
3284
    cfg = self.cfg
3285

    
3286
    dns_data = utils.GetHostInfo(node_name)
3287

    
3288
    node = dns_data.name
3289
    primary_ip = self.op.primary_ip = dns_data.ip
3290
    secondary_ip = getattr(self.op, "secondary_ip", None)
3291
    if secondary_ip is None:
3292
      secondary_ip = primary_ip
3293
    if not utils.IsValidIP(secondary_ip):
3294
      raise errors.OpPrereqError("Invalid secondary IP given",
3295
                                 errors.ECODE_INVAL)
3296
    self.op.secondary_ip = secondary_ip
3297

    
3298
    node_list = cfg.GetNodeList()
3299
    if not self.op.readd and node in node_list:
3300
      raise errors.OpPrereqError("Node %s is already in the configuration" %
3301
                                 node, errors.ECODE_EXISTS)
3302
    elif self.op.readd and node not in node_list:
3303
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3304
                                 errors.ECODE_NOENT)
3305

    
3306
    self.changed_primary_ip = False
3307

    
3308
    for existing_node_name in node_list:
3309
      existing_node = cfg.GetNodeInfo(existing_node_name)
3310

    
3311
      if self.op.readd and node == existing_node_name:
3312
        if existing_node.secondary_ip != secondary_ip:
3313
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
3314
                                     " address configuration as before",
3315
                                     errors.ECODE_INVAL)
3316
        if existing_node.primary_ip != primary_ip:
3317
          self.changed_primary_ip = True
3318

    
3319
        continue
3320

    
3321
      if (existing_node.primary_ip == primary_ip or
3322
          existing_node.secondary_ip == primary_ip or
3323
          existing_node.primary_ip == secondary_ip or
3324
          existing_node.secondary_ip == secondary_ip):
3325
        raise errors.OpPrereqError("New node ip address(es) conflict with"
3326
                                   " existing node %s" % existing_node.name,
3327
                                   errors.ECODE_NOTUNIQUE)
3328

    
3329
    # check that the type of the node (single versus dual homed) is the
3330
    # same as for the master
3331
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3332
    master_singlehomed = myself.secondary_ip == myself.primary_ip
3333
    newbie_singlehomed = secondary_ip == primary_ip
3334
    if master_singlehomed != newbie_singlehomed:
3335
      if master_singlehomed:
3336
        raise errors.OpPrereqError("The master has no private ip but the"
3337
                                   " new node has one",
3338
                                   errors.ECODE_INVAL)
3339
      else:
3340
        raise errors.OpPrereqError("The master has a private ip but the"
3341
                                   " new node doesn't have one",
3342
                                   errors.ECODE_INVAL)
3343

    
3344
    # checks reachability
3345
    if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3346
      raise errors.OpPrereqError("Node not reachable by ping",
3347
                                 errors.ECODE_ENVIRON)
3348

    
3349
    if not newbie_singlehomed:
3350
      # check reachability from my secondary ip to newbie's secondary ip
3351
      if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3352
                           source=myself.secondary_ip):
3353
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3354
                                   " based ping to noded port",
3355
                                   errors.ECODE_ENVIRON)
3356

    
3357
    if self.op.readd:
3358
      exceptions = [node]
3359
    else:
3360
      exceptions = []
3361

    
3362
    self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
3363

    
3364
    if self.op.readd:
3365
      self.new_node = self.cfg.GetNodeInfo(node)
3366
      assert self.new_node is not None, "Can't retrieve locked node %s" % node
3367
    else:
3368
      self.new_node = objects.Node(name=node,
3369
                                   primary_ip=primary_ip,
3370
                                   secondary_ip=secondary_ip,
3371
                                   master_candidate=self.master_candidate,
3372
                                   offline=False, drained=False)
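    # Usage sketch: this LU backs "gnt-node add"; a plain add and a re-add
    # would be submitted roughly as (opcode name and fields assumed from the
    # 2.4-era API):
    #
    #   opcodes.OpAddNode(node_name="node5.example.com")
    #   opcodes.OpAddNode(node_name="node5.example.com", readd=True)
    #
    # In the readd case the existing configuration entry fetched above is
    # reused instead of creating a new objects.Node object.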
3373

    
3374
  def Exec(self, feedback_fn):
3375
    """Adds the new node to the cluster.
3376

3377
    """
3378
    new_node = self.new_node
3379
    node = new_node.name
3380

    
3381
    # for re-adds, reset the offline/drained/master-candidate flags;
3382
    # we need to reset here, otherwise offline would prevent RPC calls
3383
    # later in the procedure; this also means that if the re-add
3384
    # fails, we are left with a non-offlined, broken node
3385
    if self.op.readd:
3386
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
3387
      self.LogInfo("Readding a node, the offline/drained flags were reset")
3388
      # if we demote the node, we do cleanup later in the procedure
3389
      new_node.master_candidate = self.master_candidate
3390
      if self.changed_primary_ip:
3391
        new_node.primary_ip = self.op.primary_ip
3392

    
3393
    # notify the user about any possible mc promotion
3394
    if new_node.master_candidate:
3395
      self.LogInfo("Node will be a master candidate")
3396

    
3397
    # check connectivity
3398
    result = self.rpc.call_version([node])[node]
3399
    result.Raise("Can't get version information from node %s" % node)
3400
    if constants.PROTOCOL_VERSION == result.payload:
3401
      logging.info("Communication to node %s fine, sw version %s match",
3402
                   node, result.payload)
3403
    else:
3404
      raise errors.OpExecError("Version mismatch master version %s,"
3405
                               " node version %s" %
3406
                               (constants.PROTOCOL_VERSION, result.payload))
3407

    
3408
    # setup ssh on node
3409
    if self.cfg.GetClusterInfo().modify_ssh_setup:
3410
      logging.info("Copy ssh key to node %s", node)
3411
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
3412
      keyarray = []
3413
      keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
3414
                  constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
3415
                  priv_key, pub_key]
3416

    
3417
      for i in keyfiles:
3418
        keyarray.append(utils.ReadFile(i))
3419

    
3420
      result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
3421
                                      keyarray[2], keyarray[3], keyarray[4],
3422
                                      keyarray[5])
3423
      result.Raise("Cannot transfer ssh keys to the new node")
3424

    
3425
    # Add node to our /etc/hosts, and add key to known_hosts
3426
    if self.cfg.GetClusterInfo().modify_etc_hosts:
3427
      utils.AddHostToEtcHosts(new_node.name)
3428

    
3429
    if new_node.secondary_ip != new_node.primary_ip:
3430
      result = self.rpc.call_node_has_ip_address(new_node.name,
3431
                                                 new_node.secondary_ip)
3432
      result.Raise("Failure checking secondary ip on node %s" % new_node.name,
3433
                   prereq=True, ecode=errors.ECODE_ENVIRON)
3434
      if not result.payload:
3435
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
3436
                                 " you gave (%s). Please fix and re-run this"
3437
                                 " command." % new_node.secondary_ip)
3438

    
3439
    node_verify_list = [self.cfg.GetMasterNode()]
3440
    node_verify_param = {
3441
      constants.NV_NODELIST: [node],
3442
      # TODO: do a node-net-test as well?
3443
    }
3444

    
3445
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3446
                                       self.cfg.GetClusterName())
3447
    for verifier in node_verify_list:
3448
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
3449
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
3450
      if nl_payload:
3451
        for failed in nl_payload:
3452
          feedback_fn("ssh/hostname verification failed"
3453
                      " (checking from %s): %s" %
3454
                      (verifier, nl_payload[failed]))
3455
        raise errors.OpExecError("ssh/hostname verification failed.")
3456

    
3457
    if self.op.readd:
3458
      _RedistributeAncillaryFiles(self)
3459
      self.context.ReaddNode(new_node)
3460
      # make sure we redistribute the config
3461
      self.cfg.Update(new_node, feedback_fn)
3462
      # and make sure the new node will not have old files around
3463
      if not new_node.master_candidate:
3464
        result = self.rpc.call_node_demote_from_mc(new_node.name)
3465
        msg = result.fail_msg
3466
        if msg:
3467
          self.LogWarning("Node failed to demote itself from master"
3468
                          " candidate status: %s" % msg)
3469
    else:
3470
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
3471
      self.context.AddNode(new_node, self.proc.GetECId())
3472

    
3473

    
3474
class LUSetNodeParams(LogicalUnit):
3475
  """Modifies the parameters of a node.
3476

3477
  """
3478
  HPATH = "node-modify"
3479
  HTYPE = constants.HTYPE_NODE
3480
  _OP_REQP = ["node_name"]
3481
  REQ_BGL = False
3482

    
3483
  def CheckArguments(self):
3484
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3485
    _CheckBooleanOpField(self.op, 'master_candidate')
3486
    _CheckBooleanOpField(self.op, 'offline')
3487
    _CheckBooleanOpField(self.op, 'drained')
3488
    _CheckBooleanOpField(self.op, 'auto_promote')
3489
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
3490
    if all_mods.count(None) == 3:
3491
      raise errors.OpPrereqError("Please pass at least one modification",
3492
                                 errors.ECODE_INVAL)
3493
    if all_mods.count(True) > 1:
3494
      raise errors.OpPrereqError("Can't set the node into more than one"
3495
                                 " state at the same time",
3496
                                 errors.ECODE_INVAL)
3497

    
3498
    # Boolean value that tells us whether we're offlining or draining the node
3499
    self.offline_or_drain = (self.op.offline == True or
3500
                             self.op.drained == True)
3501
    self.deoffline_or_drain = (self.op.offline == False or
3502
                               self.op.drained == False)
3503
    self.might_demote = (self.op.master_candidate == False or
3504
                         self.offline_or_drain)
3505

    
3506
    self.lock_all = self.op.auto_promote and self.might_demote
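    # Examples of how the flags combine (CLI option mapping assumed):
    # "gnt-node modify -O yes <node>" arrives with offline=True and the other
    # flags None, so offline_or_drain and might_demote are True; "gnt-node
    # modify -C yes <node>" sets only master_candidate=True, so might_demote
    # stays False and lock_all remains False.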
3507

    
3508

    
3509
  def ExpandNames(self):
3510
    if self.lock_all:
3511
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
3512
    else:
3513
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
3514

    
3515
  def BuildHooksEnv(self):
3516
    """Build hooks env.
3517

3518
    This runs on the master node.
3519

3520
    """
3521
    env = {
3522
      "OP_TARGET": self.op.node_name,
3523
      "MASTER_CANDIDATE": str(self.op.master_candidate),
3524
      "OFFLINE": str(self.op.offline),
3525
      "DRAINED": str(self.op.drained),
3526
      }
3527
    nl = [self.cfg.GetMasterNode(),
3528
          self.op.node_name]
3529
    return env, nl, nl
3530

    
3531
  def CheckPrereq(self):
3532
    """Check prerequisites.
3533

3534
    This only checks the instance list against the existing names.
3535

3536
    """
3537
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
3538

    
3539
    if (self.op.master_candidate is not None or
3540
        self.op.drained is not None or
3541
        self.op.offline is not None):
3542
      # we can't change the master's node flags
3543
      if self.op.node_name == self.cfg.GetMasterNode():
3544
        raise errors.OpPrereqError("The master role can be changed"
3545
                                   " only via masterfailover",
3546
                                   errors.ECODE_INVAL)
3547

    
3548

    
3549
    if node.master_candidate and self.might_demote and not self.lock_all:
3550
      assert not self.op.auto_promote, "auto-promote set but lock_all not"
3551
      # check if after removing the current node, we're missing master
3552
      # candidates
3553
      (mc_remaining, mc_should, _) = \
3554
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
3555
      if mc_remaining < mc_should:
3556
        raise errors.OpPrereqError("Not enough master candidates, please"
3557
                                   " pass auto_promote to allow promotion",
3558
                                   errors.ECODE_INVAL)
3559

    
3560
    if (self.op.master_candidate == True and
3561
        ((node.offline and not self.op.offline == False) or
3562
         (node.drained and not self.op.drained == False))):
3563
      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
3564
                                 " to master_candidate" % node.name,
3565
                                 errors.ECODE_INVAL)
3566

    
3567
    # If we're being deofflined/drained, we'll MC ourself if needed
3568
    if (self.deoffline_or_drain and not self.offline_or_drain and not
3569
        self.op.master_candidate == True and not node.master_candidate):
3570
      self.op.master_candidate = _DecideSelfPromotion(self)
3571
      if self.op.master_candidate:
3572
        self.LogInfo("Autopromoting node to master candidate")
3573

    
3574
    return
3575

    
3576
  def Exec(self, feedback_fn):
3577
    """Modifies a node.
3578

3579
    """
3580
    node = self.node
3581

    
3582
    result = []
3583
    changed_mc = False
3584

    
3585
    if self.op.offline is not None:
3586
      node.offline = self.op.offline
3587
      result.append(("offline", str(self.op.offline)))
3588
      if self.op.offline == True:
3589
        if node.master_candidate:
3590
          node.master_candidate = False
3591
          changed_mc = True
3592
          result.append(("master_candidate", "auto-demotion due to offline"))
3593
        if node.drained:
3594
          node.drained = False
3595
          result.append(("drained", "clear drained status due to offline"))
3596

    
3597
    if self.op.master_candidate is not None:
3598
      node.master_candidate = self.op.master_candidate
3599
      changed_mc = True
3600
      result.append(("master_candidate", str(self.op.master_candidate)))
3601
      if self.op.master_candidate == False:
3602
        rrc = self.rpc.call_node_demote_from_mc(node.name)
3603
        msg = rrc.fail_msg
3604
        if msg:
3605
          self.LogWarning("Node failed to demote itself: %s" % msg)
3606

    
3607
    if self.op.drained is not None:
3608
      node.drained = self.op.drained
3609
      result.append(("drained", str(self.op.drained)))
3610
      if self.op.drained == True:
3611
        if node.master_candidate:
3612
          node.master_candidate = False
3613
          changed_mc = True
3614
          result.append(("master_candidate", "auto-demotion due to drain"))
3615
          rrc = self.rpc.call_node_demote_from_mc(node.name)
3616
          msg = rrc.fail_msg
3617
          if msg:
3618
            self.LogWarning("Node failed to demote itself: %s" % msg)
3619
        if node.offline:
3620
          node.offline = False
3621
          result.append(("offline", "clear offline status due to drain"))
3622

    
3623
    # we locked all nodes, we adjust the CP before updating this node
3624
    if self.lock_all:
3625
      _AdjustCandidatePool(self, [node.name])
3626

    
3627
    # this will trigger configuration file update, if needed
3628
    self.cfg.Update(node, feedback_fn)
3629

    
3630
    # this will trigger job queue propagation or cleanup
3631
    if changed_mc:
3632
      self.context.ReaddNode(node)
3633

    
3634
    return result
3635

    
3636

    
3637
class LUPowercycleNode(NoHooksLU):
3638
  """Powercycles a node.
3639

3640
  """
3641
  _OP_REQP = ["node_name", "force"]
3642
  REQ_BGL = False
3643

    
3644
  def CheckArguments(self):
3645
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3646
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
3647
      raise errors.OpPrereqError("The node is the master and the force"
3648
                                 " parameter was not set",
3649
                                 errors.ECODE_INVAL)
3650

    
3651
  def ExpandNames(self):
3652
    """Locking for PowercycleNode.
3653

3654
    This is a last-resort option and shouldn't block on other
3655
    jobs. Therefore, we grab no locks.
3656

3657
    """
3658
    self.needed_locks = {}
3659

    
3660
  def CheckPrereq(self):
3661
    """Check prerequisites.
3662

3663
    This LU has no prereqs.
3664

3665
    """
3666
    pass
3667

    
3668
  def Exec(self, feedback_fn):
3669
    """Reboots a node.
3670

3671
    """
3672
    result = self.rpc.call_node_powercycle(self.op.node_name,
3673
                                           self.cfg.GetHypervisorType())
3674
    result.Raise("Failed to schedule the reboot")
3675
    return result.payload
3676

    
3677

    
3678
class LUQueryClusterInfo(NoHooksLU):
3679
  """Query cluster configuration.
3680

3681
  """
3682
  _OP_REQP = []
3683
  REQ_BGL = False
3684

    
3685
  def ExpandNames(self):
3686
    self.needed_locks = {}
3687

    
3688
  def CheckPrereq(self):
3689
    """No prerequsites needed for this LU.
3690

3691
    """
3692
    pass
3693

    
3694
  def Exec(self, feedback_fn):
3695
    """Return cluster config.
3696

3697
    """
3698
    cluster = self.cfg.GetClusterInfo()
3699
    os_hvp = {}
3700

    
3701
    # Filter just for enabled hypervisors
3702
    for os_name, hv_dict in cluster.os_hvp.items():
3703
      os_hvp[os_name] = {}
3704
      for hv_name, hv_params in hv_dict.items():
3705
        if hv_name in cluster.enabled_hypervisors:
3706
          os_hvp[os_name][hv_name] = hv_params
3707

    
3708
    result = {
3709
      "software_version": constants.RELEASE_VERSION,
3710
      "protocol_version": constants.PROTOCOL_VERSION,
3711
      "config_version": constants.CONFIG_VERSION,
3712
      "os_api_version": max(constants.OS_API_VERSIONS),
3713
      "export_version": constants.EXPORT_VERSION,
3714
      "architecture": (platform.architecture()[0], platform.machine()),
3715
      "name": cluster.cluster_name,
3716
      "master": cluster.master_node,
3717
      "default_hypervisor": cluster.enabled_hypervisors[0],
3718
      "enabled_hypervisors": cluster.enabled_hypervisors,
3719
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
3720
                        for hypervisor_name in cluster.enabled_hypervisors]),
3721
      "os_hvp": os_hvp,
3722
      "beparams": cluster.beparams,
3723
      "nicparams": cluster.nicparams,
3724
      "candidate_pool_size": cluster.candidate_pool_size,
3725
      "master_netdev": cluster.master_netdev,
3726
      "volume_group_name": cluster.volume_group_name,
3727
      "file_storage_dir": cluster.file_storage_dir,
3728
      "maintain_node_health": cluster.maintain_node_health,
3729
      "ctime": cluster.ctime,
3730
      "mtime": cluster.mtime,
3731
      "uuid": cluster.uuid,
3732
      "tags": list(cluster.GetTags()),
3733
      "uid_pool": cluster.uid_pool,
3734
      }
3735

    
3736
    return result
3737

    
3738

    
3739
class LUQueryConfigValues(NoHooksLU):
3740
  """Return configuration values.
3741

3742
  """
3743
  _OP_REQP = []
3744
  REQ_BGL = False
3745
  _FIELDS_DYNAMIC = utils.FieldSet()
3746
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
3747
                                  "watcher_pause")
3748

    
3749
  def ExpandNames(self):
3750
    self.needed_locks = {}
3751

    
3752
    _CheckOutputFields(static=self._FIELDS_STATIC,
3753
                       dynamic=self._FIELDS_DYNAMIC,
3754
                       selected=self.op.output_fields)
3755

    
3756
  def CheckPrereq(self):
3757
    """No prerequisites.
3758

3759
    """
3760
    pass
3761

    
3762
  def Exec(self, feedback_fn):
3763
    """Dump a representation of the cluster config to the standard output.
3764

3765
    """
3766
    values = []
3767
    for field in self.op.output_fields:
3768
      if field == "cluster_name":
3769
        entry = self.cfg.GetClusterName()
3770
      elif field == "master_node":
3771
        entry = self.cfg.GetMasterNode()
3772
      elif field == "drain_flag":
3773
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
3774
      elif field == "watcher_pause":
3775
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
3776
      else:
3777
        raise errors.ParameterError(field)
3778
      values.append(entry)
3779
    return values
3780

    
3781

    
3782
class LUActivateInstanceDisks(NoHooksLU):
3783
  """Bring up an instance's disks.
3784

3785
  """
3786
  _OP_REQP = ["instance_name"]
3787
  REQ_BGL = False
3788

    
3789
  def ExpandNames(self):
3790
    self._ExpandAndLockInstance()
3791
    self.needed_locks[locking.LEVEL_NODE] = []
3792
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3793

    
3794
  def DeclareLocks(self, level):
3795
    if level == locking.LEVEL_NODE:
3796
      self._LockInstancesNodes()
3797

    
3798
  def CheckPrereq(self):
3799
    """Check prerequisites.
3800

3801
    This checks that the instance is in the cluster.
3802

3803
    """
3804
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3805
    assert self.instance is not None, \
3806
      "Cannot retrieve locked instance %s" % self.op.instance_name
3807
    _CheckNodeOnline(self, self.instance.primary_node)
3808
    if not hasattr(self.op, "ignore_size"):
3809
      self.op.ignore_size = False
3810

    
3811
  def Exec(self, feedback_fn):
3812
    """Activate the disks.
3813

3814
    """
3815
    disks_ok, disks_info = \
3816
              _AssembleInstanceDisks(self, self.instance,
3817
                                     ignore_size=self.op.ignore_size)
3818
    if not disks_ok:
3819
      raise errors.OpExecError("Cannot activate block devices")
3820

    
3821
    return disks_info
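    # Usage sketch: this LU backs "gnt-instance activate-disks"; the returned
    # disks_info is a list of (node, iv_name, device_path) tuples, which is
    # the mapping the CLI prints from instance disks to host block devices.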
3822

    
3823

    
3824
def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False,
3825
                           ignore_size=False):
3826
  """Prepare the block devices for an instance.
3827

3828
  This sets up the block devices on all nodes.
3829

3830
  @type lu: L{LogicalUnit}
3831
  @param lu: the logical unit on whose behalf we execute
3832
  @type instance: L{objects.Instance}
3833
  @param instance: the instance for whose disks we assemble
3834
  @type ignore_secondaries: boolean
3835
  @param ignore_secondaries: if true, errors on secondary nodes
3836
      won't result in an error return from the function
3837
  @type ignore_size: boolean
3838
  @param ignore_size: if true, the current known size of the disk
3839
      will not be used during the disk activation, useful for cases
3840
      when the size is wrong
3841
  @return: False if the operation failed, otherwise a list of
3842
      (host, instance_visible_name, node_visible_name)
3843
      with the mapping from node devices to instance devices
3844

3845
  """
3846
  device_info = []
3847
  disks_ok = True
3848
  iname = instance.name
3849
  # With the two-pass mechanism we try to reduce the window of
3850
  # opportunity for the race condition of switching DRBD to primary
3851
  # before handshaking occurred, but we do not eliminate it
3852

    
3853
  # The proper fix would be to wait (with some limits) until the
3854
  # connection has been made and drbd transitions from WFConnection
3855
  # into any other network-connected state (Connected, SyncTarget,
3856
  # SyncSource, etc.)
3857

    
3858
  # 1st pass, assemble on all nodes in secondary mode
3859
  for inst_disk in instance.disks:
3860
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3861
      if ignore_size:
3862
        node_disk = node_disk.Copy()
3863
        node_disk.UnsetSize()
3864
      lu.cfg.SetDiskID(node_disk, node)
3865
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
3866
      msg = result.fail_msg
3867
      if msg:
3868
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
3869
                           " (is_primary=False, pass=1): %s",
3870
                           inst_disk.iv_name, node, msg)
3871
        if not ignore_secondaries:
3872
          disks_ok = False
3873

    
3874
  # FIXME: race condition on drbd migration to primary
3875

    
3876
  # 2nd pass, do only the primary node
3877
  for inst_disk in instance.disks:
3878
    dev_path = None
3879

    
3880
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3881
      if node != instance.primary_node:
3882
        continue
3883
      if ignore_size:
3884
        node_disk = node_disk.Copy()
3885
        node_disk.UnsetSize()
3886
      lu.cfg.SetDiskID(node_disk, node)
3887
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
3888
      msg = result.fail_msg
3889
      if msg:
3890
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
3891
                           " (is_primary=True, pass=2): %s",
3892
                           inst_disk.iv_name, node, msg)
3893
        disks_ok = False
3894
      else:
3895
        dev_path = result.payload
3896

    
3897
    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
3898

    
3899
  # leave the disks configured for the primary node
3900
  # this is a workaround that would be fixed better by
3901
  # improving the logical/physical id handling
3902
  for disk in instance.disks:
3903
    lu.cfg.SetDiskID(disk, instance.primary_node)
3904

    
3905
  return disks_ok, device_info
3906

    
3907

    
3908
def _StartInstanceDisks(lu, instance, force):
3909
  """Start the disks of an instance.
3910

3911
  """
3912
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
3913
                                           ignore_secondaries=force)
3914
  if not disks_ok:
3915
    _ShutdownInstanceDisks(lu, instance)
3916
    if force is not None and not force:
3917
      lu.proc.LogWarning("", hint="If the message above refers to a"
3918
                         " secondary node,"
3919
                         " you can retry the operation using '--force'.")
3920
    raise errors.OpExecError("Disk consistency error")
3921

    
3922

    
3923
class LUDeactivateInstanceDisks(NoHooksLU):
3924
  """Shutdown an instance's disks.
3925

3926
  """
3927
  _OP_REQP = ["instance_name"]
3928
  REQ_BGL = False
3929

    
3930
  def ExpandNames(self):
3931
    self._ExpandAndLockInstance()
3932
    self.needed_locks[locking.LEVEL_NODE] = []
3933
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3934

    
3935
  def DeclareLocks(self, level):
3936
    if level == locking.LEVEL_NODE:
3937
      self._LockInstancesNodes()
3938

    
3939
  def CheckPrereq(self):
3940
    """Check prerequisites.
3941

3942
    This checks that the instance is in the cluster.
3943

3944
    """
3945
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3946
    assert self.instance is not None, \
3947
      "Cannot retrieve locked instance %s" % self.op.instance_name
3948

    
3949
  def Exec(self, feedback_fn):
3950
    """Deactivate the disks
3951

3952
    """
3953
    instance = self.instance
3954
    _SafeShutdownInstanceDisks(self, instance)
3955

    
3956

    
3957
def _SafeShutdownInstanceDisks(lu, instance):
3958
  """Shutdown block devices of an instance.
3959

3960
  This function checks if an instance is running, before calling
3961
  _ShutdownInstanceDisks.
3962

3963
  """
3964
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
3965
  _ShutdownInstanceDisks(lu, instance)
3966

    
3967

    
3968
def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
3969
  """Shutdown block devices of an instance.
3970

3971
  This does the shutdown on all nodes of the instance.
3972

3973
  If ignore_primary is true, errors on the primary node are
3974
  ignored.
3975

3976
  """
3977
  all_result = True
3978
  for disk in instance.disks:
3979
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
3980
      lu.cfg.SetDiskID(top_disk, node)
3981
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
3982
      msg = result.fail_msg
3983
      if msg:
3984
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
3985
                      disk.iv_name, node, msg)
3986
        if not ignore_primary or node != instance.primary_node:
3987
          all_result = False
3988
  return all_result
3989

    
3990

    
3991
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
3992
  """Checks if a node has enough free memory.
3993

3994
  This function checks if a given node has the needed amount of free
3995
  memory. In case the node has less memory or we cannot get the
3996
  information from the node, this function raises an OpPrereqError
3997
  exception.
3998

3999
  @type lu: C{LogicalUnit}
4000
  @param lu: a logical unit from which we get configuration data
4001
  @type node: C{str}
4002
  @param node: the node to check
4003
  @type reason: C{str}
4004
  @param reason: string to use in the error message
4005
  @type requested: C{int}
4006
  @param requested: the amount of memory in MiB to check for
4007
  @type hypervisor_name: C{str}
4008
  @param hypervisor_name: the hypervisor to ask for memory stats
4009
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4010
      we cannot check the node
4011

4012
  """
4013
  nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
4014
  nodeinfo[node].Raise("Can't get data from node %s" % node,
4015
                       prereq=True, ecode=errors.ECODE_ENVIRON)
4016
  free_mem = nodeinfo[node].payload.get('memory_free', None)
4017
  if not isinstance(free_mem, int):
4018
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4019
                               " was '%s'" % (node, free_mem),
4020
                               errors.ECODE_ENVIRON)
4021
  if requested > free_mem:
4022
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4023
                               " needed %s MiB, available %s MiB" %
4024
                               (node, reason, requested, free_mem),
4025
                               errors.ECODE_NORES)
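# Typical invocation (this exact call appears in LUStartupInstance.CheckPrereq
# further down), with "requested" being the instance's configured memory in
# MiB:
#
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MEMORY], instance.hypervisor)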
4026

    
4027

    
4028
def _CheckNodesFreeDisk(lu, nodenames, requested):
4029
  """Checks if nodes have enough free disk space in the default VG.
4030

4031
  This function checks if all given nodes have the needed amount of
4032
  free disk. In case any node has less disk or we cannot get the
4033
  information from the node, this function raises an OpPrereqError
4034
  exception.
4035

4036
  @type lu: C{LogicalUnit}
4037
  @param lu: a logical unit from which we get configuration data
4038
  @type nodenames: C{list}
4039
  @param nodenames: the list of node names to check
4040
  @type requested: C{int}
4041
  @param requested: the amount of disk in MiB to check for
4042
  @raise errors.OpPrereqError: if the node doesn't have enough disk, or
4043
      we cannot check the node
4044

4045
  """
4046
  nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
4047
                                   lu.cfg.GetHypervisorType())
4048
  for node in nodenames:
4049
    info = nodeinfo[node]
4050
    info.Raise("Cannot get current information from node %s" % node,
4051
               prereq=True, ecode=errors.ECODE_ENVIRON)
4052
    vg_free = info.payload.get("vg_free", None)
4053
    if not isinstance(vg_free, int):
4054
      raise errors.OpPrereqError("Can't compute free disk space on node %s,"
4055
                                 " result was '%s'" % (node, vg_free),
4056
                                 errors.ECODE_ENVIRON)
4057
    if requested > vg_free:
4058
      raise errors.OpPrereqError("Not enough disk space on target node %s:"
4059
                                 " required %d MiB, available %d MiB" %
4060
                                 (node, requested, vg_free),
4061
                                 errors.ECODE_NORES)
4062

    
4063

    
4064
class LUStartupInstance(LogicalUnit):
4065
  """Starts an instance.
4066

4067
  """
4068
  HPATH = "instance-start"
4069
  HTYPE = constants.HTYPE_INSTANCE
4070
  _OP_REQP = ["instance_name", "force"]
4071
  REQ_BGL = False
4072

    
4073
  def ExpandNames(self):
4074
    self._ExpandAndLockInstance()
4075

    
4076
  def BuildHooksEnv(self):
4077
    """Build hooks env.
4078

4079
    This runs on master, primary and secondary nodes of the instance.
4080

4081
    """
4082
    env = {
4083
      "FORCE": self.op.force,
4084
      }
4085
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4086
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4087
    return env, nl, nl
4088

    
4089
  def CheckPrereq(self):
4090
    """Check prerequisites.
4091

4092
    This checks that the instance is in the cluster.
4093

4094
    """
4095
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4096
    assert self.instance is not None, \
4097
      "Cannot retrieve locked instance %s" % self.op.instance_name
4098

    
4099
    # extra beparams
4100
    self.beparams = getattr(self.op, "beparams", {})
4101
    if self.beparams:
4102
      if not isinstance(self.beparams, dict):
4103
        raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
4104
                                   " dict" % (type(self.beparams), ),
4105
                                   errors.ECODE_INVAL)
4106
      # fill the beparams dict
4107
      utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
4108
      self.op.beparams = self.beparams
4109

    
4110
    # extra hvparams
4111
    self.hvparams = getattr(self.op, "hvparams", {})
4112
    if self.hvparams:
4113
      if not isinstance(self.hvparams, dict):
4114
        raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
4115
                                   " dict" % (type(self.hvparams), ),
4116
                                   errors.ECODE_INVAL)
4117

    
4118
      # check hypervisor parameter syntax (locally)
4119
      cluster = self.cfg.GetClusterInfo()
4120
      utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
4121
      filled_hvp = objects.FillDict(cluster.hvparams[instance.hypervisor],
4122
                                    instance.hvparams)
4123
      filled_hvp.update(self.hvparams)
4124
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
4125
      hv_type.CheckParameterSyntax(filled_hvp)
4126
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
4127
      self.op.hvparams = self.hvparams
4128

    
4129
    _CheckNodeOnline(self, instance.primary_node)
4130

    
4131
    bep = self.cfg.GetClusterInfo().FillBE(instance)
4132
    # check bridges existence
4133
    _CheckInstanceBridgesExist(self, instance)
4134

    
4135
    remote_info = self.rpc.call_instance_info(instance.primary_node,
4136
                                              instance.name,
4137
                                              instance.hypervisor)
4138
    remote_info.Raise("Error checking node %s" % instance.primary_node,
4139
                      prereq=True, ecode=errors.ECODE_ENVIRON)
4140
    if not remote_info.payload: # not running already
4141
      _CheckNodeFreeMemory(self, instance.primary_node,
4142
                           "starting instance %s" % instance.name,
4143
                           bep[constants.BE_MEMORY], instance.hypervisor)
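    # Note on the extra parameters gathered above: they are one-off overrides
    # for this start only; for example beparams={"memory": 2048} would start
    # the instance with 2048 MiB without changing the stored instance
    # parameters (the "memory" key name, i.e. constants.BE_MEMORY, is assumed
    # here).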
4144

    
4145
  def Exec(self, feedback_fn):
4146
    """Start the instance.
4147

4148
    """
4149
    instance = self.instance
4150
    force = self.op.force
4151

    
4152
    self.cfg.MarkInstanceUp(instance.name)
4153

    
4154
    node_current = instance.primary_node
4155

    
4156
    _StartInstanceDisks(self, instance, force)
4157

    
4158
    result = self.rpc.call_instance_start(node_current, instance,
4159
                                          self.hvparams, self.beparams)
4160
    msg = result.fail_msg
4161
    if msg:
4162
      _ShutdownInstanceDisks(self, instance)
4163
      raise errors.OpExecError("Could not start instance: %s" % msg)
4164

    
4165

    
4166
class LURebootInstance(LogicalUnit):
4167
  """Reboot an instance.
4168

4169
  """
4170
  HPATH = "instance-reboot"
4171
  HTYPE = constants.HTYPE_INSTANCE
4172
  _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
4173
  REQ_BGL = False
4174

    
4175
  def CheckArguments(self):
4176
    """Check the arguments.
4177

4178
    """
4179
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4180
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
4181

    
4182
  def ExpandNames(self):
4183
    if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
4184
                                   constants.INSTANCE_REBOOT_HARD,
4185
                                   constants.INSTANCE_REBOOT_FULL]:
4186
      raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
4187
                                  (constants.INSTANCE_REBOOT_SOFT,
4188
                                   constants.INSTANCE_REBOOT_HARD,
4189
                                   constants.INSTANCE_REBOOT_FULL))
4190
    self._ExpandAndLockInstance()
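    # Behaviour summary, derived from Exec below: INSTANCE_REBOOT_SOFT and
    # INSTANCE_REBOOT_HARD are handled by a single instance_reboot RPC on the
    # primary node, while INSTANCE_REBOOT_FULL shuts the instance down,
    # deactivates and reactivates its disks, and then starts it again.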
4191

    
4192
  def BuildHooksEnv(self):
4193
    """Build hooks env.
4194

4195
    This runs on master, primary and secondary nodes of the instance.
4196

4197
    """
4198
    env = {
4199
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
4200
      "REBOOT_TYPE": self.op.reboot_type,
4201
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
4202
      }
4203
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4204
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4205
    return env, nl, nl
4206

    
4207
  def CheckPrereq(self):
4208
    """Check prerequisites.
4209

4210
    This checks that the instance is in the cluster.
4211

4212
    """
4213
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4214
    assert self.instance is not None, \
4215
      "Cannot retrieve locked instance %s" % self.op.instance_name
4216

    
4217
    _CheckNodeOnline(self, instance.primary_node)
4218

    
4219
    # check bridges existence
4220
    _CheckInstanceBridgesExist(self, instance)
4221

    
4222
  def Exec(self, feedback_fn):
4223
    """Reboot the instance.
4224

4225
    """
4226
    instance = self.instance
4227
    ignore_secondaries = self.op.ignore_secondaries
4228
    reboot_type = self.op.reboot_type
4229

    
4230
    node_current = instance.primary_node
4231

    
4232
    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
4233
                       constants.INSTANCE_REBOOT_HARD]:
4234
      for disk in instance.disks:
4235
        self.cfg.SetDiskID(disk, node_current)
4236
      result = self.rpc.call_instance_reboot(node_current, instance,
4237
                                             reboot_type,
4238
                                             self.shutdown_timeout)
4239
      result.Raise("Could not reboot instance")
4240
    else:
4241
      result = self.rpc.call_instance_shutdown(node_current, instance,
4242
                                               self.shutdown_timeout)
4243
      result.Raise("Could not shutdown instance for full reboot")
4244
      _ShutdownInstanceDisks(self, instance)
4245
      _StartInstanceDisks(self, instance, ignore_secondaries)
4246
      result = self.rpc.call_instance_start(node_current, instance, None, None)
4247
      msg = result.fail_msg
4248
      if msg:
4249
        _ShutdownInstanceDisks(self, instance)
4250
        raise errors.OpExecError("Could not start instance for"
4251
                                 " full reboot: %s" % msg)
4252

    
4253
    self.cfg.MarkInstanceUp(instance.name)
4254

    
4255

    
4256
class LUShutdownInstance(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.timeout = getattr(self.op, "timeout",
                           constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.timeout
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    node_current = instance.primary_node
    timeout = self.timeout
    self.cfg.MarkInstanceDown(instance.name)
    result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
    msg = result.fail_msg
    if msg:
      self.proc.LogWarning("Could not shutdown instance: %s" % msg)

    _ShutdownInstanceDisks(self, instance)


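# Note (illustrative sketch; the exact opcode class name is an assumption,
# not taken from this file): CheckArguments above treats "timeout" as
# optional, falling back to constants.DEFAULT_SHUTDOWN_TIMEOUT, so a caller
# could submit either of these hypothetical opcodes:
#
#   op = opcodes.OpShutdownInstance(instance_name="instance1.example.com")
#   op = opcodes.OpShutdownInstance(instance_name="instance1.example.com",
#                                   timeout=120)  # explicit grace period (s)
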
class LUReinstallInstance(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot reinstall")

    self.op.os_type = getattr(self.op, "os_type", None)
    self.op.force_variant = getattr(self.op, "force_variant", False)
    if self.op.os_type is not None:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
                                             self.op.debug_level)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)


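# Note (illustrative sketch; the opcode class name and OS name below are
# assumptions used only for the example): a reinstall with a different OS
# merely rewrites the instance's "os" attribute in the configuration and then
# re-runs the OS create scripts on the existing disks, e.g.:
#
#   op = opcodes.OpReinstallInstance(instance_name="instance1.example.com",
#                                    os_type="debootstrap+default",
#                                    force_variant=False)
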
class LURecreateInstanceDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disks"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    if not isinstance(self.op.disks, list):
      raise errors.OpPrereqError("Invalid disks parameter", errors.ECODE_INVAL)
    for item in self.op.disks:
      if (not isinstance(item, int) or
          item < 0):
        raise errors.OpPrereqError("Invalid disk specification '%s'" %
                                   str(item), errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot recreate disks")

    if not self.op.disks:
      self.op.disks = range(len(instance.disks))
    else:
      for idx in self.op.disks:
        if idx >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
                                     errors.ECODE_INVAL)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Recreate the disks.

    """
    to_skip = []
    for idx, _ in enumerate(self.instance.disks):
      if idx not in self.op.disks: # disk idx has not been passed in
        to_skip.append(idx)
        continue

    _CreateDisks(self, self.instance, to_skip=to_skip)


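# Worked example (illustrative only): for an instance with three disks and
# self.op.disks == [1] after CheckPrereq, the loop above computes
# to_skip == [0, 2], so _CreateDisks recreates only disk 1:
#
#   wanted = [1]
#   to_skip = [idx for idx in range(3) if idx not in wanted]   # -> [0, 2]
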
class LURenameInstance(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "new_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None
    _CheckNodeOnline(self, instance.primary_node)
    _CheckInstanceDown(self, instance, "cannot rename")
    self.instance = instance

    # new name verification
    name_info = utils.GetHostInfo(self.op.new_name)

    self.op.new_name = new_name = name_info.name
    instance_list = self.cfg.GetInstanceList()
    if new_name in instance_list:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name, errors.ECODE_EXISTS)

    if not getattr(self.op, "ignore_ip", False):
      if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (name_info.ip, new_name),
                                   errors.ECODE_NOTUNIQUE)

  def Exec(self, feedback_fn):
    """Rename the instance.

    """
    inst = self.instance
    old_name = inst.name

    if inst.disk_template == constants.DT_FILE:
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if inst.disk_template == constants.DT_FILE:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (inst.primary_node, old_file_storage_dir,
                    new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    try:
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name, self.op.debug_level)
      msg = result.fail_msg
      if msg:
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.proc.LogWarning(msg)
    finally:
      _ShutdownInstanceDisks(self, inst)


class LURemoveInstance(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_failures"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["SHUTDOWN_TIMEOUT"] = self.shutdown_timeout
    nl = [self.cfg.GetMasterNode()]
    nl_post = list(self.instance.all_nodes) + nl
    return env, nl, nl_post

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    """
    instance = self.instance
    logging.info("Shutting down instance %s on node %s",
                 instance.name, instance.primary_node)

    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
                                             self.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_failures:
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, instance.primary_node, msg))

    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)


def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
  """Utility function to remove an instance.

  """
  logging.info("Removing block devices for instance %s", instance.name)

  if not _RemoveDisks(lu, instance):
    if not ignore_failures:
      raise errors.OpExecError("Can't remove instance's disks")
    feedback_fn("Warning: can't remove instance's disks")

  logging.info("Removing instance %s out of cluster config", instance.name)

  lu.cfg.RemoveInstance(instance.name)

  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
    "Instance lock removal conflict"

  # Remove lock for the instance
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name


class LUQueryInstances(NoHooksLU):
  """Logical unit for querying instances.

  """
  # pylint: disable-msg=W0142
  _OP_REQP = ["output_fields", "names", "use_locking"]
  REQ_BGL = False
  _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
                    "serial_no", "ctime", "mtime", "uuid"]
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
                                    "admin_state",
                                    "disk_template", "ip", "mac", "bridge",
                                    "nic_mode", "nic_link",
                                    "sda_size", "sdb_size", "vcpus", "tags",
                                    "network_port", "beparams",
                                    r"(disk)\.(size)/([0-9]+)",
                                    r"(disk)\.(sizes)", "disk_usage",
                                    r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
                                    r"(nic)\.(bridge)/([0-9]+)",
                                    r"(nic)\.(macs|ips|modes|links|bridges)",
                                    r"(disk|nic)\.(count)",
                                    "hvparams",
                                    ] + _SIMPLE_FIELDS +
                                  ["hv/%s" % name
                                   for name in constants.HVS_PARAMETERS
                                   if name not in constants.HVC_GLOBALS] +
                                  ["be/%s" % name
                                   for name in constants.BES_PARAMETERS])
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")


  def ExpandNames(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_INSTANCE] = 1
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.names:
      self.wanted = _GetWantedInstances(self, self.op.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.do_locking:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    pass

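  # Note (illustrative summary of the logic above): live data, and therefore
  # node locks, are only needed when at least one requested field is dynamic
  # and the caller asked for locking:
  #
  #   do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
  #   do_locking = self.do_node_query and self.op.use_locking
  #
  # e.g. output_fields == ["name", "pnode"] never acquires locks, while
  # ["name", "oper_state"] with use_locking=True locks instances and nodes.
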
  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    # pylint: disable-msg=R0912
    # way too many branches here
    all_info = self.cfg.GetAllInstancesInfo()
    if self.wanted == locking.ALL_SET:
      # caller didn't specify instance names, so ordering is not important
      if self.do_locking:
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
      else:
        instance_names = all_info.keys()
      instance_names = utils.NiceSort(instance_names)
    else:
      # caller did specify names, so we must keep the ordering
      if self.do_locking:
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
      else:
        tgt_set = all_info.keys()
      missing = set(self.wanted).difference(tgt_set)
      if missing:
        raise errors.OpExecError("Some instances were removed before"
                                 " retrieving their data: %s" % missing)
      instance_names = self.wanted

    instance_list = [all_info[iname] for iname in instance_names]

    # begin data gathering

    nodes = frozenset([inst.primary_node for inst in instance_list])
    hv_list = list(set([inst.hypervisor for inst in instance_list]))

    bad_nodes = []
    off_nodes = []
    if self.do_node_query:
      live_data = {}
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          off_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        else:
          if result.payload:
            live_data.update(result.payload)
          # else no instance is alive
    else:
      live_data = dict([(name, {}) for name in instance_names])

    # end data gathering

    HVPREFIX = "hv/"
    BEPREFIX = "be/"
    output = []
    cluster = self.cfg.GetClusterInfo()
    for instance in instance_list:
      iout = []
      i_hv = cluster.FillHV(instance, skip_globals=True)
      i_be = cluster.FillBE(instance)
      i_nicp = [objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
                                 nic.nicparams) for nic in instance.nics]
      for field in self.op.output_fields:
        st_match = self._FIELDS_STATIC.Matches(field)
        if field in self._SIMPLE_FIELDS:
          val = getattr(instance, field)
        elif field == "pnode":
          val = instance.primary_node
        elif field == "snodes":
          val = list(instance.secondary_nodes)
        elif field == "admin_state":
          val = instance.admin_up
        elif field == "oper_state":
          if instance.primary_node in bad_nodes:
            val = None
          else:
            val = bool(live_data.get(instance.name))
        elif field == "status":
          if instance.primary_node in off_nodes:
            val = "ERROR_nodeoffline"
          elif instance.primary_node in bad_nodes:
            val = "ERROR_nodedown"
          else:
            running = bool(live_data.get(instance.name))
            if running:
              if instance.admin_up:
                val = "running"
              else:
                val = "ERROR_up"
            else:
              if instance.admin_up:
                val = "ERROR_down"
              else:
                val = "ADMIN_down"
        elif field == "oper_ram":
          if instance.primary_node in bad_nodes:
            val = None
          elif instance.name in live_data:
            val = live_data[instance.name].get("memory", "?")
          else:
            val = "-"
        elif field == "vcpus":
          val = i_be[constants.BE_VCPUS]
        elif field == "disk_template":
          val = instance.disk_template
        elif field == "ip":
          if instance.nics:
            val = instance.nics[0].ip
          else:
            val = None
        elif field == "nic_mode":
          if instance.nics:
            val = i_nicp[0][constants.NIC_MODE]
          else:
            val = None
        elif field == "nic_link":
          if instance.nics:
            val = i_nicp[0][constants.NIC_LINK]
          else:
            val = None
        elif field == "bridge":
          if (instance.nics and
              i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
            val = i_nicp[0][constants.NIC_LINK]
          else:
            val = None
        elif field == "mac":
          if instance.nics:
            val = instance.nics[0].mac
          else:
            val = None
        elif field == "sda_size" or field == "sdb_size":
          idx = ord(field[2]) - ord('a')
          try:
            val = instance.FindDisk(idx).size
          except errors.OpPrereqError:
            val = None
        elif field == "disk_usage": # total disk usage per node
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
        elif field == "tags":
          val = list(instance.GetTags())
        elif field == "hvparams":
          val = i_hv
        elif (field.startswith(HVPREFIX) and
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
              field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
          val = i_hv.get(field[len(HVPREFIX):], None)
        elif field == "beparams":
          val = i_be
        elif (field.startswith(BEPREFIX) and
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
          val = i_be.get(field[len(BEPREFIX):], None)
        elif st_match and st_match.groups():
          # matches a variable list
          st_groups = st_match.groups()
          if st_groups and st_groups[0] == "disk":
            if st_groups[1] == "count":
              val = len(instance.disks)
            elif st_groups[1] == "sizes":
              val = [disk.size for disk in instance.disks]
            elif st_groups[1] == "size":
              try:
                val = instance.FindDisk(st_groups[2]).size
              except errors.OpPrereqError:
                val = None
            else:
              assert False, "Unhandled disk parameter"
          elif st_groups[0] == "nic":
            if st_groups[1] == "count":
              val = len(instance.nics)
            elif st_groups[1] == "macs":
              val = [nic.mac for nic in instance.nics]
            elif st_groups[1] == "ips":
              val = [nic.ip for nic in instance.nics]
            elif st_groups[1] == "modes":
              val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
            elif st_groups[1] == "links":
              val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
            elif st_groups[1] == "bridges":
              val = []
              for nicp in i_nicp:
                if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
                  val.append(nicp[constants.NIC_LINK])
                else:
                  val.append(None)
            else:
              # index-based item
              nic_idx = int(st_groups[2])
              if nic_idx >= len(instance.nics):
                val = None
              else:
                if st_groups[1] == "mac":
                  val = instance.nics[nic_idx].mac
                elif st_groups[1] == "ip":
                  val = instance.nics[nic_idx].ip
                elif st_groups[1] == "mode":
                  val = i_nicp[nic_idx][constants.NIC_MODE]
                elif st_groups[1] == "link":
                  val = i_nicp[nic_idx][constants.NIC_LINK]
                elif st_groups[1] == "bridge":
                  nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
                  if nic_mode == constants.NIC_MODE_BRIDGED:
                    val = i_nicp[nic_idx][constants.NIC_LINK]
                  else:
                    val = None
                else:
                  assert False, "Unhandled NIC parameter"
          else:
            assert False, ("Declared but unhandled variable parameter '%s'" %
                           field)
        else:
          assert False, "Declared but unhandled parameter '%s'" % field
        iout.append(val)
      output.append(iout)

    return output


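# Worked example (illustrative only): the "status" field computed above maps
# the combination of node reachability, hypervisor state and admin state to a
# string:
#
#   primary node offline              -> "ERROR_nodeoffline"
#   primary node unreachable          -> "ERROR_nodedown"
#   running and admin_up              -> "running"
#   running and not admin_up          -> "ERROR_up"
#   stopped and admin_up              -> "ERROR_down"
#   stopped and not admin_up          -> "ADMIN_down"
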
class LUFailoverInstance(LogicalUnit):
  """Failover an instance.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_consistency"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self.instance
    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]
    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      "OLD_PRIMARY": source_node,
      "OLD_SECONDARY": target_node,
      "NEW_PRIMARY": target_node,
      "NEW_SECONDARY": source_node,
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    nl_post = list(nl)
    nl_post.append(source_node)
    return env, nl, nl_post

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    bep = self.cfg.GetClusterInfo().FillBE(instance)
    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " network mirrored, cannot failover.",
                                 errors.ECODE_STATE)

    secondary_nodes = instance.secondary_nodes
    if not secondary_nodes:
      raise errors.ProgrammerError("no secondary node but using "
                                   "a mirrored disk template")

    target_node = secondary_nodes[0]
    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    if instance.admin_up:
      # check memory requirements on the secondary node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]

    if instance.admin_up:
      feedback_fn("* checking disk consistency between source and target")
      for dev in instance.disks:
        # for drbd, these are drbd over lvm
        if not _CheckDiskConsistency(self, dev, target_node, False):
          if not self.op.ignore_consistency:
            raise errors.OpExecError("Disk %s is degraded on target node,"
                                     " aborting failover." % dev.iv_name)
    else:
      feedback_fn("* not checking disk consistency as instance is not running")

    feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    feedback_fn("* deactivating the instance's disks on source node")
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
      raise errors.OpExecError("Can't shut down the instance's disks.")

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, feedback_fn)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      feedback_fn("* activating the instance's disks on target node")
      logging.info("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      feedback_fn("* starting the instance on the target node")
      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))


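# Note (illustrative summary, no new behaviour): a failover as implemented
# above is essentially: optionally verify disk consistency on the secondary,
# shut the instance down on the current primary, deactivate its disks there,
# swap primary_node in the configuration and, if the instance was marked up,
# reassemble the disks and start it on the former secondary.  A hypothetical
# caller (opcode class name assumed, not taken from this file) might submit:
#
#   op = opcodes.OpFailoverInstance(instance_name="instance1.example.com",
#                                   ignore_consistency=False)
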
class LUMigrateInstance(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "live", "cleanup"]

  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       self.op.live, self.op.cleanup)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]
    env = _BuildInstanceHookEnvByObject(self, instance)
    env["MIGRATE_LIVE"] = self.op.live
    env["MIGRATE_CLEANUP"] = self.op.cleanup
    env.update({
        "OLD_PRIMARY": source_node,
        "OLD_SECONDARY": target_node,
        "NEW_PRIMARY": target_node,
        "NEW_SECONDARY": source_node,
        })
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    nl_post = list(nl)
    nl_post.append(source_node)
    return env, nl, nl_post


class LUMoveInstance(LogicalUnit):
  """Move an instance by data-copying.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "target_node"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
                                       self.op.target_node]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)

    if instance.admin_up:
      # check memory requirements on the secondary node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))


class LUMigrateNode(LogicalUnit):
  """Migrate all instances from a node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name", "live"]
  REQ_BGL = False

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    # Create tasklets for migrating all instances on this node
    names = []
    tasklets = []

    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
      logging.debug("Migrating instance %s", inst.name)
      names.append(inst.name)

      tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))

    self.tasklets = tasklets

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = names

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "NODE_NAME": self.op.node_name,
      }

    nl = [self.cfg.GetMasterNode()]

    return (env, nl, nl)


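# Note (illustrative restatement of ExpandNames above): LUMigrateNode fans
# out into one TLMigrateInstance tasklet per instance whose primary node is
# the node being evacuated, which is equivalent to:
#
#   tasklets = [TLMigrateInstance(self, inst.name, self.op.live, False)
#               for inst in _GetNodePrimaryInstances(self.cfg,
#                                                    self.op.node_name)]
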
class TLMigrateInstance(Tasklet):
  def __init__(self, lu, instance_name, live, cleanup):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.live = live
    self.cleanup = cleanup

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
    instance = self.cfg.GetInstanceInfo(instance_name)
    assert instance is not None

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " drbd8, cannot migrate.", errors.ECODE_STATE)

    secondary_nodes = instance.secondary_nodes
    if not secondary_nodes:
      raise errors.ConfigurationError("No secondary node but using"
                                      " drbd8 disk template")

    i_be = self.cfg.GetClusterInfo().FillBE(instance)

    target_node = secondary_nodes[0]
    # check memory requirements on the secondary node
    _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
                         instance.name, i_be[constants.BE_MEMORY],
                         instance.hypervisor)

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

    if not self.cleanup:
      _CheckNodeNotDrained(self, target_node)
      result = self.rpc.call_instance_migratable(instance.primary_node,
                                                 instance)
      result.Raise("Can't migrate, please use failover",
                   prereq=True, ecode=errors.ECODE_STATE)

    self.instance = instance

  def _WaitUntilSync(self):
    """Poll with custom rpc for disk sync.

    This uses our own step-based rpc call.

    """
    self.feedback_fn("* wait until resync is done")
    all_done = False
    while not all_done:
      all_done = True
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
                                            self.nodes_ip,
                                            self.instance.disks)
      min_percent = 100
      for node, nres in result.items():
        nres.Raise("Cannot resync disks on node %s" % node)
        node_done, node_percent = nres.payload
        all_done = all_done and node_done
        if node_percent is not None:
          min_percent = min(min_percent, node_percent)
      if not all_done:
        if min_percent < 100:
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
        time.sleep(2)

  def _EnsureSecondary(self, node):
    """Demote a node to secondary.

    """
    self.feedback_fn("* switching node %s to secondary mode" % node)

    for dev in self.instance.disks:
      self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_close(node, self.instance.name,
                                          self.instance.disks)
    result.Raise("Cannot change disk to secondary on node %s" % node)

  def _GoStandalone(self):
    """Disconnect from the network.

    """
    self.feedback_fn("* changing into standalone mode")
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
                                               self.instance.disks)
    for node, nres in result.items():
      nres.Raise("Cannot disconnect disks node %s" % node)

  def _GoReconnect(self, multimaster):
    """Reconnect to the network.

    """
    if multimaster:
      msg = "dual-master"
    else:
      msg = "single-master"
    self.feedback_fn("* changing disks into %s mode" % msg)
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
                                           self.instance.disks,
                                           self.instance.name, multimaster)
    for node, nres in result.items():
      nres.Raise("Cannot change disks config on node %s" % node)

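  # Note (illustrative summary): the helpers above are always combined into a
  # fixed DRBD state sequence; for instance, the cleanup path below performs
  # roughly:
  #
  #   self._EnsureSecondary(demoted_node)   # close the device on one side
  #   self._GoStandalone()                  # drop the network connection
  #   self._GoReconnect(False)              # reconnect in single-master mode
  #   self._WaitUntilSync()                 # wait for the resync to finish
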
  def _ExecCleanup(self):
    """Try to cleanup after a failed migration.

    The cleanup is done by:
      - check that the instance is running only on one node
        (and update the config if needed)
      - change disks on its secondary node to secondary
      - wait until disks are fully synchronized
      - disconnect from the network
      - change disks into single-master mode
      - wait again until disks are fully synchronized

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # check running on only one node
    self.feedback_fn("* checking where the instance actually runs"
                     " (if this hangs, the hypervisor might be in"
                     " a bad state)")
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
    for node, result in ins_l.items():
      result.Raise("Can't contact node %s" % node)

    runningon_source = instance.name in ins_l[source_node].payload
    runningon_target = instance.name in ins_l[target_node].payload

    if runningon_source and runningon_target:
      raise errors.OpExecError("Instance seems to be running on two nodes,"
                               " or the hypervisor is confused. You will have"
                               " to ensure manually that it runs only on one"
                               " and restart this operation.")

    if not (runningon_source or runningon_target):
      raise errors.OpExecError("Instance does not seem to be running at all."
                               " In this case, it's safer to repair by"
                               " running 'gnt-instance stop' to ensure disk"
                               " shutdown, and then restarting it.")

    if runningon_target:
      # the migration has actually succeeded, we need to update the config
      self.feedback_fn("* instance running on secondary node (%s),"
                       " updating config" % target_node)
      instance.primary_node = target_node
      self.cfg.Update(instance, self.feedback_fn)
      demoted_node = source_node
    else:
      self.feedback_fn("* instance confirmed to be running on its"
                       " primary node (%s)" % source_node)
      demoted_node = target_node

    self._EnsureSecondary(demoted_node)
    try:
      self._WaitUntilSync()
    except errors.OpExecError:
      # we ignore errors here, since if the device is standalone, it
      # won't be able to sync
      pass
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def _RevertDiskStatus(self):
    """Try to revert the disk status after a failed migration.

    """
    target_node = self.target_node
    try:
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()
    except errors.OpExecError, err:
      self.lu.LogWarning("Migration failed and I can't reconnect the"
                         " drives: error '%s'\n"
                         "Please look and recover the instance status" %
                         str(err))

  def _AbortMigration(self):
    """Call the hypervisor code to abort a started migration.

    """
    instance = self.instance
    target_node = self.target_node
    migration_info = self.migration_info

    abort_result = self.rpc.call_finalize_migration(target_node,
                                                    instance,
                                                    migration_info,
                                                    False)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on target node %s: %s",
                    target_node, abort_msg)
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.

  def _ExecMigration(self):
    """Migrate an instance.

    The migration is done by:
      - change the disks into dual-master mode
      - wait until disks are fully synchronized again
      - migrate the instance
      - change disks on the new secondary node (the old primary) to secondary
      - wait until disks are fully synchronized
      - change disks into single-master mode

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    self.feedback_fn("* checking disk consistency between source and target")
    for dev in instance.disks:
      if not _CheckDiskConsistency(self, dev, target_node, False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migrate." % dev.iv_name)

    # First get the migration information from the remote node
    result = self.rpc.call_migration_info(source_node, instance)
    msg = result.fail_msg
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (source_node, msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    self.migration_info = migration_info = result.payload

    # Then switch the disks to master/master mode
    self._EnsureSecondary(target_node)
    self._GoStandalone()
    self._GoReconnect(True)
    self._WaitUntilSync()

    self.feedback_fn("* preparing %s to accept the instance" % target_node)
    result = self.rpc.call_accept_instance(target_node,
                                           instance,
                                           migration_info,
                                           self.nodes_ip[target_node])

    msg = result.fail_msg
    if msg:
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Pre-migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* migrating instance to %s" % target_node)
    time.sleep(10)
    result = self.rpc.call_instance_migrate(source_node, instance,
                                            self.nodes_ip[target_node],
                                            self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))
    time.sleep(10)

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    result = self.rpc.call_finalize_migration(target_node,
                                              instance,
                                              migration_info,
                                              True)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed:"
                    " %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    self._EnsureSecondary(source_node)
    self._WaitUntilSync()
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def Exec(self, feedback_fn):
5671
    """Perform the migration.
5672

5673
    """
5674
    feedback_fn("Migrating instance %s" % self.instance.name)
5675

    
5676
    self.feedback_fn = feedback_fn
5677

    
5678
    self.source_node = self.instance.primary_node
5679
    self.target_node = self.instance.secondary_nodes[0]
5680
    self.all_nodes = [self.source_node, self.target_node]
5681
    self.nodes_ip = {
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
      }

    if self.cleanup:
      return self._ExecCleanup()
    else:
      return self._ExecMigration()


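# Note: _CreateDisks below invokes this with force_create == force_open == True
# on the primary node and False on the secondaries, e.g.:
#   _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)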
def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has
      the CreateOnSecondary() attribute
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as an LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  if device.CreateOnSecondary():
    force_create = True

  if device.children:
    for child in device.children:
      _CreateBlockDev(lu, node, instance, child, force_create,
                      info, force_open)

  if not force_create:
    return

  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)


def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create a single block device on a given node.

  This will not recurse over children of the device, so they must be
  created in advance.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as an LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  lu.cfg.SetDiskID(device, node)
  result = lu.rpc.call_blockdev_create(node, device, device.size,
                                       instance.name, force_open, info)
  result.Raise("Can't create block device %s on"
               " node %s for instance %s" % (device, node, instance.name))
  if device.physical_id is None:
    device.physical_id = result.payload


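# Example: _GenerateUniqueNames(lu, [".disk0", ".disk1"]) returns two names of
# the form "<unique-id>.disk0" and "<unique-id>.disk1", each built from its
# own freshly generated ID.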
def _GenerateUniqueNames(lu, exts):
  """Generate suitable LV names.

  This will generate one logical volume name for each given extension.

  """
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
    results.append("%s%s" % (new_id, val))
  return results


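# Each DRBD8 disk generated below consists of a data LV of the requested size
# plus a 128 MB metadata LV, both attached as children of the DRBD device.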
def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
                         p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  """
  port = lu.cfg.AllocatePort()
  vgname = lu.cfg.GetVGName()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgname, names[0]))
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                          logical_id=(vgname, names[1]))
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                                      p_minor, s_minor,
                                      shared_secret),
                          children=[dev_data, dev_meta],
                          iv_name=iv_name)
  return drbd_dev


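# disk_info is a list of dicts, each containing at least a "size" and a "mode"
# entry (see e.g. how LUCreateInstance.CheckPrereq builds self.disks).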
def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index):
  """Generate the entire disk layout for a given template type.

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []
  if template_name == constants.DT_DISKLESS:
    pass
  elif template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                      for i in range(disk_count)])
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
                              logical_id=(vgname, names[idx]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk["mode"])
      disks.append(disk_dev)
  elif template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    names = []
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk["size"], names[idx*2:idx*2+2],
                                      "disk/%d" % disk_index,
                                      minors[idx*2], minors[idx*2+1])
      disk_dev.mode = disk["mode"]
      disks.append(disk_dev)
  elif template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    _RequireFileStorage()

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk["mode"])
      disks.append(disk_dev)
  else:
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
  return disks


def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name


def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation
  @rtype: boolean
  @return: the success of the creation

  """
  info = _GetInstanceInfoText(instance)
  if target_node is None:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes
  else:
    pnode = target_node
    all_nodes = [pnode]

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUSetInstanceParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
      continue
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    #HARDCODE
    for node in all_nodes:
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)


def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, instance.primary_node, result.fail_msg)
      all_result = False

  return all_result


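# For example, with two disks of 1024 MB and 2048 MB the plain template needs
# 1024 + 2048 = 3072 MB in the volume group, while drbd8 needs an extra 128 MB
# of metadata per disk: (1024 + 128) + (2048 + 128) = 3328 MB.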
def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d["size"] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
    constants.DT_FILE: None,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" %  disk_template)

  return req_size_dict[disk_template]


def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
                                                  hvname,
                                                  hvparams)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)


class LUCreateInstance(LogicalUnit):
  """Create an instance.

  """
  HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disks",
              "mode", "start",
              "wait_for_sync", "ip_check", "nics",
              "hvparams", "beparams"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check arguments.

    """
    # set optional parameters to none if they don't exist
    for attr in ["pnode", "snode", "iallocator", "hypervisor",
                 "disk_template", "identify_defaults"]:
      if not hasattr(self.op, attr):
        setattr(self.op, attr, None)

    # do not require name_check to ease forward/backward compatibility
    # for tools
    if not hasattr(self.op, "name_check"):
      self.op.name_check = True
    if not hasattr(self.op, "no_install"):
      self.op.no_install = False
    if self.op.no_install and self.op.start:
      self.LogInfo("No-installation mode selected, disabling startup")
      self.op.start = False
    # validate/normalize the instance name
    self.op.instance_name = utils.HostInfo.NormalizeName(self.op.instance_name)
    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do ip checks without a name check",
                                 errors.ECODE_INVAL)
    # check disk information: either all adopt, or no adopt
    has_adopt = has_no_adopt = False
    for disk in self.op.disks:
      if "adopt" in disk:
        has_adopt = True
      else:
        has_no_adopt = True
    if has_adopt and has_no_adopt:
      raise errors.OpPrereqError("Either all disks are adopted or none is",
                                 errors.ECODE_INVAL)
    if has_adopt:
      if self.op.disk_template != constants.DT_PLAIN:
        raise errors.OpPrereqError("Disk adoption is only supported for the"
                                   " 'plain' disk template",
                                   errors.ECODE_INVAL)
      if self.op.iallocator is not None:
        raise errors.OpPrereqError("Disk adoption not allowed with an"
                                   " iallocator script", errors.ECODE_INVAL)
      if self.op.mode == constants.INSTANCE_IMPORT:
        raise errors.OpPrereqError("Disk adoption not allowed for"
                                   " instance import", errors.ECODE_INVAL)

    self.adopt_disks = has_adopt

    # verify creation mode
    if self.op.mode not in (constants.INSTANCE_CREATE,
                            constants.INSTANCE_IMPORT):
      raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    # instance name verification
    if self.op.name_check:
      self.hostname1 = utils.GetHostInfo(self.op.instance_name)
      self.op.instance_name = self.hostname1.name
      # used in CheckPrereq for ip ping check
      self.check_ip = self.hostname1.ip
    else:
      self.check_ip = None

    # file storage checks
    if (self.op.file_driver and
        not self.op.file_driver in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver, errors.ECODE_INVAL)

    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
      raise errors.OpPrereqError("File storage directory path not absolute",
                                 errors.ECODE_INVAL)

    ### Node/iallocator related checks
    if [self.op.iallocator, self.op.pnode].count(None) != 1:
      raise errors.OpPrereqError("One and only one of iallocator and primary"
                                 " node must be given",
                                 errors.ECODE_INVAL)

    if self.op.mode == constants.INSTANCE_IMPORT:
      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      # works again!
      self.op.force_variant = True

      if self.op.no_install:
        self.LogInfo("No-installation mode has no effect during import")

    else: # INSTANCE_CREATE
      if getattr(self.op, "os_type", None) is None:
        raise errors.OpPrereqError("No guest OS specified",
                                   errors.ECODE_INVAL)
      self.op.force_variant = getattr(self.op, "force_variant", False)
      if self.op.disk_template is None:
        raise errors.OpPrereqError("No disk template specified",
                                   errors.ECODE_INVAL)

  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    """
    self.needed_locks = {}

    instance_name = self.op.instance_name
    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name, errors.ECODE_EXISTS)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    if self.op.iallocator:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = getattr(self.op, "src_node", None)
      src_path = getattr(self.op, "src_path", None)

      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from an absolute"
                                     " path requires a source node option.",
                                     errors.ECODE_INVAL)
      else:
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          self.op.src_path = src_path = \
            utils.PathJoin(constants.EXPORT_DIR, src_path)

  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    nics = [n.ToDict() for n in self.nics]
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     tags=[],
                     os=self.op.os_type,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     mem_size=self.be_full[constants.BE_MEMORY],
                     disks=self.disks,
                     nics=nics,
                     hypervisor=self.op.hypervisor,
                     )

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.result),
                                  ial.required_nodes), errors.ECODE_FAULT)
    self.op.pnode = ial.result[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.result))
    if ial.required_nodes == 2:
      self.op.snode = ial.result[1]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      memory=self.be_full[constants.BE_MEMORY],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d["size"], d["mode"]) for d in self.disks],
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor_name=self.op.hypervisor,
    ))

    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
          self.secondaries)
    return env, nl, nl

  def _ReadExportInfo(self):
    """Reads the export information from disk.

    It will override the opcode source node and path with the actual
    information, if these two were not specified before.

    @return: the export information

    """
    assert self.op.mode == constants.INSTANCE_IMPORT

    src_node = self.op.src_node
    src_path = self.op.src_path

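    # if no source node was given, scan the export lists of all locked nodes
    # for an export matching the (relative) source path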
    if src_node is None:
      locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
      exp_list = self.rpc.call_export_list(locked_nodes)
      found = False
      for node in exp_list:
        if exp_list[node].fail_msg:
          continue
        if src_path in exp_list[node].payload:
          found = True
          self.op.src_node = src_node = node
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
                                                       src_path)
          break
      if not found:
        raise errors.OpPrereqError("No export found for relative path %s" %
                                    src_path, errors.ECODE_INVAL)

    _CheckNodeOnline(self, src_node)
    result = self.rpc.call_export_info(src_node, src_path)
    result.Raise("No export or invalid export found in dir %s" % src_path)

    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
    if not export_info.has_section(constants.INISECT_EXP):
      raise errors.ProgrammerError("Corrupted export config",
                                   errors.ECODE_ENVIRON)

    ei_version = export_info.get(constants.INISECT_EXP, "version")
    if (int(ei_version) != constants.EXPORT_VERSION):
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                 (ei_version, constants.EXPORT_VERSION),
                                 errors.ECODE_ENVIRON)
    return export_info

  def _ReadExportParams(self, einfo):
    """Use export parameters as defaults.

    In case the opcode doesn't specify (as in override) some instance
    parameters, then try to use them from the export information, if
    that declares them.

    """
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")

    if self.op.disk_template is None:
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
        self.op.disk_template = einfo.get(constants.INISECT_INS,
                                          "disk_template")
      else:
        raise errors.OpPrereqError("No disk template specified and the export"
                                   " is missing the disk_template information",
                                   errors.ECODE_INVAL)

    if not self.op.disks:
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
        disks = []
        # TODO: import the disk iv_name too
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
          disks.append({"size": disk_sz})
        self.op.disks = disks
      else:
        raise errors.OpPrereqError("No disk info specified and the export"
                                   " is missing the disk information",
                                   errors.ECODE_INVAL)

    if (not self.op.nics and
        einfo.has_option(constants.INISECT_INS, "nic_count")):
      nics = []
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
        ndict = {}
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
          ndict[name] = v
        nics.append(ndict)
      self.op.nics = nics

    if (self.op.hypervisor is None and
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
    if einfo.has_section(constants.INISECT_HYP):
      # use the export parameters but do not override the ones
      # specified by the user
      for name, value in einfo.items(constants.INISECT_HYP):
        if name not in self.op.hvparams:
          self.op.hvparams[name] = value

    if einfo.has_section(constants.INISECT_BEP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_BEP):
        if name not in self.op.beparams:
          self.op.beparams[name] = value
    else:
      # try to read the parameters old style, from the main section
      for name in constants.BES_PARAMETERS:
        if (name not in self.op.beparams and
            einfo.has_option(constants.INISECT_INS, name)):
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)

  def _RevertToDefaults(self, cluster):
    """Revert the instance parameters to the default values.

    """
    # hvparams
    hv_defs = cluster.GetHVDefaults(self.op.hypervisor, self.op.os_type)
    for name in self.op.hvparams.keys():
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
        del self.op.hvparams[name]
    # beparams
    be_defs = cluster.beparams.get(constants.PP_DEFAULT, {})
    for name in self.op.beparams.keys():
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
        del self.op.beparams[name]
    # nic params
    nic_defs = cluster.nicparams.get(constants.PP_DEFAULT, {})
    for nic in self.op.nics:
      for name in constants.NICS_PARAMETERS:
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
          del nic[name]

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.mode == constants.INSTANCE_IMPORT:
      export_info = self._ReadExportInfo()
      self._ReadExportParams(export_info)

    _CheckDiskTemplate(self.op.disk_template)

    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances", errors.ECODE_STATE)

    if self.op.hypervisor is None:
      self.op.hypervisor = self.cfg.GetHypervisorType()

    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 " cluster (%s)" % (self.op.hypervisor,
                                  ",".join(enabled_hvs)),
                                 errors.ECODE_STATE)

    # check hypervisor parameter syntax (locally)
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = objects.FillDict(cluster.GetHVDefaults(self.op.hypervisor,
                                                        self.op.os_type),
                                  self.op.hvparams)
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)
    self.hv_full = filled_hvp
    # check that we don't specify global parameters on an instance
    _CheckGlobalHvParams(self.op.hvparams)

    # fill and remember the beparams dict
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
    self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
                                    self.op.beparams)

    # now that hvp/bep are in final format, let's reset to defaults,
    # if told to do so
    if self.op.identify_defaults:
      self._RevertToDefaults(cluster)

    # NIC buildup
    self.nics = []
    for idx, nic in enumerate(self.op.nics):
      nic_mode_req = nic.get("mode", None)
      nic_mode = nic_mode_req
      if nic_mode is None:
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]

      # in routed mode, for the first nic, the default ip is 'auto'
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
        default_ip_mode = constants.VALUE_AUTO
      else:
        default_ip_mode = constants.VALUE_NONE

      # ip validity checks
      ip = nic.get("ip", default_ip_mode)
      if ip is None or ip.lower() == constants.VALUE_NONE:
        nic_ip = None
      elif ip.lower() == constants.VALUE_AUTO:
        if not self.op.name_check:
          raise errors.OpPrereqError("IP address set to auto but name checks"
                                     " have been skipped. Aborting.",
                                     errors.ECODE_INVAL)
        nic_ip = self.hostname1.ip
      else:
        if not utils.IsValidIP(ip):
          raise errors.OpPrereqError("Given IP address '%s' doesn't look"
                                     " like a valid IP" % ip,
                                     errors.ECODE_INVAL)
        nic_ip = ip

      # TODO: check the ip address for uniqueness
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
                                   errors.ECODE_INVAL)

      # MAC address verification
      mac = nic.get("mac", constants.VALUE_AUTO)
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        mac = utils.NormalizeAndValidateMac(mac)

        try:
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("MAC address %s already in use"
                                     " in cluster" % mac,
                                     errors.ECODE_NOTUNIQUE)

      # bridge verification
      bridge = nic.get("bridge", None)
      link = nic.get("link", None)
      if bridge and link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time", errors.ECODE_INVAL)
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
                                   errors.ECODE_INVAL)
      elif bridge:
        link = bridge

      nicparams = {}
      if nic_mode_req:
        nicparams[constants.NIC_MODE] = nic_mode_req
      if link:
        nicparams[constants.NIC_LINK] = link

      check_params = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
                                      nicparams)
      objects.NIC.CheckParameterSyntax(check_params)
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))

    # disk checks/pre-build
    self.disks = []
    for disk in self.op.disks:
      mode = disk.get("mode", constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                   mode, errors.ECODE_INVAL)
      size = disk.get("size", None)
      if size is None:
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
      try:
        size = int(size)
      except (TypeError, ValueError):
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
                                   errors.ECODE_INVAL)
      new_disk = {"size": size, "mode": mode}
      if "adopt" in disk:
        new_disk["adopt"] = disk["adopt"]
      self.disks.append(new_disk)

    if self.op.mode == constants.INSTANCE_IMPORT:

      # Check that the new instance doesn't have less disks than the export
      instance_disks = len(self.disks)
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
      if instance_disks < export_disks:
        raise errors.OpPrereqError("Not enough disks to import."
                                   " (instance: %d, export: %d)" %
                                   (instance_disks, export_disks),
                                   errors.ECODE_INVAL)

      disk_images = []
      for idx in range(export_disks):
        option = 'disk%d_dump' % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = utils.PathJoin(self.op.src_path, export_name)
          disk_images.append(image)
        else:
          disk_images.append(False)

      self.src_images = disk_images

      old_name = export_info.get(constants.INISECT_INS, 'name')
      try:
        exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
                                   " an integer: %s" % str(err),
                                   errors.ECODE_STATE)
      if self.op.instance_name == old_name:
        for idx, nic in enumerate(self.nics):
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
            nic_mac_ini = 'nic%d_mac' % idx
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)

    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT

    # ip ping checks (we use the same ip that was resolved in ExpandNames)
    if self.op.ip_check:
      if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (self.check_ip, self.op.instance_name),
                                   errors.ECODE_NOTUNIQUE)

    #### mac address generation
    # By generating here the mac address both the allocator and the hooks get
    # the real final mac address rather than the 'auto' or 'generate' value.
    # There is a race condition between the generation and the instance object
    # creation, which means that we know the mac is valid now, but we're not
    # sure it will be when we actually add the instance. If things go bad
    # adding the instance will abort because of a duplicate mac, and the
    # creation job will fail.
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())

    #### allocator run

    if self.op.iallocator is not None:
      self._RunAllocator()

    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    if pnode.offline:
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if pnode.drained:
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)

    self.secondaries = []

    # mirror node verification
    if self.op.disk_template in constants.DTS_NET_MIRROR:
      if self.op.snode is None:
        raise errors.OpPrereqError("The networked disk templates need"
                                   " a mirror node", errors.ECODE_INVAL)
      if self.op.snode == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be the"
                                   " primary node.", errors.ECODE_INVAL)
      _CheckNodeOnline(self, self.op.snode)
      _CheckNodeNotDrained(self, self.op.snode)
      self.secondaries.append(self.op.snode)

    nodenames = [pnode.name] + self.secondaries

    req_size = _ComputeDiskSize(self.op.disk_template,
                                self.disks)

    # Check lv size requirements, if not adopting
    if req_size is not None and not self.adopt_disks:
      _CheckNodesFreeDisk(self, nodenames, req_size)

    if self.adopt_disks: # instead, we must check the adoption data
      all_lvs = set([i["adopt"] for i in self.disks])
      if len(all_lvs) != len(self.disks):
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
                                   errors.ECODE_INVAL)
      for lv_name in all_lvs:
        try:
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("LV named %s used by another instance" %
                                     lv_name, errors.ECODE_NOTUNIQUE)

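      # the LVs to be adopted must actually exist on the primary node and
      # must not be online (in use) there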
      node_lvs = self.rpc.call_lv_list([pnode.name],
                                       self.cfg.GetVGName())[pnode.name]
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
      node_lvs = node_lvs.payload
      delta = all_lvs.difference(node_lvs.keys())
      if delta:
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
      if online_lvs:
        raise errors.OpPrereqError("Online logical volumes found, cannot"
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
                                   errors.ECODE_STATE)
      # update the size of disk based on what is found
      for dsk in self.disks:
        dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    # memory check on primary node
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MEMORY],
                           self.op.hypervisor)

    self.dry_run_result = list(nodenames)

  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    if constants.ENABLE_FILE_STORAGE:
      # this is needed because os.path.join does not accept None arguments
      if self.op.file_storage_dir is None:
        string_file_storage_dir = ""
      else:
        string_file_storage_dir = self.op.file_storage_dir

      # build the full file storage dir path
      file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
                                        string_file_storage_dir, instance)
    else:
      file_storage_dir = ""

    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  file_storage_dir,
                                  self.op.file_driver,
                                  0)

    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_up=False,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            )

    if self.adopt_disks:
      # rename LVs to the newly-generated names; we need to construct
      # 'fake' LV disks with the old data, plus the new unique_id
      tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
      rename_to = []
      for t_dsk, a_dsk in zip (tmp_disks, self.disks):
        rename_to.append(t_dsk.logical_id)
        t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
        self.cfg.SetDiskID(t_dsk, pnode_name)
      result = self.rpc.call_blockdev_rename(pnode_name,
                                             zip(tmp_disks, rename_to))
      result.Raise("Failed to rename adopted LVs")
    else:
      feedback_fn("* creating instance disks...")
      try:
        _CreateDisks(self, iobj)
      except errors.OpExecError:
        self.LogWarning("Device creation failed, reverting...")
        try:
          _RemoveDisks(self, iobj)
        finally:
          self.cfg.ReleaseDRBDMinors(instance)
          raise

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj, self.proc.GetECId())

    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]
    # Unlock all the nodes
    if self.op.mode == constants.INSTANCE_IMPORT:
      nodes_keep = [self.op.src_node]
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
                       if node != self.op.src_node]
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
    else:
      self.context.glm.release(locking.LEVEL_NODE)
      del self.acquired_locks[locking.LEVEL_NODE]

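    # wait for the new disks to sync (or, if not asked to wait, at least
    # verify that the DRBD mirrors are not degraded) before continuing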
    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      time.sleep(15)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
      if self.op.mode == constants.INSTANCE_CREATE:
        if not self.op.no_install:
          feedback_fn("* running the instance OS create scripts...")
          # FIXME: pass debug option from opcode to backend
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
                                                 self.op.debug_level)
          result.Raise("Could not add os for instance %s"
                       " on node %s" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")

        transfers = []

        for idx, image in enumerate(self.src_images):
          if not image:
            continue

          # FIXME: pass debug option from opcode to backend
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
                                             constants.IEIO_FILE, (image, ),
                                             constants.IEIO_SCRIPT,
                                             (iobj.disks[idx], idx),
                                             None)
          transfers.append(dt)

        import_result = \
          masterd.instance.TransferInstanceData(self, feedback_fn,
                                                self.op.src_node, pnode_name,
                                                self.pnode.secondary_ip,
                                                iobj, transfers)
        if not compat.all(import_result):
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    if self.op.start:
      iobj.admin_up = True
      self.cfg.Update(iobj, feedback_fn)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)


class LUConnectConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      raise errors.OpExecError("Instance %s is not running." % instance.name)

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    hyper = hypervisor.GetHypervisor(instance.hypervisor)
    cluster = self.cfg.GetClusterInfo()
    # beparams and hvparams are passed separately, to avoid editing the
    # instance and then saving the defaults in the instance itself.
    hvparams = cluster.FillHV(instance)
    beparams = cluster.FillBE(instance)
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)

    # build ssh cmdline
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)


class LUReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "mode", "disks"]
  REQ_BGL = False

  def CheckArguments(self):
    if not hasattr(self.op, "remote_node"):
      self.op.remote_node = None
    if not hasattr(self.op, "iallocator"):
      self.op.iallocator = None
    if not hasattr(self.op, "early_release"):
      self.op.early_release = False

    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.iallocator is not None:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    elif self.op.remote_node is not None:
      remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      self.op.remote_node = remote_node

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks, False, self.op.early_release)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    # If we're not already locking all nodes in the set we have to declare the
    # instance's primary/secondary nodes.
    if (level == locking.LEVEL_NODE and
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    nl = [
      self.cfg.GetMasterNode(),
      instance.primary_node,
      ]
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)
    return env, nl, nl


class LUEvacuateNode(LogicalUnit):
  """Relocate the secondary instances from a node.

  """
  HPATH = "node-evacuate"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    if not hasattr(self.op, "remote_node"):
      self.op.remote_node = None
    if not hasattr(self.op, "iallocator"):
      self.op.iallocator = None
    if not hasattr(self.op, "early_release"):
      self.op.early_release = False

    TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
                                  self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    self.needed_locks = {}

    # Declare node locks
    if self.op.iallocator is not None:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    elif self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    else:
      raise errors.OpPrereqError("Invalid parameters", errors.ECODE_INVAL)

    # Create tasklets for replacing disks for all secondary instances on this
    # node
    names = []
    tasklets = []

    for inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
      logging.debug("Replacing disks for instance %s", inst.name)
      names.append(inst.name)

      replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
                                self.op.iallocator, self.op.remote_node, [],
                                True, self.op.early_release)
      tasklets.append(replacer)

    self.tasklets = tasklets
    self.instance_names = names

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names

  def DeclareLocks(self, level):
    # If we're not already locking all nodes in the set we have to declare the
    # instance's primary/secondary nodes.
    if (level == locking.LEVEL_NODE and
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "NODE_NAME": self.op.node_name,
      }

    nl = [self.cfg.GetMasterNode()]

    if self.op.remote_node is not None:
      env["NEW_SECONDARY"] = self.op.remote_node
      nl.append(self.op.remote_node)

    return (env, nl, nl)


class TLReplaceDisks(Tasklet):
7037
  """Replaces disks for an instance.
7038

7039
  Note: Locking is not within the scope of this class.
7040

7041
  """
7042
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7043
               disks, delay_iallocator, early_release):
7044
    """Initializes this class.
7045

7046
    """
7047
    Tasklet.__init__(self, lu)
7048

    
7049
    # Parameters
7050
    self.instance_name = instance_name
7051
    self.mode = mode
7052
    self.iallocator_name = iallocator_name
7053
    self.remote_node = remote_node
7054
    self.disks = disks
7055
    self.delay_iallocator = delay_iallocator
7056
    self.early_release = early_release
7057

    
7058
    # Runtime data
7059
    self.instance = None
7060
    self.new_node = None
7061
    self.target_node = None
7062
    self.other_node = None
7063
    self.remote_node_info = None
7064
    self.node_secondary_ip = None
7065

    
7066
  @staticmethod
7067
  def CheckArguments(mode, remote_node, iallocator):
7068
    """Helper function for users of this class.
7069

7070
    """
7071
    # check for valid parameter combination
7072
    if mode == constants.REPLACE_DISK_CHG:
7073
      if remote_node is None and iallocator is None:
7074
        raise errors.OpPrereqError("When changing the secondary either an"
7075
                                   " iallocator script must be used or the"
7076
                                   " new node given", errors.ECODE_INVAL)
7077

    
7078
      if remote_node is not None and iallocator is not None:
7079
        raise errors.OpPrereqError("Give either the iallocator or the new"
7080
                                   " secondary, not both", errors.ECODE_INVAL)
7081

    
7082
    elif remote_node is not None or iallocator is not None:
7083
      # Not replacing the secondary
7084
      raise errors.OpPrereqError("The iallocator and new node options can"
7085
                                 " only be used when changing the"
7086
                                 " secondary node", errors.ECODE_INVAL)
7087

    
7088
  @staticmethod
7089
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7090
    """Compute a new secondary node using an IAllocator.
7091

7092
    """
7093
    ial = IAllocator(lu.cfg, lu.rpc,
7094
                     mode=constants.IALLOCATOR_MODE_RELOC,
7095
                     name=instance_name,
7096
                     relocate_from=relocate_from)
7097

    
7098
    ial.Run(iallocator_name)
7099

    
7100
    if not ial.success:
7101
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7102
                                 " %s" % (iallocator_name, ial.info),
7103
                                 errors.ECODE_NORES)
7104

    
7105
    if len(ial.result) != ial.required_nodes:
7106
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7107
                                 " of nodes (%s), required %s" %
7108
                                 (iallocator_name,
7109
                                  len(ial.result), ial.required_nodes),
7110
                                 errors.ECODE_FAULT)
7111

    
7112
    remote_node_name = ial.result[0]
7113

    
7114
    lu.LogInfo("Selected new secondary for instance '%s': %s",
7115
               instance_name, remote_node_name)
7116

    
7117
    return remote_node_name
7118

    
7119
  def _FindFaultyDisks(self, node_name):
7120
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7121
                                    node_name, True)
7122

    
7123
  def CheckPrereq(self):
7124
    """Check prerequisites.
7125

7126
    This checks that the instance is in the cluster.
7127

7128
    """
7129
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7130
    assert instance is not None, \
7131
      "Cannot retrieve locked instance %s" % self.instance_name
7132

    
7133
    if instance.disk_template != constants.DT_DRBD8:
7134
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7135
                                 " instances", errors.ECODE_INVAL)
7136

    
7137
    if len(instance.secondary_nodes) != 1:
7138
      raise errors.OpPrereqError("The instance has a strange layout,"
7139
                                 " expected one secondary but found %d" %
7140
                                 len(instance.secondary_nodes),
7141
                                 errors.ECODE_FAULT)
7142

    
7143
    if not self.delay_iallocator:
7144
      self._CheckPrereq2()
7145

    
7146
  def _CheckPrereq2(self):
7147
    """Check prerequisites, second part.
7148

7149
    This function should always be part of CheckPrereq. It was separated and is
7150
    now called from Exec because during node evacuation iallocator was only
7151
    called with an unmodified cluster model, not taking planned changes into
7152
    account.
7153

7154
    """
7155
    instance = self.instance
7156
    secondary_node = instance.secondary_nodes[0]
7157

    
7158
    if self.iallocator_name is None:
7159
      remote_node = self.remote_node
7160
    else:
7161
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7162
                                       instance.name, instance.secondary_nodes)
7163

    
7164
    if remote_node is not None:
7165
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7166
      assert self.remote_node_info is not None, \
7167
        "Cannot retrieve locked node %s" % remote_node
7168
    else:
7169
      self.remote_node_info = None
7170

    
7171
    if remote_node == self.instance.primary_node:
7172
      raise errors.OpPrereqError("The specified node is the primary node of"
7173
                                 " the instance.", errors.ECODE_INVAL)
7174

    
7175
    if remote_node == secondary_node:
7176
      raise errors.OpPrereqError("The specified node is already the"
7177
                                 " secondary node of the instance.",
7178
                                 errors.ECODE_INVAL)
7179

    
7180
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7181
                                    constants.REPLACE_DISK_CHG):
7182
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
7183
                                 errors.ECODE_INVAL)
7184

    
7185
    if self.mode == constants.REPLACE_DISK_AUTO:
7186
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
7187
      faulty_secondary = self._FindFaultyDisks(secondary_node)
7188

    
7189
      if faulty_primary and faulty_secondary:
7190
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7191
                                   " one node and can not be repaired"
7192
                                   " automatically" % self.instance_name,
7193
                                   errors.ECODE_STATE)
7194

    
7195
      if faulty_primary:
7196
        self.disks = faulty_primary
7197
        self.target_node = instance.primary_node
7198
        self.other_node = secondary_node
7199
        check_nodes = [self.target_node, self.other_node]
7200
      elif faulty_secondary:
7201
        self.disks = faulty_secondary
7202
        self.target_node = secondary_node
7203
        self.other_node = instance.primary_node
7204
        check_nodes = [self.target_node, self.other_node]
7205
      else:
7206
        self.disks = []
7207
        check_nodes = []
7208

    
7209
    else:
7210
      # Non-automatic modes
7211
      if self.mode == constants.REPLACE_DISK_PRI:
7212
        self.target_node = instance.primary_node
7213
        self.other_node = secondary_node
7214
        check_nodes = [self.target_node, self.other_node]
7215

    
7216
      elif self.mode == constants.REPLACE_DISK_SEC:
7217
        self.target_node = secondary_node
7218
        self.other_node = instance.primary_node
7219
        check_nodes = [self.target_node, self.other_node]
7220

    
7221
      elif self.mode == constants.REPLACE_DISK_CHG:
7222
        self.new_node = remote_node
7223
        self.other_node = instance.primary_node
7224
        self.target_node = secondary_node
7225
        check_nodes = [self.new_node, self.other_node]
7226

    
7227
        _CheckNodeNotDrained(self.lu, remote_node)
7228

    
7229
        old_node_info = self.cfg.GetNodeInfo(secondary_node)
7230
        assert old_node_info is not None
7231
        if old_node_info.offline and not self.early_release:
7232
          # doesn't make sense to delay the release
7233
          self.early_release = True
7234
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
7235
                          " early-release mode", secondary_node)
7236

    
7237
      else:
7238
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
7239
                                     self.mode)
7240

    
7241
      # If not specified all disks should be replaced
7242
      if not self.disks:
7243
        self.disks = range(len(self.instance.disks))
7244

    
7245
    for node in check_nodes:
7246
      _CheckNodeOnline(self.lu, node)
7247

    
7248
    # Check whether disks are valid
7249
    for disk_idx in self.disks:
7250
      instance.FindDisk(disk_idx)
7251

    
7252
    # Get secondary node IP addresses
7253
    node_2nd_ip = {}
7254

    
7255
    for node_name in [self.target_node, self.other_node, self.new_node]:
7256
      if node_name is not None:
7257
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
7258

    
7259
    self.node_secondary_ip = node_2nd_ip
7260

    
7261
  def Exec(self, feedback_fn):
7262
    """Execute disk replacement.
7263

7264
    This dispatches the disk replacement to the appropriate handler.
7265

7266
    """
7267
    if self.delay_iallocator:
7268
      self._CheckPrereq2()
7269

    
7270
    if not self.disks:
7271
      feedback_fn("No disks need replacement")
7272
      return
7273

    
7274
    feedback_fn("Replacing disk(s) %s for %s" %
7275
                (utils.CommaJoin(self.disks), self.instance.name))
7276

    
7277
    activate_disks = (not self.instance.admin_up)
7278

    
7279
    # Activate the instance disks if we're replacing them on a down instance
7280
    if activate_disks:
7281
      _StartInstanceDisks(self.lu, self.instance, True)
7282

    
7283
    try:
7284
      # Should we replace the secondary node?
7285
      if self.new_node is not None:
7286
        fn = self._ExecDrbd8Secondary
7287
      else:
7288
        fn = self._ExecDrbd8DiskOnly
7289

    
7290
      return fn(feedback_fn)
7291

    
7292
    finally:
7293
      # Deactivate the instance disks if we're replacing them on a
7294
      # down instance
7295
      if activate_disks:
7296
        _SafeShutdownInstanceDisks(self.lu, self.instance)
7297

    
7298
  def _CheckVolumeGroup(self, nodes):
7299
    self.lu.LogInfo("Checking volume groups")
7300

    
7301
    vgname = self.cfg.GetVGName()
7302

    
7303
    # Make sure volume group exists on all involved nodes
7304
    results = self.rpc.call_vg_list(nodes)
7305
    if not results:
7306
      raise errors.OpExecError("Can't list volume groups on the nodes")
7307

    
7308
    for node in nodes:
7309
      res = results[node]
7310
      res.Raise("Error checking node %s" % node)
7311
      if vgname not in res.payload:
7312
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
7313
                                 (vgname, node))
7314

    
7315
  def _CheckDisksExistence(self, nodes):
7316
    # Check disk existence
7317
    for idx, dev in enumerate(self.instance.disks):
7318
      if idx not in self.disks:
7319
        continue
7320

    
7321
      for node in nodes:
7322
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
7323
        self.cfg.SetDiskID(dev, node)
7324

    
7325
        result = self.rpc.call_blockdev_find(node, dev)
7326

    
7327
        msg = result.fail_msg
7328
        if msg or not result.payload:
7329
          if not msg:
7330
            msg = "disk not found"
7331
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7332
                                   (idx, node, msg))
7333

    
7334
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7335
    for idx, dev in enumerate(self.instance.disks):
7336
      if idx not in self.disks:
7337
        continue
7338

    
7339
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7340
                      (idx, node_name))
7341

    
7342
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
7343
                                   ldisk=ldisk):
7344
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7345
                                 " replace disks for instance %s" %
7346
                                 (node_name, self.instance.name))
7347

    
7348
  def _CreateNewStorage(self, node_name):
7349
    vgname = self.cfg.GetVGName()
7350
    iv_names = {}
7351

    
7352
    for idx, dev in enumerate(self.instance.disks):
7353
      if idx not in self.disks:
7354
        continue
7355

    
7356
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7357

    
7358
      self.cfg.SetDiskID(dev, node_name)
7359

    
7360
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7361
      names = _GenerateUniqueNames(self.lu, lv_names)
7362

    
7363
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7364
                             logical_id=(vgname, names[0]))
7365
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7366
                             logical_id=(vgname, names[1]))
7367

    
7368
      new_lvs = [lv_data, lv_meta]
7369
      old_lvs = dev.children
7370
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
7371

    
7372
      # we pass force_create=True to force the LVM creation
7373
      for new_lv in new_lvs:
7374
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7375
                        _GetInstanceInfoText(self.instance), False)
7376

    
7377
    return iv_names
7378

    
7379
  def _CheckDevices(self, node_name, iv_names):
7380
    for name, (dev, _, _) in iv_names.iteritems():
7381
      self.cfg.SetDiskID(dev, node_name)
7382

    
7383
      result = self.rpc.call_blockdev_find(node_name, dev)
7384

    
7385
      msg = result.fail_msg
7386
      if msg or not result.payload:
7387
        if not msg:
7388
          msg = "disk not found"
7389
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
7390
                                 (name, msg))
7391

    
7392
      if result.payload.is_degraded:
7393
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
7394

    
7395
  def _RemoveOldStorage(self, node_name, iv_names):
7396
    for name, (_, old_lvs, _) in iv_names.iteritems():
7397
      self.lu.LogInfo("Remove logical volumes for %s" % name)
7398

    
7399
      for lv in old_lvs:
7400
        self.cfg.SetDiskID(lv, node_name)
7401

    
7402
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7403
        if msg:
7404
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
7405
                             hint="remove unused LVs manually")
7406

    
7407
  def _ReleaseNodeLock(self, node_name):
7408
    """Releases the lock for a given node."""
7409
    self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7410

    
7411
  def _ExecDrbd8DiskOnly(self, feedback_fn):
7412
    """Replace a disk on the primary or secondary for DRBD 8.
7413

7414
    The algorithm for replace is quite complicated:
7415

7416
      1. for each disk to be replaced:
7417

7418
        1. create new LVs on the target node with unique names
7419
        1. detach old LVs from the drbd device
7420
        1. rename old LVs to name_replaced.<time_t>
7421
        1. rename new LVs to old LVs
7422
        1. attach the new LVs (with the old names now) to the drbd device
7423

7424
      1. wait for sync across all devices
7425

7426
      1. for each modified disk:
7427

7428
        1. remove old LVs (which have the name name_replaces.<time_t>)
7429

7430
    Failures are not very well handled.
7431

7432
    """
7433
    steps_total = 6
7434

    
7435
    # Step: check device activation
7436
    self.lu.LogStep(1, steps_total, "Check device existence")
7437
    self._CheckDisksExistence([self.other_node, self.target_node])
7438
    self._CheckVolumeGroup([self.target_node, self.other_node])
7439

    
7440
    # Step: check other node consistency
7441
    self.lu.LogStep(2, steps_total, "Check peer consistency")
7442
    self._CheckDisksConsistency(self.other_node,
7443
                                self.other_node == self.instance.primary_node,
7444
                                False)
7445

    
7446
    # Step: create new storage
7447
    self.lu.LogStep(3, steps_total, "Allocate new storage")
7448
    iv_names = self._CreateNewStorage(self.target_node)
7449

    
7450
    # Step: for each lv, detach+rename*2+attach
7451
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7452
    for dev, old_lvs, new_lvs in iv_names.itervalues():
7453
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
7454

    
7455
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
7456
                                                     old_lvs)
7457
      result.Raise("Can't detach drbd from local storage on node"
7458
                   " %s for device %s" % (self.target_node, dev.iv_name))
7459
      #dev.children = []
7460
      #cfg.Update(instance)
7461

    
7462
      # ok, we created the new LVs, so now we know we have the needed
7463
      # storage; as such, we proceed on the target node to rename
7464
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
7465
      # using the assumption that logical_id == physical_id (which in
7466
      # turn is the unique_id on that node)
7467

    
7468
      # FIXME(iustin): use a better name for the replaced LVs
7469
      temp_suffix = int(time.time())
7470
      ren_fn = lambda d, suff: (d.physical_id[0],
7471
                                d.physical_id[1] + "_replaced-%s" % suff)
7472

    
7473
      # Build the rename list based on what LVs exist on the node
7474
      rename_old_to_new = []
7475
      for to_ren in old_lvs:
7476
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
7477
        if not result.fail_msg and result.payload:
7478
          # device exists
7479
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
7480

    
7481
      self.lu.LogInfo("Renaming the old LVs on the target node")
7482
      result = self.rpc.call_blockdev_rename(self.target_node,
7483
                                             rename_old_to_new)
7484
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
7485

    
7486
      # Now we rename the new LVs to the old LVs
7487
      self.lu.LogInfo("Renaming the new LVs on the target node")
7488
      rename_new_to_old = [(new, old.physical_id)
7489
                           for old, new in zip(old_lvs, new_lvs)]
7490
      result = self.rpc.call_blockdev_rename(self.target_node,
7491
                                             rename_new_to_old)
7492
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
7493

    
7494
      for old, new in zip(old_lvs, new_lvs):
7495
        new.logical_id = old.logical_id
7496
        self.cfg.SetDiskID(new, self.target_node)
7497

    
7498
      for disk in old_lvs:
7499
        disk.logical_id = ren_fn(disk, temp_suffix)
7500
        self.cfg.SetDiskID(disk, self.target_node)
7501

    
7502
      # Now that the new lvs have the old name, we can add them to the device
7503
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
7504
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
7505
                                                  new_lvs)
7506
      msg = result.fail_msg
7507
      if msg:
7508
        for new_lv in new_lvs:
7509
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
7510
                                               new_lv).fail_msg
7511
          if msg2:
7512
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
7513
                               hint=("cleanup manually the unused logical"
7514
                                     "volumes"))
7515
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
7516

    
7517
      dev.children = new_lvs
7518

    
7519
      self.cfg.Update(self.instance, feedback_fn)
7520

    
7521
    cstep = 5
7522
    if self.early_release:
7523
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
7524
      cstep += 1
7525
      self._RemoveOldStorage(self.target_node, iv_names)
7526
      # WARNING: we release both node locks here, do not do other RPCs
7527
      # than WaitForSync to the primary node
7528
      self._ReleaseNodeLock([self.target_node, self.other_node])
7529

    
7530
    # Wait for sync
7531
    # This can fail as the old devices are degraded and _WaitForSync
7532
    # does a combined result over all disks, so we don't check its return value
7533
    self.lu.LogStep(cstep, steps_total, "Sync devices")
7534
    cstep += 1
7535
    _WaitForSync(self.lu, self.instance)
7536

    
7537
    # Check all devices manually
7538
    self._CheckDevices(self.instance.primary_node, iv_names)
7539

    
7540
    # Step: remove old storage
7541
    if not self.early_release:
7542
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
7543
      cstep += 1
7544
      self._RemoveOldStorage(self.target_node, iv_names)
7545

    
7546
  def _ExecDrbd8Secondary(self, feedback_fn):
7547
    """Replace the secondary node for DRBD 8.
7548

7549
    The algorithm for replace is quite complicated:
7550
      - for all disks of the instance:
7551
        - create new LVs on the new node with same names
7552
        - shutdown the drbd device on the old secondary
7553
        - disconnect the drbd network on the primary
7554
        - create the drbd device on the new secondary
7555
        - network attach the drbd on the primary, using an artifice:
7556
          the drbd code for Attach() will connect to the network if it
7557
          finds a device which is connected to the good local disks but
7558
          not network enabled
7559
      - wait for sync across all devices
7560
      - remove all disks from the old secondary
7561

7562
    Failures are not very well handled.
7563

7564
    """
7565
    steps_total = 6
7566

    
7567
    # Step: check device activation
7568
    self.lu.LogStep(1, steps_total, "Check device existence")
7569
    self._CheckDisksExistence([self.instance.primary_node])
7570
    self._CheckVolumeGroup([self.instance.primary_node])
7571

    
7572
    # Step: check other node consistency
7573
    self.lu.LogStep(2, steps_total, "Check peer consistency")
7574
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
7575

    
7576
    # Step: create new storage
7577
    self.lu.LogStep(3, steps_total, "Allocate new storage")
7578
    for idx, dev in enumerate(self.instance.disks):
7579
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
7580
                      (self.new_node, idx))
7581
      # we pass force_create=True to force LVM creation
7582
      for new_lv in dev.children:
7583
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
7584
                        _GetInstanceInfoText(self.instance), False)
7585

    
7586
    # Step 4: dbrd minors and drbd setups changes
7587
    # after this, we must manually remove the drbd minors on both the
7588
    # error and the success paths
7589
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7590
    minors = self.cfg.AllocateDRBDMinor([self.new_node
7591
                                         for dev in self.instance.disks],
7592
                                        self.instance.name)
7593
    logging.debug("Allocated minors %r", minors)
7594

    
7595
    iv_names = {}
7596
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
7597
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
7598
                      (self.new_node, idx))
7599
      # create new devices on new_node; note that we create two IDs:
7600
      # one without port, so the drbd will be activated without
7601
      # networking information on the new node at this stage, and one
7602
      # with network, for the latter activation in step 4
7603
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
7604
      if self.instance.primary_node == o_node1:
7605
        p_minor = o_minor1
7606
      else:
7607
        assert self.instance.primary_node == o_node2, "Three-node instance?"
7608
        p_minor = o_minor2
7609

    
7610
      new_alone_id = (self.instance.primary_node, self.new_node, None,
7611
                      p_minor, new_minor, o_secret)
7612
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
7613
                    p_minor, new_minor, o_secret)
7614

    
7615
      iv_names[idx] = (dev, dev.children, new_net_id)
7616
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
7617
                    new_net_id)
7618
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
7619
                              logical_id=new_alone_id,
7620
                              children=dev.children,
7621
                              size=dev.size)
7622
      try:
7623
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
7624
                              _GetInstanceInfoText(self.instance), False)
7625
      except errors.GenericError:
7626
        self.cfg.ReleaseDRBDMinors(self.instance.name)
7627
        raise
7628

    
7629
    # We have new devices, shutdown the drbd on the old secondary
7630
    for idx, dev in enumerate(self.instance.disks):
7631
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
7632
      self.cfg.SetDiskID(dev, self.target_node)
7633
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
7634
      if msg:
7635
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
7636
                           "node: %s" % (idx, msg),
7637
                           hint=("Please cleanup this device manually as"
7638
                                 " soon as possible"))
7639

    
7640
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
7641
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
7642
                                               self.node_secondary_ip,
7643
                                               self.instance.disks)\
7644
                                              [self.instance.primary_node]
7645

    
7646
    msg = result.fail_msg
7647
    if msg:
7648
      # detaches didn't succeed (unlikely)
7649
      self.cfg.ReleaseDRBDMinors(self.instance.name)
7650
      raise errors.OpExecError("Can't detach the disks from the network on"
7651
                               " old node: %s" % (msg,))
7652

    
7653
    # if we managed to detach at least one, we update all the disks of
7654
    # the instance to point to the new secondary
7655
    self.lu.LogInfo("Updating instance configuration")
7656
    for dev, _, new_logical_id in iv_names.itervalues():
7657
      dev.logical_id = new_logical_id
7658
      self.cfg.SetDiskID(dev, self.instance.primary_node)
7659

    
7660
    self.cfg.Update(self.instance, feedback_fn)
7661

    
7662
    # and now perform the drbd attach
7663
    self.lu.LogInfo("Attaching primary drbds to new secondary"
7664
                    " (standalone => connected)")
7665
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
7666
                                            self.new_node],
7667
                                           self.node_secondary_ip,
7668
                                           self.instance.disks,
7669
                                           self.instance.name,
7670
                                           False)
7671
    for to_node, to_result in result.items():
7672
      msg = to_result.fail_msg
7673
      if msg:
7674
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
7675
                           to_node, msg,
7676
                           hint=("please do a gnt-instance info to see the"
7677
                                 " status of disks"))
7678
    cstep = 5
7679
    if self.early_release:
7680
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
7681
      cstep += 1
7682
      self._RemoveOldStorage(self.target_node, iv_names)
7683
      # WARNING: we release all node locks here, do not do other RPCs
7684
      # than WaitForSync to the primary node
7685
      self._ReleaseNodeLock([self.instance.primary_node,
7686
                             self.target_node,
7687
                             self.new_node])
7688

    
7689
    # Wait for sync
7690
    # This can fail as the old devices are degraded and _WaitForSync
7691
    # does a combined result over all disks, so we don't check its return value
7692
    self.lu.LogStep(cstep, steps_total, "Sync devices")
7693
    cstep += 1
7694
    _WaitForSync(self.lu, self.instance)
7695

    
7696
    # Check all devices manually
7697
    self._CheckDevices(self.instance.primary_node, iv_names)
7698

    
7699
    # Step: remove old storage
7700
    if not self.early_release:
7701
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
7702
      self._RemoveOldStorage(self.target_node, iv_names)
7703

    
7704

    
7705
class LURepairNodeStorage(NoHooksLU):
7706
  """Repairs the volume group on a node.
7707

7708
  """
7709
  _OP_REQP = ["node_name"]
7710
  REQ_BGL = False
7711

    
7712
  def CheckArguments(self):
7713
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7714

    
7715
    _CheckStorageType(self.op.storage_type)
7716

    
7717
  def ExpandNames(self):
7718
    self.needed_locks = {
7719
      locking.LEVEL_NODE: [self.op.node_name],
7720
      }
7721

    
7722
  def _CheckFaultyDisks(self, instance, node_name):
7723
    """Ensure faulty disks abort the opcode or at least warn."""
7724
    try:
7725
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
7726
                                  node_name, True):
7727
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
7728
                                   " node '%s'" % (instance.name, node_name),
7729
                                   errors.ECODE_STATE)
7730
    except errors.OpPrereqError, err:
7731
      if self.op.ignore_consistency:
7732
        self.proc.LogWarning(str(err.args[0]))
7733
      else:
7734
        raise
7735

    
7736
  def CheckPrereq(self):
7737
    """Check prerequisites.
7738

7739
    """
7740
    storage_type = self.op.storage_type
7741

    
7742
    if (constants.SO_FIX_CONSISTENCY not in
7743
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
7744
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
7745
                                 " repaired" % storage_type,
7746
                                 errors.ECODE_INVAL)
7747

    
7748
    # Check whether any instance on this node has faulty disks
7749
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
7750
      if not inst.admin_up:
7751
        continue
7752
      check_nodes = set(inst.all_nodes)
7753
      check_nodes.discard(self.op.node_name)
7754
      for inst_node_name in check_nodes:
7755
        self._CheckFaultyDisks(inst, inst_node_name)
7756

    
7757
  def Exec(self, feedback_fn):
7758
    feedback_fn("Repairing storage unit '%s' on %s ..." %
7759
                (self.op.name, self.op.node_name))
7760

    
7761
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
7762
    result = self.rpc.call_storage_execute(self.op.node_name,
7763
                                           self.op.storage_type, st_args,
7764
                                           self.op.name,
7765
                                           constants.SO_FIX_CONSISTENCY)
7766
    result.Raise("Failed to repair storage unit '%s' on %s" %
7767
                 (self.op.name, self.op.node_name))
7768

    
7769

    
7770
class LUNodeEvacuationStrategy(NoHooksLU):
7771
  """Computes the node evacuation strategy.
7772

7773
  """
7774
  _OP_REQP = ["nodes"]
7775
  REQ_BGL = False
7776

    
7777
  def CheckArguments(self):
7778
    if not hasattr(self.op, "remote_node"):
7779
      self.op.remote_node = None
7780
    if not hasattr(self.op, "iallocator"):
7781
      self.op.iallocator = None
7782
    if self.op.remote_node is not None and self.op.iallocator is not None:
7783
      raise errors.OpPrereqError("Give either the iallocator or the new"
7784
                                 " secondary, not both", errors.ECODE_INVAL)
7785

    
7786
  def ExpandNames(self):
7787
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
7788
    self.needed_locks = locks = {}
7789
    if self.op.remote_node is None:
7790
      locks[locking.LEVEL_NODE] = locking.ALL_SET
7791
    else:
7792
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7793
      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
7794

    
7795
  def CheckPrereq(self):
7796
    pass
7797

    
7798
  def Exec(self, feedback_fn):
7799
    if self.op.remote_node is not None:
7800
      instances = []
7801
      for node in self.op.nodes:
7802
        instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
7803
      result = []
7804
      for i in instances:
7805
        if i.primary_node == self.op.remote_node:
7806
          raise errors.OpPrereqError("Node %s is the primary node of"
7807
                                     " instance %s, cannot use it as"
7808
                                     " secondary" %
7809
                                     (self.op.remote_node, i.name),
7810
                                     errors.ECODE_INVAL)
7811
        result.append([i.name, self.op.remote_node])
7812
    else:
7813
      ial = IAllocator(self.cfg, self.rpc,
7814
                       mode=constants.IALLOCATOR_MODE_MEVAC,
7815
                       evac_nodes=self.op.nodes)
7816
      ial.Run(self.op.iallocator, validate=True)
7817
      if not ial.success:
7818
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
7819
                                 errors.ECODE_NORES)
7820
      result = ial.result
7821
    return result
7822

    
7823

    
7824
class LUGrowDisk(LogicalUnit):
7825
  """Grow a disk of an instance.
7826

7827
  """
7828
  HPATH = "disk-grow"
7829
  HTYPE = constants.HTYPE_INSTANCE
7830
  _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
7831
  REQ_BGL = False
7832

    
7833
  def ExpandNames(self):
7834
    self._ExpandAndLockInstance()
7835
    self.needed_locks[locking.LEVEL_NODE] = []
7836
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7837

    
7838
  def DeclareLocks(self, level):
7839
    if level == locking.LEVEL_NODE:
7840
      self._LockInstancesNodes()
7841

    
7842
  def BuildHooksEnv(self):
7843
    """Build hooks env.
7844

7845
    This runs on the master, the primary and all the secondaries.
7846

7847
    """
7848
    env = {
7849
      "DISK": self.op.disk,
7850
      "AMOUNT": self.op.amount,
7851
      }
7852
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7853
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7854
    return env, nl, nl
7855

    
7856
  def CheckPrereq(self):
7857
    """Check prerequisites.
7858

7859
    This checks that the instance is in the cluster.
7860

7861
    """
7862
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7863
    assert instance is not None, \
7864
      "Cannot retrieve locked instance %s" % self.op.instance_name
7865
    nodenames = list(instance.all_nodes)
7866
    for node in nodenames:
7867
      _CheckNodeOnline(self, node)
7868

    
7869

    
7870
    self.instance = instance
7871

    
7872
    if instance.disk_template not in constants.DTS_GROWABLE:
7873
      raise errors.OpPrereqError("Instance's disk layout does not support"
7874
                                 " growing.", errors.ECODE_INVAL)
7875

    
7876
    self.disk = instance.FindDisk(self.op.disk)
7877

    
7878
    if instance.disk_template != constants.DT_FILE:
7879
      # TODO: check the free disk space for file, when that feature will be
7880
      # supported
7881
      _CheckNodesFreeDisk(self, nodenames, self.op.amount)
7882

    
7883
  def Exec(self, feedback_fn):
7884
    """Execute disk grow.
7885

7886
    """
7887
    instance = self.instance
7888
    disk = self.disk
7889
    for node in instance.all_nodes:
7890
      self.cfg.SetDiskID(disk, node)
7891
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
7892
      result.Raise("Grow request failed to node %s" % node)
7893

    
7894
      # TODO: Rewrite code to work properly
7895
      # DRBD goes into sync mode for a short amount of time after executing the
7896
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
7897
      # calling "resize" in sync mode fails. Sleeping for a short amount of
7898
      # time is a work-around.
7899
      time.sleep(5)
7900

    
7901
    disk.RecordGrow(self.op.amount)
7902
    self.cfg.Update(instance, feedback_fn)
7903
    if self.op.wait_for_sync:
7904
      disk_abort = not _WaitForSync(self, instance)
7905
      if disk_abort:
7906
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
7907
                             " status.\nPlease check the instance.")
7908

    
7909

    
7910
class LUQueryInstanceData(NoHooksLU):
7911
  """Query runtime instance data.
7912

7913
  """
7914
  _OP_REQP = ["instances", "static"]
7915
  REQ_BGL = False
7916

    
7917
  def ExpandNames(self):
7918
    self.needed_locks = {}
7919
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
7920

    
7921
    if not isinstance(self.op.instances, list):
7922
      raise errors.OpPrereqError("Invalid argument type 'instances'",
7923
                                 errors.ECODE_INVAL)
7924

    
7925
    if self.op.instances:
7926
      self.wanted_names = []
7927
      for name in self.op.instances:
7928
        full_name = _ExpandInstanceName(self.cfg, name)
7929
        self.wanted_names.append(full_name)
7930
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
7931
    else:
7932
      self.wanted_names = None
7933
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
7934

    
7935
    self.needed_locks[locking.LEVEL_NODE] = []
7936
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7937

    
7938
  def DeclareLocks(self, level):
7939
    if level == locking.LEVEL_NODE:
7940
      self._LockInstancesNodes()
7941

    
7942
  def CheckPrereq(self):
7943
    """Check prerequisites.
7944

7945
    This only checks the optional instance list against the existing names.
7946

7947
    """
7948
    if self.wanted_names is None:
7949
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
7950

    
7951
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
7952
                             in self.wanted_names]
7953
    return
7954

    
7955
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
7956
    """Returns the status of a block device
7957

7958
    """
7959
    if self.op.static or not node:
7960
      return None
7961

    
7962
    self.cfg.SetDiskID(dev, node)
7963

    
7964
    result = self.rpc.call_blockdev_find(node, dev)
7965
    if result.offline:
7966
      return None
7967

    
7968
    result.Raise("Can't compute disk status for %s" % instance_name)
7969

    
7970
    status = result.payload
7971
    if status is None:
7972
      return None
7973

    
7974
    return (status.dev_path, status.major, status.minor,
7975
            status.sync_percent, status.estimated_time,
7976
            status.is_degraded, status.ldisk_status)
7977

    
7978
  def _ComputeDiskStatus(self, instance, snode, dev):
7979
    """Compute block device status.
7980

7981
    """
7982
    if dev.dev_type in constants.LDS_DRBD:
7983
      # we change the snode then (otherwise we use the one passed in)
7984
      if dev.logical_id[0] == instance.primary_node:
7985
        snode = dev.logical_id[1]
7986
      else:
7987
        snode = dev.logical_id[0]
7988

    
7989
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
7990
                                              instance.name, dev)
7991
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
7992

    
7993
    if dev.children:
7994
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
7995
                      for child in dev.children]
7996
    else:
7997
      dev_children = []
7998

    
7999
    data = {
8000
      "iv_name": dev.iv_name,
8001
      "dev_type": dev.dev_type,
8002
      "logical_id": dev.logical_id,
8003
      "physical_id": dev.physical_id,
8004
      "pstatus": dev_pstatus,
8005
      "sstatus": dev_sstatus,
8006
      "children": dev_children,
8007
      "mode": dev.mode,
8008
      "size": dev.size,
8009
      }
8010

    
8011
    return data
8012

    
8013
  def Exec(self, feedback_fn):
8014
    """Gather and return data"""
8015
    result = {}
8016

    
8017
    cluster = self.cfg.GetClusterInfo()
8018

    
8019
    for instance in self.wanted_instances:
8020
      if not self.op.static:
8021
        remote_info = self.rpc.call_instance_info(instance.primary_node,
8022
                                                  instance.name,
8023
                                                  instance.hypervisor)
8024
        remote_info.Raise("Error checking node %s" % instance.primary_node)
8025
        remote_info = remote_info.payload
8026
        if remote_info and "state" in remote_info:
8027
          remote_state = "up"
8028
        else:
8029
          remote_state = "down"
8030
      else:
8031
        remote_state = None
8032
      if instance.admin_up:
8033
        config_state = "up"
8034
      else:
8035
        config_state = "down"
8036

    
8037
      disks = [self._ComputeDiskStatus(instance, None, device)
8038
               for device in instance.disks]
8039

    
8040
      idict = {
8041
        "name": instance.name,
8042
        "config_state": config_state,
8043
        "run_state": remote_state,
8044
        "pnode": instance.primary_node,
8045
        "snodes": instance.secondary_nodes,
8046
        "os": instance.os,
8047
        # this happens to be the same format used for hooks
8048
        "nics": _NICListToTuple(self, instance.nics),
8049
        "disks": disks,
8050
        "hypervisor": instance.hypervisor,
8051
        "network_port": instance.network_port,
8052
        "hv_instance": instance.hvparams,
8053
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
8054
        "be_instance": instance.beparams,
8055
        "be_actual": cluster.FillBE(instance),
8056
        "serial_no": instance.serial_no,
8057
        "mtime": instance.mtime,
8058
        "ctime": instance.ctime,
8059
        "uuid": instance.uuid,
8060
        }
8061

    
8062
      result[instance.name] = idict
8063

    
8064
    return result
8065

    
8066

    
8067
class LUSetInstanceParams(LogicalUnit):
8068
  """Modifies an instances's parameters.
8069

8070
  """
8071
  HPATH = "instance-modify"
8072
  HTYPE = constants.HTYPE_INSTANCE
8073
  _OP_REQP = ["instance_name"]
8074
  REQ_BGL = False
8075

    
8076
  def CheckArguments(self):
8077
    if not hasattr(self.op, 'nics'):
8078
      self.op.nics = []
8079
    if not hasattr(self.op, 'disks'):
8080
      self.op.disks = []
8081
    if not hasattr(self.op, 'beparams'):
8082
      self.op.beparams = {}
8083
    if not hasattr(self.op, 'hvparams'):
8084
      self.op.hvparams = {}
8085
    if not hasattr(self.op, "disk_template"):
8086
      self.op.disk_template = None
8087
    if not hasattr(self.op, "remote_node"):
8088
      self.op.remote_node = None
8089
    if not hasattr(self.op, "os_name"):
8090
      self.op.os_name = None
8091
    if not hasattr(self.op, "force_variant"):
8092
      self.op.force_variant = False
8093
    self.op.force = getattr(self.op, "force", False)
8094
    if not (self.op.nics or self.op.disks or self.op.disk_template or
8095
            self.op.hvparams or self.op.beparams or self.op.os_name):
8096
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8097

    
8098
    if self.op.hvparams:
8099
      _CheckGlobalHvParams(self.op.hvparams)
8100

    
8101
    # Disk validation
8102
    disk_addremove = 0
8103
    for disk_op, disk_dict in self.op.disks:
8104
      if disk_op == constants.DDM_REMOVE:
8105
        disk_addremove += 1
8106
        continue
8107
      elif disk_op == constants.DDM_ADD:
8108
        disk_addremove += 1
8109
      else:
8110
        if not isinstance(disk_op, int):
8111
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8112
        if not isinstance(disk_dict, dict):
8113
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8114
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8115

    
8116
      if disk_op == constants.DDM_ADD:
8117
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8118
        if mode not in constants.DISK_ACCESS_SET:
8119
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8120
                                     errors.ECODE_INVAL)
8121
        size = disk_dict.get('size', None)
8122
        if size is None:
8123
          raise errors.OpPrereqError("Required disk parameter size missing",
8124
                                     errors.ECODE_INVAL)
8125
        try:
8126
          size = int(size)
8127
        except (TypeError, ValueError), err:
8128
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8129
                                     str(err), errors.ECODE_INVAL)
8130
        disk_dict['size'] = size
8131
      else:
8132
        # modification of disk
8133
        if 'size' in disk_dict:
8134
          raise errors.OpPrereqError("Disk size change not possible, use"
8135
                                     " grow-disk", errors.ECODE_INVAL)
8136

    
8137
    if disk_addremove > 1:
8138
      raise errors.OpPrereqError("Only one disk add or remove operation"
8139
                                 " supported at a time", errors.ECODE_INVAL)
8140

    
8141
    if self.op.disks and self.op.disk_template is not None:
8142
      raise errors.OpPrereqError("Disk template conversion and other disk"
8143
                                 " changes not supported at the same time",
8144
                                 errors.ECODE_INVAL)
8145

    
8146
    if self.op.disk_template:
8147
      _CheckDiskTemplate(self.op.disk_template)
8148
      if (self.op.disk_template in constants.DTS_NET_MIRROR and
8149
          self.op.remote_node is None):
8150
        raise errors.OpPrereqError("Changing the disk template to a mirrored"
8151
                                   " one requires specifying a secondary node",
8152
                                   errors.ECODE_INVAL)
8153

    
8154
    # NIC validation
8155
    nic_addremove = 0
8156
    for nic_op, nic_dict in self.op.nics:
8157
      if nic_op == constants.DDM_REMOVE:
8158
        nic_addremove += 1
8159
        continue
8160
      elif nic_op == constants.DDM_ADD:
8161
        nic_addremove += 1
8162
      else:
8163
        if not isinstance(nic_op, int):
8164
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8165
        if not isinstance(nic_dict, dict):
8166
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8167
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8168

    
8169
      # nic_dict should be a dict
8170
      nic_ip = nic_dict.get('ip', None)
8171
      if nic_ip is not None:
8172
        if nic_ip.lower() == constants.VALUE_NONE:
8173
          nic_dict['ip'] = None
8174
        else:
8175
          if not utils.IsValidIP(nic_ip):
8176
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8177
                                       errors.ECODE_INVAL)
8178

    
8179
      nic_bridge = nic_dict.get('bridge', None)
8180
      nic_link = nic_dict.get('link', None)
8181
      if nic_bridge and nic_link:
8182
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8183
                                   " at the same time", errors.ECODE_INVAL)
8184
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8185
        nic_dict['bridge'] = None
8186
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8187
        nic_dict['link'] = None
8188

    
8189
      if nic_op == constants.DDM_ADD:
8190
        nic_mac = nic_dict.get('mac', None)
8191
        if nic_mac is None:
8192
          nic_dict['mac'] = constants.VALUE_AUTO
8193

    
8194
      if 'mac' in nic_dict:
8195
        nic_mac = nic_dict['mac']
8196
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8197
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8198

    
8199
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8200
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8201
                                     " modifying an existing nic",
8202
                                     errors.ECODE_INVAL)
8203

    
8204
    if nic_addremove > 1:
8205
      raise errors.OpPrereqError("Only one NIC add or remove operation"
8206
                                 " supported at a time", errors.ECODE_INVAL)
8207

    
8208
  def ExpandNames(self):
8209
    self._ExpandAndLockInstance()
8210
    self.needed_locks[locking.LEVEL_NODE] = []
8211
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8212

    
8213
  def DeclareLocks(self, level):
8214
    if level == locking.LEVEL_NODE:
8215
      self._LockInstancesNodes()
8216
      if self.op.disk_template and self.op.remote_node:
8217
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8218
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
8219

    
8220
  def BuildHooksEnv(self):
8221
    """Build hooks env.
8222

8223
    This runs on the master, primary and secondaries.
8224

8225
    """
8226
    args = dict()
8227
    if constants.BE_MEMORY in self.be_new:
8228
      args['memory'] = self.be_new[constants.BE_MEMORY]
8229
    if constants.BE_VCPUS in self.be_new:
8230
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
8231
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
8232
    # information at all.
8233
    if self.op.nics:
8234
      args['nics'] = []
8235
      nic_override = dict(self.op.nics)
8236
      c_nicparams = self.cluster.nicparams[constants.PP_DEFAULT]
8237
      for idx, nic in enumerate(self.instance.nics):
8238
        if idx in nic_override:
8239
          this_nic_override = nic_override[idx]
8240
        else:
8241
          this_nic_override = {}
8242
        if 'ip' in this_nic_override:
8243
          ip = this_nic_override['ip']
8244
        else:
8245
          ip = nic.ip
8246
        if 'mac' in this_nic_override:
8247
          mac = this_nic_override['mac']
8248
        else:
8249
          mac = nic.mac
8250
        if idx in self.nic_pnew:
8251
          nicparams = self.nic_pnew[idx]
8252
        else:
8253
          nicparams = objects.FillDict(c_nicparams, nic.nicparams)
8254
        mode = nicparams[constants.NIC_MODE]
8255
        link = nicparams[constants.NIC_LINK]
8256
        args['nics'].append((ip, mac, mode, link))
8257
      if constants.DDM_ADD in nic_override:
8258
        ip = nic_override[constants.DDM_ADD].get('ip', None)
8259
        mac = nic_override[constants.DDM_ADD]['mac']
8260
        nicparams = self.nic_pnew[constants.DDM_ADD]
8261
        mode = nicparams[constants.NIC_MODE]
8262
        link = nicparams[constants.NIC_LINK]
8263
        args['nics'].append((ip, mac, mode, link))
8264
      elif constants.DDM_REMOVE in nic_override:
8265
        del args['nics'][-1]
8266

    
8267
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
8268
    if self.op.disk_template:
8269
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
8270
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8271
    return env, nl, nl
8272

    
8273
  @staticmethod
8274
  def _GetUpdatedParams(old_params, update_dict,
8275
                        default_values, parameter_types):
8276
    """Return the new params dict for the given params.
8277

8278
    @type old_params: dict
8279
    @param old_params: old parameters
8280
    @type update_dict: dict
8281
    @param update_dict: dict containing new parameter values,
8282
                        or constants.VALUE_DEFAULT to reset the
8283
                        parameter to its default value
8284
    @type default_values: dict
8285
    @param default_values: default values for the filled parameters
8286
    @type parameter_types: dict
8287
    @param parameter_types: dict mapping target dict keys to types
8288
                            in constants.ENFORCEABLE_TYPES
8289
    @rtype: (dict, dict)
8290
    @return: (new_parameters, filled_parameters)
8291

8292
    """
8293
    params_copy = copy.deepcopy(old_params)
8294
    for key, val in update_dict.iteritems():
8295
      if val == constants.VALUE_DEFAULT:
8296
        try:
8297
          del params_copy[key]
8298
        except KeyError:
8299
          pass
8300
      else:
8301
        params_copy[key] = val
8302
    utils.ForceDictType(params_copy, parameter_types)
8303
    params_filled = objects.FillDict(default_values, params_copy)
8304
    return (params_copy, params_filled)
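
  # Illustrative sketch of the merge semantics above (hypothetical values,
  # not taken from any real configuration): with
  #   old_params     = {"kernel_path": "/vmlinuz-custom"}
  #   update_dict    = {"kernel_path": constants.VALUE_DEFAULT,
  #                     "root_path": "/dev/sda1"}
  #   default_values = {"kernel_path": "/vmlinuz", "root_path": "/dev/vda1"}
  # the returned tuple would be
  #   new    == {"root_path": "/dev/sda1"}        # reset keys are dropped
  #   filled == {"kernel_path": "/vmlinuz", "root_path": "/dev/sda1"}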
  def CheckPrereq(self):
8307
    """Check prerequisites.
8308

8309
    This checks the requested parameter changes against the instance and
    cluster state.
8310

8311
    """
8312
    self.force = self.op.force
8313

    
8314
    # checking the new params on the primary/secondary nodes
8315

    
8316
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8317
    cluster = self.cluster = self.cfg.GetClusterInfo()
8318
    assert self.instance is not None, \
8319
      "Cannot retrieve locked instance %s" % self.op.instance_name
8320
    pnode = instance.primary_node
8321
    nodelist = list(instance.all_nodes)
8322

    
8323
    if self.op.disk_template:
8324
      if instance.disk_template == self.op.disk_template:
8325
        raise errors.OpPrereqError("Instance already has disk template %s" %
8326
                                   instance.disk_template, errors.ECODE_INVAL)
8327

    
8328
      if (instance.disk_template,
8329
          self.op.disk_template) not in self._DISK_CONVERSIONS:
8330
        raise errors.OpPrereqError("Unsupported disk template conversion from"
8331
                                   " %s to %s" % (instance.disk_template,
8332
                                                  self.op.disk_template),
8333
                                   errors.ECODE_INVAL)
8334
      if self.op.disk_template in constants.DTS_NET_MIRROR:
8335
        _CheckNodeOnline(self, self.op.remote_node)
8336
        _CheckNodeNotDrained(self, self.op.remote_node)
8337
        disks = [{"size": d.size} for d in instance.disks]
8338
        required = _ComputeDiskSize(self.op.disk_template, disks)
8339
        _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8340
        _CheckInstanceDown(self, instance, "cannot change disk template")
8341

    
8342
    # hvparams processing
8343
    if self.op.hvparams:
8344
      i_hvdict, hv_new = self._GetUpdatedParams(
8345
                             instance.hvparams, self.op.hvparams,
8346
                             cluster.hvparams[instance.hypervisor],
8347
                             constants.HVS_PARAMETER_TYPES)
8348
      # local check
8349
      hypervisor.GetHypervisor(
8350
        instance.hypervisor).CheckParameterSyntax(hv_new)
8351
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8352
      self.hv_new = hv_new # the new actual values
8353
      self.hv_inst = i_hvdict # the new dict (without defaults)
8354
    else:
8355
      self.hv_new = self.hv_inst = {}
8356

    
8357
    # beparams processing
8358
    if self.op.beparams:
8359
      i_bedict, be_new = self._GetUpdatedParams(
8360
                             instance.beparams, self.op.beparams,
8361
                             cluster.beparams[constants.PP_DEFAULT],
8362
                             constants.BES_PARAMETER_TYPES)
8363
      self.be_new = be_new # the new actual values
8364
      self.be_inst = i_bedict # the new dict (without defaults)
8365
    else:
8366
      self.be_new = self.be_inst = {}
8367

    
8368
    self.warn = []
8369

    
8370
    if constants.BE_MEMORY in self.op.beparams and not self.force:
8371
      mem_check_list = [pnode]
8372
      if be_new[constants.BE_AUTO_BALANCE]:
8373
        # either we changed auto_balance to yes or it was from before
8374
        mem_check_list.extend(instance.secondary_nodes)
8375
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
8376
                                                  instance.hypervisor)
8377
      nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8378
                                         instance.hypervisor)
8379
      pninfo = nodeinfo[pnode]
8380
      msg = pninfo.fail_msg
8381
      if msg:
8382
        # Assume the primary node is unreachable and go ahead
8383
        self.warn.append("Can't get info from primary node %s: %s" %
8384
                         (pnode,  msg))
8385
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
8386
        self.warn.append("Node data from primary node %s doesn't contain"
8387
                         " free memory information" % pnode)
8388
      elif instance_info.fail_msg:
8389
        self.warn.append("Can't get instance runtime information: %s" %
8390
                        instance_info.fail_msg)
8391
      else:
8392
        if instance_info.payload:
8393
          current_mem = int(instance_info.payload['memory'])
8394
        else:
8395
          # Assume instance not running
8396
          # (there is a slight race condition here, but it's not very probable,
8397
          # and we have no other way to check)
8398
          current_mem = 0
8399
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8400
                    pninfo.payload['memory_free'])
8401
        if miss_mem > 0:
8402
          raise errors.OpPrereqError("This change will prevent the instance"
8403
                                     " from starting, due to %d MB of memory"
8404
                                     " missing on its primary node" % miss_mem,
8405
                                     errors.ECODE_NORES)
8406

    
8407
      if be_new[constants.BE_AUTO_BALANCE]:
8408
        for node, nres in nodeinfo.items():
8409
          if node not in instance.secondary_nodes:
8410
            continue
8411
          msg = nres.fail_msg
8412
          if msg:
8413
            self.warn.append("Can't get info from secondary node %s: %s" %
8414
                             (node, msg))
8415
          elif not isinstance(nres.payload.get('memory_free', None), int):
8416
            self.warn.append("Secondary node %s didn't return free"
8417
                             " memory information" % node)
8418
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8419
            self.warn.append("Not enough memory to failover instance to"
8420
                             " secondary node %s" % node)
8421

    
8422
    # NIC processing
8423
    self.nic_pnew = {}
8424
    self.nic_pinst = {}
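    # self.op.nics is a list of (op, params) pairs, where op is
    # constants.DDM_ADD, constants.DDM_REMOVE or the index of an existing
    # NIC; a purely hypothetical example:
    #   [(constants.DDM_ADD, {"mac": constants.VALUE_AUTO}),
    #    (0, {"ip": "192.0.2.10"})]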
    for nic_op, nic_dict in self.op.nics:
8426
      if nic_op == constants.DDM_REMOVE:
8427
        if not instance.nics:
8428
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
8429
                                     errors.ECODE_INVAL)
8430
        continue
8431
      if nic_op != constants.DDM_ADD:
8432
        # an existing nic
8433
        if not instance.nics:
8434
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
8435
                                     " no NICs" % nic_op,
8436
                                     errors.ECODE_INVAL)
8437
        if nic_op < 0 or nic_op >= len(instance.nics):
8438
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
8439
                                     " are 0 to %d" %
8440
                                     (nic_op, len(instance.nics) - 1),
8441
                                     errors.ECODE_INVAL)
8442
        old_nic_params = instance.nics[nic_op].nicparams
8443
        old_nic_ip = instance.nics[nic_op].ip
8444
      else:
8445
        old_nic_params = {}
8446
        old_nic_ip = None
8447

    
8448
      update_params_dict = dict([(key, nic_dict[key])
8449
                                 for key in constants.NICS_PARAMETERS
8450
                                 if key in nic_dict])
8451

    
8452
      if 'bridge' in nic_dict:
8453
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
8454

    
8455
      new_nic_params, new_filled_nic_params = \
8456
          self._GetUpdatedParams(old_nic_params, update_params_dict,
8457
                                 cluster.nicparams[constants.PP_DEFAULT],
8458
                                 constants.NICS_PARAMETER_TYPES)
8459
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
8460
      self.nic_pinst[nic_op] = new_nic_params
8461
      self.nic_pnew[nic_op] = new_filled_nic_params
8462
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
8463

    
8464
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
8465
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
8466
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
8467
        if msg:
8468
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
8469
          if self.force:
8470
            self.warn.append(msg)
8471
          else:
8472
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
8473
      if new_nic_mode == constants.NIC_MODE_ROUTED:
8474
        if 'ip' in nic_dict:
8475
          nic_ip = nic_dict['ip']
8476
        else:
8477
          nic_ip = old_nic_ip
8478
        if nic_ip is None:
8479
          raise errors.OpPrereqError('Cannot set the nic ip to None'
8480
                                     ' on a routed nic', errors.ECODE_INVAL)
8481
      if 'mac' in nic_dict:
8482
        nic_mac = nic_dict['mac']
8483
        if nic_mac is None:
8484
          raise errors.OpPrereqError('Cannot set the nic mac to None',
8485
                                     errors.ECODE_INVAL)
8486
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8487
          # otherwise generate the mac
8488
          nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
8489
        else:
8490
          # or validate/reserve the current one
8491
          try:
8492
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
8493
          except errors.ReservationError:
8494
            raise errors.OpPrereqError("MAC address %s already in use"
8495
                                       " in cluster" % nic_mac,
8496
                                       errors.ECODE_NOTUNIQUE)
8497

    
8498
    # DISK processing
8499
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
8500
      raise errors.OpPrereqError("Disk operations not supported for"
8501
                                 " diskless instances",
8502
                                 errors.ECODE_INVAL)
8503
    for disk_op, _ in self.op.disks:
8504
      if disk_op == constants.DDM_REMOVE:
8505
        if len(instance.disks) == 1:
8506
          raise errors.OpPrereqError("Cannot remove the last disk of"
8507
                                     " an instance", errors.ECODE_INVAL)
8508
        _CheckInstanceDown(self, instance, "cannot remove disks")
8509

    
8510
      if (disk_op == constants.DDM_ADD and
8511
          len(instance.disks) >= constants.MAX_DISKS):
8512
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
8513
                                   " add more" % constants.MAX_DISKS,
8514
                                   errors.ECODE_STATE)
8515
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
8516
        # an existing disk
8517
        if disk_op < 0 or disk_op >= len(instance.disks):
8518
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
8519
                                     " are 0 to %d" %
8520
                                     (disk_op, len(instance.disks) - 1),
8521
                                     errors.ECODE_INVAL)
8522

    
8523
    # OS change
8524
    if self.op.os_name and not self.op.force:
8525
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8526
                      self.op.force_variant)
8527

    
8528
    return
8529

    
8530
  def _ConvertPlainToDrbd(self, feedback_fn):
8531
    """Converts an instance from plain to drbd.
8532

8533
    """
8534
    feedback_fn("Converting template to drbd")
8535
    instance = self.instance
8536
    pnode = instance.primary_node
8537
    snode = self.op.remote_node
8538

    
8539
    # create a fake disk info for _GenerateDiskTemplate
8540
    disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
8541
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
8542
                                      instance.name, pnode, [snode],
8543
                                      disk_info, None, None, 0)
8544
    info = _GetInstanceInfoText(instance)
8545
    feedback_fn("Creating aditional volumes...")
8546
    # first, create the missing data and meta devices
8547
    for disk in new_disks:
8548
      # unfortunately this is... not too nice
8549
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
8550
                            info, True)
8551
      for child in disk.children:
8552
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
8553
    # at this stage, all new LVs have been created, we can rename the
8554
    # old ones
8555
    feedback_fn("Renaming original volumes...")
8556
    rename_list = [(o, n.children[0].logical_id)
8557
                   for (o, n) in zip(instance.disks, new_disks)]
8558
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
8559
    result.Raise("Failed to rename original LVs")
8560

    
8561
    feedback_fn("Initializing DRBD devices...")
8562
    # all child devices are in place, we can now create the DRBD devices
8563
    for disk in new_disks:
8564
      for node in [pnode, snode]:
8565
        f_create = node == pnode
8566
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
8567

    
8568
    # at this point, the instance has been modified
8569
    instance.disk_template = constants.DT_DRBD8
8570
    instance.disks = new_disks
8571
    self.cfg.Update(instance, feedback_fn)
8572

    
8573
    # disks are created, waiting for sync
8574
    disk_abort = not _WaitForSync(self, instance)
8575
    if disk_abort:
8576
      raise errors.OpExecError("There are some degraded disks for"
8577
                               " this instance, please cleanup manually")
8578

    
8579
  def _ConvertDrbdToPlain(self, feedback_fn):
8580
    """Converts an instance from drbd to plain.
8581

8582
    """
8583
    instance = self.instance
8584
    assert len(instance.secondary_nodes) == 1
8585
    pnode = instance.primary_node
8586
    snode = instance.secondary_nodes[0]
8587
    feedback_fn("Converting template to plain")
8588

    
8589
    old_disks = instance.disks
8590
    new_disks = [d.children[0] for d in old_disks]
8591

    
8592
    # copy over size and mode
8593
    for parent, child in zip(old_disks, new_disks):
8594
      child.size = parent.size
8595
      child.mode = parent.mode
8596

    
8597
    # update instance structure
8598
    instance.disks = new_disks
8599
    instance.disk_template = constants.DT_PLAIN
8600
    self.cfg.Update(instance, feedback_fn)
8601

    
8602
    feedback_fn("Removing volumes on the secondary node...")
8603
    for disk in old_disks:
8604
      self.cfg.SetDiskID(disk, snode)
8605
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
8606
      if msg:
8607
        self.LogWarning("Could not remove block device %s on node %s,"
8608
                        " continuing anyway: %s", disk.iv_name, snode, msg)
8609

    
8610
    feedback_fn("Removing unneeded volumes on the primary node...")
8611
    for idx, disk in enumerate(old_disks):
8612
      meta = disk.children[1]
8613
      self.cfg.SetDiskID(meta, pnode)
8614
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
8615
      if msg:
8616
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
8617
                        " continuing anyway: %s", idx, pnode, msg)
8618

    
8619

    
8620
  def Exec(self, feedback_fn):
8621
    """Modifies an instance.
8622

8623
    All parameters take effect only at the next restart of the instance.
8624

8625
    """
8626
    # Process here the warnings from CheckPrereq, as we don't have a
8627
    # feedback_fn there.
8628
    for warn in self.warn:
8629
      feedback_fn("WARNING: %s" % warn)
8630

    
8631
    result = []
8632
    instance = self.instance
8633
    # disk changes
8634
    for disk_op, disk_dict in self.op.disks:
8635
      if disk_op == constants.DDM_REMOVE:
8636
        # remove the last disk
8637
        device = instance.disks.pop()
8638
        device_idx = len(instance.disks)
8639
        for node, disk in device.ComputeNodeTree(instance.primary_node):
8640
          self.cfg.SetDiskID(disk, node)
8641
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
8642
          if msg:
8643
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
8644
                            " continuing anyway", device_idx, node, msg)
8645
        result.append(("disk/%d" % device_idx, "remove"))
8646
      elif disk_op == constants.DDM_ADD:
8647
        # add a new disk
8648
        if instance.disk_template == constants.DT_FILE:
8649
          file_driver, file_path = instance.disks[0].logical_id
8650
          file_path = os.path.dirname(file_path)
8651
        else:
8652
          file_driver = file_path = None
8653
        disk_idx_base = len(instance.disks)
8654
        new_disk = _GenerateDiskTemplate(self,
8655
                                         instance.disk_template,
8656
                                         instance.name, instance.primary_node,
8657
                                         instance.secondary_nodes,
8658
                                         [disk_dict],
8659
                                         file_path,
8660
                                         file_driver,
8661
                                         disk_idx_base)[0]
8662
        instance.disks.append(new_disk)
8663
        info = _GetInstanceInfoText(instance)
8664

    
8665
        logging.info("Creating volume %s for instance %s",
8666
                     new_disk.iv_name, instance.name)
8667
        # Note: this needs to be kept in sync with _CreateDisks
8668
        #HARDCODE
8669
        for node in instance.all_nodes:
8670
          f_create = node == instance.primary_node
8671
          try:
8672
            _CreateBlockDev(self, node, instance, new_disk,
8673
                            f_create, info, f_create)
8674
          except errors.OpExecError, err:
8675
            self.LogWarning("Failed to create volume %s (%s) on"
8676
                            " node %s: %s",
8677
                            new_disk.iv_name, new_disk, node, err)
8678
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
8679
                       (new_disk.size, new_disk.mode)))
8680
      else:
8681
        # change a given disk
8682
        instance.disks[disk_op].mode = disk_dict['mode']
8683
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
8684

    
8685
    if self.op.disk_template:
8686
      r_shut = _ShutdownInstanceDisks(self, instance)
8687
      if not r_shut:
8688
        raise errors.OpExecError("Cannot shutdow instance disks, unable to"
8689
                                 " proceed with disk template conversion")
8690
      mode = (instance.disk_template, self.op.disk_template)
8691
      try:
8692
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
8693
      except:
8694
        self.cfg.ReleaseDRBDMinors(instance.name)
8695
        raise
8696
      result.append(("disk_template", self.op.disk_template))
8697

    
8698
    # NIC changes
8699
    for nic_op, nic_dict in self.op.nics:
8700
      if nic_op == constants.DDM_REMOVE:
8701
        # remove the last nic
8702
        del instance.nics[-1]
8703
        result.append(("nic.%d" % len(instance.nics), "remove"))
8704
      elif nic_op == constants.DDM_ADD:
8705
        # mac and bridge should be set by now
8706
        mac = nic_dict['mac']
8707
        ip = nic_dict.get('ip', None)
8708
        nicparams = self.nic_pinst[constants.DDM_ADD]
8709
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
8710
        instance.nics.append(new_nic)
8711
        result.append(("nic.%d" % (len(instance.nics) - 1),
8712
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
8713
                       (new_nic.mac, new_nic.ip,
8714
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
8715
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
8716
                       )))
8717
      else:
8718
        for key in 'mac', 'ip':
8719
          if key in nic_dict:
8720
            setattr(instance.nics[nic_op], key, nic_dict[key])
8721
        if nic_op in self.nic_pinst:
8722
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
8723
        for key, val in nic_dict.iteritems():
8724
          result.append(("nic.%s/%d" % (key, nic_op), val))
8725

    
8726
    # hvparams changes
8727
    if self.op.hvparams:
8728
      instance.hvparams = self.hv_inst
8729
      for key, val in self.op.hvparams.iteritems():
8730
        result.append(("hv/%s" % key, val))
8731

    
8732
    # beparams changes
8733
    if self.op.beparams:
8734
      instance.beparams = self.be_inst
8735
      for key, val in self.op.beparams.iteritems():
8736
        result.append(("be/%s" % key, val))
8737

    
8738
    # OS change
8739
    if self.op.os_name:
8740
      instance.os = self.op.os_name
8741

    
8742
    self.cfg.Update(instance, feedback_fn)
8743

    
8744
    return result
8745

    
8746
  _DISK_CONVERSIONS = {
8747
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
8748
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
8749
    }
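  # Exec dispatches on (current template, requested template), e.g. a
  # plain->drbd conversion resolves to _ConvertPlainToDrbd; the functions are
  # stored unbound, hence the explicit self in the call above:
  #   self._DISK_CONVERSIONS[(constants.DT_PLAIN, constants.DT_DRBD8)](
  #     self, feedback_fn)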
class LUQueryExports(NoHooksLU):
8752
  """Query the exports list
8753

8754
  """
8755
  _OP_REQP = ['nodes']
8756
  REQ_BGL = False
8757

    
8758
  def ExpandNames(self):
8759
    self.needed_locks = {}
8760
    self.share_locks[locking.LEVEL_NODE] = 1
8761
    if not self.op.nodes:
8762
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8763
    else:
8764
      self.needed_locks[locking.LEVEL_NODE] = \
8765
        _GetWantedNodes(self, self.op.nodes)
8766

    
8767
  def CheckPrereq(self):
8768
    """Check prerequisites.
8769

8770
    """
8771
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
8772

    
8773
  def Exec(self, feedback_fn):
8774
    """Compute the list of all the exported system images.
8775

8776
    @rtype: dict
8777
    @return: a dictionary with the structure node->(export-list)
8778
        where export-list is a list of the instances exported on
8779
        that node.
8780

8781
    """
    rpcresult = self.rpc.call_export_list(self.nodes)
8783
    result = {}
8784
    for node in rpcresult:
8785
      if rpcresult[node].fail_msg:
8786
        result[node] = False
8787
      else:
8788
        result[node] = rpcresult[node].payload
8789

    
8790
    return result
8791

    
8792

    
8793
class LUExportInstance(LogicalUnit):
8794
  """Export an instance to an image in the cluster.
8795

8796
  """
8797
  HPATH = "instance-export"
8798
  HTYPE = constants.HTYPE_INSTANCE
8799
  _OP_REQP = ["instance_name", "target_node", "shutdown"]
8800
  REQ_BGL = False
8801

    
8802
  def CheckArguments(self):
8803
    """Check the arguments.
8804

8805
    """
8806
    _CheckBooleanOpField(self.op, "remove_instance")
8807
    _CheckBooleanOpField(self.op, "ignore_remove_failures")
8808

    
8809
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
8810
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
8811
    self.remove_instance = getattr(self.op, "remove_instance", False)
8812
    self.ignore_remove_failures = getattr(self.op, "ignore_remove_failures",
8813
                                          False)
8814

    
8815
    if self.remove_instance and not self.op.shutdown:
8816
      raise errors.OpPrereqError("Can not remove instance without shutting it"
8817
                                 " down before")
8818

    
8819
  def ExpandNames(self):
8820
    self._ExpandAndLockInstance()
8821

    
8822
    # FIXME: lock only instance primary and destination node
8823
    #
8824
    # Sad but true, for now we have to lock all nodes, as we don't know where
    # the previous export might be, and in this LU we search for it and
8826
    # remove it from its current node. In the future we could fix this by:
8827
    #  - making a tasklet to search (share-lock all), then create the new one,
8828
    #    then one to remove, after
8829
    #  - removing the removal operation altogether
8830
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8831

    
8832
  def DeclareLocks(self, level):
8833
    """Last minute lock declaration."""
8834
    # All nodes are locked anyway, so nothing to do here.
8835

    
8836
  def BuildHooksEnv(self):
8837
    """Build hooks env.
8838

8839
    This will run on the master, primary node and target node.
8840

8841
    """
8842
    env = {
8843
      "EXPORT_NODE": self.op.target_node,
8844
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
8845
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
8846
      # TODO: Generic function for boolean env variables
8847
      "REMOVE_INSTANCE": str(bool(self.remove_instance)),
8848
      }
8849
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8850
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node,
8851
          self.op.target_node]
8852
    return env, nl, nl
8853

    
8854
  def CheckPrereq(self):
8855
    """Check prerequisites.
8856

8857
    This checks that the instance and node names are valid.
8858

8859
    """
8860
    instance_name = self.op.instance_name
8861
    self.instance = self.cfg.GetInstanceInfo(instance_name)
8862
    assert self.instance is not None, \
8863
          "Cannot retrieve locked instance %s" % self.op.instance_name
8864
    _CheckNodeOnline(self, self.instance.primary_node)
8865

    
8866
    self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
8867
    self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
8868
    assert self.dst_node is not None
8869

    
8870
    _CheckNodeOnline(self, self.dst_node.name)
8871
    _CheckNodeNotDrained(self, self.dst_node.name)
8872

    
8873
    # instance disk type verification
8874
    # TODO: Implement export support for file-based disks
8875
    for disk in self.instance.disks:
8876
      if disk.dev_type == constants.LD_FILE:
8877
        raise errors.OpPrereqError("Export not supported for instances with"
8878
                                   " file-based disks", errors.ECODE_INVAL)
8879

    
8880
  def _CreateSnapshots(self, feedback_fn):
8881
    """Creates an LVM snapshot for every disk of the instance.
8882

8883
    @return: List of snapshots as L{objects.Disk} instances
8884

8885
    """
8886
    instance = self.instance
8887
    src_node = instance.primary_node
8888

    
8889
    vgname = self.cfg.GetVGName()
8890

    
8891
    snap_disks = []
8892

    
8893
    for idx, disk in enumerate(instance.disks):
8894
      feedback_fn("Creating a snapshot of disk/%s on node %s" %
8895
                  (idx, src_node))
8896

    
8897
      # result.payload will be a snapshot of an lvm leaf of the one we
8898
      # passed
8899
      result = self.rpc.call_blockdev_snapshot(src_node, disk)
8900
      msg = result.fail_msg
8901
      if msg:
8902
        self.LogWarning("Could not snapshot disk/%s on node %s: %s",
8903
                        idx, src_node, msg)
8904
        snap_disks.append(False)
8905
      else:
8906
        disk_id = (vgname, result.payload)
8907
        new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
8908
                               logical_id=disk_id, physical_id=disk_id,
8909
                               iv_name=disk.iv_name)
8910
        snap_disks.append(new_dev)
8911

    
8912
    return snap_disks
8913

    
8914
  def _RemoveSnapshot(self, feedback_fn, snap_disks, disk_index):
8915
    """Removes an LVM snapshot.
8916

8917
    @type snap_disks: list
8918
    @param snap_disks: The list of all snapshots as returned by
8919
                       L{_CreateSnapshots}
8920
    @type disk_index: number
8921
    @param disk_index: Index of the snapshot to be removed
8922
    @rtype: bool
8923
    @return: Whether removal was successful or not
8924

8925
    """
8926
    disk = snap_disks[disk_index]
8927
    if disk:
8928
      src_node = self.instance.primary_node
8929

    
8930
      feedback_fn("Removing snapshot of disk/%s on node %s" %
8931
                  (disk_index, src_node))
8932

    
8933
      result = self.rpc.call_blockdev_remove(src_node, disk)
8934
      if not result.fail_msg:
8935
        return True
8936

    
8937
      self.LogWarning("Could not remove snapshot for disk/%d from node"
8938
                      " %s: %s", disk_index, src_node, result.fail_msg)
8939

    
8940
    return False
8941

    
8942
  def _CleanupExports(self, feedback_fn):
8943
    """Removes exports of current instance from all other nodes.
8944

8945
    If an instance in a cluster with nodes A..D was exported to node C, its
8946
    exports will be removed from the nodes A, B and D.
8947

8948
    """
8949
    nodelist = self.cfg.GetNodeList()
8950
    nodelist.remove(self.dst_node.name)
8951

    
8952
    # on one-node clusters nodelist will be empty after the removal
8953
    # if we proceeded, the backup would be removed because OpQueryExports
8954
    # substitutes an empty list with the full cluster node list.
8955
    iname = self.instance.name
8956
    if nodelist:
8957
      feedback_fn("Removing old exports for instance %s" % iname)
8958
      exportlist = self.rpc.call_export_list(nodelist)
8959
      for node in exportlist:
8960
        if exportlist[node].fail_msg:
8961
          continue
8962
        if iname in exportlist[node].payload:
8963
          msg = self.rpc.call_export_remove(node, iname).fail_msg
8964
          if msg:
8965
            self.LogWarning("Could not remove older export for instance %s"
8966
                            " on node %s: %s", iname, node, msg)
8967

    
8968
  def Exec(self, feedback_fn):
8969
    """Export an instance to an image in the cluster.
8970

8971
    """
8972
    instance = self.instance
8973
    dst_node = self.dst_node
8974
    src_node = instance.primary_node
8975

    
8976
    if self.op.shutdown:
8977
      # shutdown the instance, but not the disks
8978
      feedback_fn("Shutting down instance %s" % instance.name)
8979
      result = self.rpc.call_instance_shutdown(src_node, instance,
8980
                                               self.shutdown_timeout)
8981
      # TODO: Maybe ignore failures if ignore_remove_failures is set
8982
      result.Raise("Could not shutdown instance %s on"
8983
                   " node %s" % (instance.name, src_node))
8984

    
8985
    # set the disks ID correctly since call_instance_start needs the
8986
    # correct drbd minor to create the symlinks
8987
    for disk in instance.disks:
8988
      self.cfg.SetDiskID(disk, src_node)
8989

    
8990
    activate_disks = (not instance.admin_up)
8991

    
8992
    if activate_disks:
8993
      # Activate the instance disks if we're exporting a stopped instance
8994
      feedback_fn("Activating disks for %s" % instance.name)
8995
      _StartInstanceDisks(self, instance, None)
8996

    
8997
    try:
8998
      # per-disk results
8999
      removed_snaps = [False] * len(instance.disks)
9000

    
9001
      snap_disks = None
9002
      try:
9003
        try:
9004
          snap_disks = self._CreateSnapshots(feedback_fn)
9005
        finally:
9006
          if (self.op.shutdown and instance.admin_up and
9007
              not self.remove_instance):
9008
            feedback_fn("Starting instance %s" % instance.name)
9009
            result = self.rpc.call_instance_start(src_node, instance,
9010
                                                  None, None)
9011
            msg = result.fail_msg
9012
            if msg:
9013
              _ShutdownInstanceDisks(self, instance)
9014
              raise errors.OpExecError("Could not start instance: %s" % msg)
9015

    
9016
        assert len(snap_disks) == len(instance.disks)
9017
        assert len(removed_snaps) == len(instance.disks)
9018

    
9019
        # TODO: check for size
9020

    
9021
        def _TransferFinished(idx):
9022
          logging.debug("Transfer %s finished", idx)
9023
          if self._RemoveSnapshot(feedback_fn, snap_disks, idx):
9024
            removed_snaps[idx] = True
9025

    
9026
        transfers = []
9027

    
9028
        for idx, dev in enumerate(snap_disks):
9029
          if not dev:
9030
            transfers.append(None)
9031
            continue
9032

    
9033
          path = utils.PathJoin(constants.EXPORT_DIR, "%s.new" % instance.name,
9034
                                dev.physical_id[1])
9035

    
9036
          finished_fn = compat.partial(_TransferFinished, idx)
9037

    
9038
          # FIXME: pass debug option from opcode to backend
9039
          dt = masterd.instance.DiskTransfer("snapshot/%s" % idx,
9040
                                             constants.IEIO_SCRIPT, (dev, idx),
9041
                                             constants.IEIO_FILE, (path, ),
9042
                                             finished_fn)
9043
          transfers.append(dt)
9044

    
9045
        # Actually export data
9046
        dresults = \
9047
          masterd.instance.TransferInstanceData(self, feedback_fn,
9048
                                                src_node, dst_node.name,
9049
                                                dst_node.secondary_ip,
9050
                                                instance, transfers)
9051

    
9052
        assert len(dresults) == len(instance.disks)
9053

    
9054
        # Check for backwards compatibility
9055
        assert compat.all(isinstance(i, bool) for i in dresults), \
9056
               "Not all results are boolean: %r" % dresults
9057

    
9058
        feedback_fn("Finalizing export on %s" % dst_node.name)
9059
        result = self.rpc.call_finalize_export(dst_node.name, instance,
9060
                                               snap_disks)
9061
        msg = result.fail_msg
9062
        fin_resu = not msg
9063
        if msg:
9064
          self.LogWarning("Could not finalize export for instance %s"
9065
                          " on node %s: %s", instance.name, dst_node.name, msg)
9066

    
9067
      finally:
9068
        # Remove all snapshots
9069
        assert len(removed_snaps) == len(instance.disks)
9070
        for idx, removed in enumerate(removed_snaps):
9071
          if not removed:
9072
            self._RemoveSnapshot(feedback_fn, snap_disks, idx)
9073

    
9074
    finally:
9075
      if activate_disks:
9076
        feedback_fn("Deactivating disks for %s" % instance.name)
9077
        _ShutdownInstanceDisks(self, instance)
9078

    
9079
    # Remove instance if requested
9080
    if self.remove_instance:
9081
      feedback_fn("Removing instance %s" % instance.name)
9082
      _RemoveInstance(self, feedback_fn, instance, self.ignore_remove_failures)
9083

    
9084
    self._CleanupExports(feedback_fn)
9085

    
9086
    return fin_resu, dresults
9087

    
9088

    
9089
class LURemoveExport(NoHooksLU):
9090
  """Remove exports related to the named instance.
9091

9092
  """
9093
  _OP_REQP = ["instance_name"]
9094
  REQ_BGL = False
9095

    
9096
  def ExpandNames(self):
9097
    self.needed_locks = {}
9098
    # We need all nodes to be locked in order for RemoveExport to work, but we
9099
    # don't need to lock the instance itself, as nothing will happen to it (and
9100
    # we can remove exports also for a removed instance)
9101
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9102

    
9103
  def CheckPrereq(self):
9104
    """Check prerequisites.
9105
    """
9106
    pass
9107

    
9108
  def Exec(self, feedback_fn):
9109
    """Remove any export.
9110

9111
    """
9112
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
9113
    # If the instance was not found we'll try with the name that was passed in.
9114
    # This will only work if it was an FQDN, though.
9115
    fqdn_warn = False
9116
    if not instance_name:
9117
      fqdn_warn = True
9118
      instance_name = self.op.instance_name
9119

    
9120
    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
9121
    exportlist = self.rpc.call_export_list(locked_nodes)
9122
    found = False
9123
    for node in exportlist:
9124
      msg = exportlist[node].fail_msg
9125
      if msg:
9126
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
9127
        continue
9128
      if instance_name in exportlist[node].payload:
9129
        found = True
9130
        result = self.rpc.call_export_remove(node, instance_name)
9131
        msg = result.fail_msg
9132
        if msg:
9133
          logging.error("Could not remove export for instance %s"
9134
                        " on node %s: %s", instance_name, node, msg)
9135

    
9136
    if fqdn_warn and not found:
9137
      feedback_fn("Export not found. If trying to remove an export belonging"
9138
                  " to a deleted instance please use its Fully Qualified"
9139
                  " Domain Name.")
9140

    
9141

    
9142
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
9143
  """Generic tags LU.
9144

9145
  This is an abstract class which is the parent of all the other tags LUs.
9146

9147
  """
9148

    
9149
  def ExpandNames(self):
9150
    self.needed_locks = {}
9151
    if self.op.kind == constants.TAG_NODE:
9152
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
9153
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
9154
    elif self.op.kind == constants.TAG_INSTANCE:
9155
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
9156
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
9157

    
9158
  def CheckPrereq(self):
9159
    """Check prerequisites.
9160

9161
    """
9162
    if self.op.kind == constants.TAG_CLUSTER:
9163
      self.target = self.cfg.GetClusterInfo()
9164
    elif self.op.kind == constants.TAG_NODE:
9165
      self.target = self.cfg.GetNodeInfo(self.op.name)
9166
    elif self.op.kind == constants.TAG_INSTANCE:
9167
      self.target = self.cfg.GetInstanceInfo(self.op.name)
9168
    else:
9169
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
9170
                                 str(self.op.kind), errors.ECODE_INVAL)
9171

    
9172

    
9173
class LUGetTags(TagsLU):
9174
  """Returns the tags of a given object.
9175

9176
  """
9177
  _OP_REQP = ["kind", "name"]
9178
  REQ_BGL = False
9179

    
9180
  def Exec(self, feedback_fn):
9181
    """Returns the tag list.
9182

9183
    """
9184
    return list(self.target.GetTags())
9185

    
9186

    
9187
class LUSearchTags(NoHooksLU):
9188
  """Searches the tags for a given pattern.
9189

9190
  """
9191
  _OP_REQP = ["pattern"]
9192
  REQ_BGL = False
9193

    
9194
  def ExpandNames(self):
9195
    self.needed_locks = {}
9196

    
9197
  def CheckPrereq(self):
9198
    """Check prerequisites.
9199

9200
    This checks the pattern passed for validity by compiling it.
9201

9202
    """
9203
    try:
9204
      self.re = re.compile(self.op.pattern)
9205
    except re.error, err:
9206
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
9207
                                 (self.op.pattern, err), errors.ECODE_INVAL)
9208

    
9209
  def Exec(self, feedback_fn):
9210
    """Returns the tag list.
9211

9212
    """
9213
    cfg = self.cfg
9214
    tgts = [("/cluster", cfg.GetClusterInfo())]
9215
    ilist = cfg.GetAllInstancesInfo().values()
9216
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
9217
    nlist = cfg.GetAllNodesInfo().values()
9218
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
9219
    results = []
9220
    for path, target in tgts:
9221
      for tag in target.GetTags():
9222
        if self.re.search(tag):
9223
          results.append((path, tag))
9224
    return results
9225

    
9226

    
9227
class LUAddTags(TagsLU):
9228
  """Sets a tag on a given object.
9229

9230
  """
9231
  _OP_REQP = ["kind", "name", "tags"]
9232
  REQ_BGL = False
9233

    
9234
  def CheckPrereq(self):
9235
    """Check prerequisites.
9236

9237
    This checks the type and length of the tag name and value.
9238

9239
    """
9240
    TagsLU.CheckPrereq(self)
9241
    for tag in self.op.tags:
9242
      objects.TaggableObject.ValidateTag(tag)
9243

    
9244
  def Exec(self, feedback_fn):
9245
    """Sets the tag.
9246

9247
    """
9248
    try:
9249
      for tag in self.op.tags:
9250
        self.target.AddTag(tag)
9251
    except errors.TagError, err:
9252
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
9253
    self.cfg.Update(self.target, feedback_fn)
9254

    
9255

    
9256
class LUDelTags(TagsLU):
9257
  """Delete a list of tags from a given object.
9258

9259
  """
9260
  _OP_REQP = ["kind", "name", "tags"]
9261
  REQ_BGL = False
9262

    
9263
  def CheckPrereq(self):
9264
    """Check prerequisites.
9265

9266
    This checks that we have the given tag.
9267

9268
    """
9269
    TagsLU.CheckPrereq(self)
9270
    for tag in self.op.tags:
9271
      objects.TaggableObject.ValidateTag(tag)
9272
    del_tags = frozenset(self.op.tags)
9273
    cur_tags = self.target.GetTags()
9274
    if not del_tags <= cur_tags:
9275
      diff_tags = del_tags - cur_tags
9276
      diff_names = ["'%s'" % tag for tag in diff_tags]
9277
      diff_names.sort()
9278
      raise errors.OpPrereqError("Tag(s) %s not found" %
9279
                                 (",".join(diff_names)), errors.ECODE_NOENT)
9280

    
9281
  def Exec(self, feedback_fn):
9282
    """Remove the tag from the object.
9283

9284
    """
9285
    for tag in self.op.tags:
9286
      self.target.RemoveTag(tag)
9287
    self.cfg.Update(self.target, feedback_fn)
9288

    
9289

    
9290
class LUTestDelay(NoHooksLU):
9291
  """Sleep for a specified amount of time.
9292

9293
  This LU sleeps on the master and/or nodes for a specified amount of
9294
  time.
9295

9296
  """
9297
  _OP_REQP = ["duration", "on_master", "on_nodes"]
9298
  REQ_BGL = False
9299

    
9300
  def ExpandNames(self):
9301
    """Expand names and set required locks.
9302

9303
    This expands the node list, if any.
9304

9305
    """
9306
    self.needed_locks = {}
9307
    if self.op.on_nodes:
9308
      # _GetWantedNodes can be used here, but is not always appropriate to use
9309
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
9310
      # more information.
9311
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
9312
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
9313

    
9314
  def CheckPrereq(self):
9315
    """Check prerequisites.
9316

9317
    """
9318

    
9319
  def Exec(self, feedback_fn):
9320
    """Do the actual sleep.
9321

9322
    """
9323
    if self.op.on_master:
9324
      if not utils.TestDelay(self.op.duration):
9325
        raise errors.OpExecError("Error during master delay test")
9326
    if self.op.on_nodes:
9327
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
9328
      for node, node_result in result.items():
9329
        node_result.Raise("Failure during rpc call to node %s" % node)
9330

    
9331

    
9332
class IAllocator(object):
9333
  """IAllocator framework.
9334

9335
  An IAllocator instance has four sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in_text, in_data, out_text, out_data), that
      represent the input (to the external script) in text and data
      structure format, and the output from it, again in two formats
    - the result variables from the script (success, info, result) for
      easy usage
9343

9344
  """
9345
  # pylint: disable-msg=R0902
9346
  # lots of instance attributes
9347
  _ALLO_KEYS = [
9348
    "name", "mem_size", "disks", "disk_template",
9349
    "os", "tags", "nics", "vcpus", "hypervisor",
9350
    ]
9351
  _RELO_KEYS = [
9352
    "name", "relocate_from",
9353
    ]
9354
  _EVAC_KEYS = [
9355
    "evac_nodes",
9356
    ]
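
  # Rough usage sketch (argument values are illustrative only): callers build
  # an instance for one of the supported modes, run the external script and
  # then inspect the result attributes, e.g.
  #   ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_RELOC,
  #                    name=instance.name,
  #                    relocate_from=list(instance.secondary_nodes))
  #   ial.Run(self.op.iallocator)
  #   if not ial.success:
  #     raise errors.OpPrereqError("Can't compute nodes: %s" % ial.info,
  #                                errors.ECODE_NORES)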
  def __init__(self, cfg, rpc, mode, **kwargs):
9359
    self.cfg = cfg
9360
    self.rpc = rpc
9361
    # init buffer variables
9362
    self.in_text = self.out_text = self.in_data = self.out_data = None
9363
    # init all input fields so that pylint is happy
9364
    self.mode = mode
9365
    self.mem_size = self.disks = self.disk_template = None
9366
    self.os = self.tags = self.nics = self.vcpus = None
9367
    self.hypervisor = None
9368
    self.relocate_from = None
9369
    self.name = None
9370
    self.evac_nodes = None
9371
    # computed fields
9372
    self.required_nodes = None
9373
    # init result fields
9374
    self.success = self.info = self.result = None
9375
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
9376
      keyset = self._ALLO_KEYS
9377
      fn = self._AddNewInstance
9378
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
9379
      keyset = self._RELO_KEYS
9380
      fn = self._AddRelocateInstance
9381
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
9382
      keyset = self._EVAC_KEYS
9383
      fn = self._AddEvacuateNodes
9384
    else:
9385
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
9386
                                   " IAllocator" % self.mode)
9387
    for key in kwargs:
9388
      if key not in keyset:
9389
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
9390
                                     " IAllocator" % key)
9391
      setattr(self, key, kwargs[key])
9392

    
9393
    for key in keyset:
9394
      if key not in kwargs:
9395
        raise errors.ProgrammerError("Missing input parameter '%s' to"
9396
                                     " IAllocator" % key)
9397
    self._BuildInputData(fn)
9398

    
9399
  def _ComputeClusterData(self):
9400
    """Compute the generic allocator input data.
9401

9402
    This is the data that is independent of the actual operation.
9403

9404
    """
9405
    cfg = self.cfg
9406
    cluster_info = cfg.GetClusterInfo()
9407
    # cluster data
9408
    data = {
9409
      "version": constants.IALLOCATOR_VERSION,
9410
      "cluster_name": cfg.GetClusterName(),
9411
      "cluster_tags": list(cluster_info.GetTags()),
9412
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
9413
      # we don't have job IDs
9414
      }
9415
    iinfo = cfg.GetAllInstancesInfo().values()
9416
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
9417

    
9418
    # node data
9419
    node_results = {}
9420
    node_list = cfg.GetNodeList()
9421

    
9422
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
9423
      hypervisor_name = self.hypervisor
9424
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
9425
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
9426
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
9427
      hypervisor_name = cluster_info.enabled_hypervisors[0]
9428

    
9429
    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
9430
                                        hypervisor_name)
9431
    node_iinfo = \
9432
      self.rpc.call_all_instances_info(node_list,
9433
                                       cluster_info.enabled_hypervisors)
9434
    for nname, nresult in node_data.items():
9435
      # first fill in static (config-based) values
9436
      ninfo = cfg.GetNodeInfo(nname)
9437
      pnr = {
9438
        "tags": list(ninfo.GetTags()),
9439
        "primary_ip": ninfo.primary_ip,
9440
        "secondary_ip": ninfo.secondary_ip,
9441
        "offline": ninfo.offline,
9442
        "drained": ninfo.drained,
9443
        "master_candidate": ninfo.master_candidate,
9444
        }
9445

    
9446
      if not (ninfo.offline or ninfo.drained):
9447
        nresult.Raise("Can't get data for node %s" % nname)
9448
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
9449
                                nname)
9450
        remote_info = nresult.payload
9451

    
9452
        for attr in ['memory_total', 'memory_free', 'memory_dom0',
9453
                     'vg_size', 'vg_free', 'cpu_total']:
9454
          if attr not in remote_info:
9455
            raise errors.OpExecError("Node '%s' didn't return attribute"
9456
                                     " '%s'" % (nname, attr))
9457
          if not isinstance(remote_info[attr], int):
9458
            raise errors.OpExecError("Node '%s' returned invalid value"
9459
                                     " for '%s': %s" %
9460
                                     (nname, attr, remote_info[attr]))
9461
        # compute memory used by primary instances
9462
        i_p_mem = i_p_up_mem = 0
9463
        for iinfo, beinfo in i_list:
9464
          if iinfo.primary_node == nname:
9465
            i_p_mem += beinfo[constants.BE_MEMORY]
9466
            if iinfo.name not in node_iinfo[nname].payload:
9467
              i_used_mem = 0
9468
            else:
9469
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
9470
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
9471
            remote_info['memory_free'] -= max(0, i_mem_diff)
9472

    
9473
            if iinfo.admin_up:
9474
              i_p_up_mem += beinfo[constants.BE_MEMORY]
9475

    
9476
        # compute memory used by instances
9477
        pnr_dyn = {
9478
          "total_memory": remote_info['memory_total'],
9479
          "reserved_memory": remote_info['memory_dom0'],
9480
          "free_memory": remote_info['memory_free'],
9481
          "total_disk": remote_info['vg_size'],
9482
          "free_disk": remote_info['vg_free'],
9483
          "total_cpus": remote_info['cpu_total'],
9484
          "i_pri_memory": i_p_mem,
9485
          "i_pri_up_memory": i_p_up_mem,
9486
          }
9487
        pnr.update(pnr_dyn)
9488

    
9489
      node_results[nname] = pnr
9490
    data["nodes"] = node_results
9491

    
9492
    # instance data
9493
    instance_data = {}
9494
    for iinfo, beinfo in i_list:
9495
      nic_data = []
9496
      for nic in iinfo.nics:
9497
        filled_params = objects.FillDict(
9498
            cluster_info.nicparams[constants.PP_DEFAULT],
9499
            nic.nicparams)
9500
        nic_dict = {"mac": nic.mac,
9501
                    "ip": nic.ip,
9502
                    "mode": filled_params[constants.NIC_MODE],
9503
                    "link": filled_params[constants.NIC_LINK],
9504
                   }
9505
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
9506
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
9507
        nic_data.append(nic_dict)
9508
      pir = {
9509
        "tags": list(iinfo.GetTags()),
9510
        "admin_up": iinfo.admin_up,
9511
        "vcpus": beinfo[constants.BE_VCPUS],
9512
        "memory": beinfo[constants.BE_MEMORY],
9513
        "os": iinfo.os,
9514
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
9515
        "nics": nic_data,
9516
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
9517
        "disk_template": iinfo.disk_template,
9518
        "hypervisor": iinfo.hypervisor,
9519
        }
9520
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
9521
                                                 pir["disks"])
9522
      instance_data[iinfo.name] = pir
9523

    
9524
    data["instances"] = instance_data
9525

    
9526
    self.in_data = data
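    # At this point self.in_data holds the mode-independent part of the
    # allocator input, roughly:
    #   {"version": ..., "cluster_name": ..., "cluster_tags": [...],
    #    "enabled_hypervisors": [...], "nodes": {...}, "instances": {...}}
    # _BuildInputData later adds the per-mode "request" key.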
  def _AddNewInstance(self):
9529
    """Add new instance data to allocator structure.
9530

9531
    This in combination with _ComputeClusterData will create the
9532
    correct structure needed as input for the allocator.
9533

9534
    The checks for the completeness of the opcode must have already been
9535
    done.
9536

9537
    """
9538
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)
9539

    
9540
    if self.disk_template in constants.DTS_NET_MIRROR:
9541
      self.required_nodes = 2
9542
    else:
9543
      self.required_nodes = 1
9544
    request = {
9545
      "name": self.name,
9546
      "disk_template": self.disk_template,
9547
      "tags": self.tags,
9548
      "os": self.os,
9549
      "vcpus": self.vcpus,
9550
      "memory": self.mem_size,
9551
      "disks": self.disks,
9552
      "disk_space_total": disk_space,
9553
      "nics": self.nics,
9554
      "required_nodes": self.required_nodes,
9555
      }
9556
    return request
9557

    
9558
  def _AddRelocateInstance(self):
9559
    """Add relocate instance data to allocator structure.
9560

9561
    This in combination with _ComputeClusterData will create the
9562
    correct structure needed as input for the allocator.
9563

9564
    The checks for the completeness of the opcode must have already been
9565
    done.
9566

9567
    """
9568
    instance = self.cfg.GetInstanceInfo(self.name)
9569
    if instance is None:
9570
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
9571
                                   " IAllocator" % self.name)
9572

    
9573
    if instance.disk_template not in constants.DTS_NET_MIRROR:
9574
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
9575
                                 errors.ECODE_INVAL)
9576

    
9577
    if len(instance.secondary_nodes) != 1:
9578
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
9579
                                 errors.ECODE_STATE)
9580

    
9581
    self.required_nodes = 1
9582
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
9583
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
9584

    
9585
    request = {
9586
      "name": self.name,
9587
      "disk_space_total": disk_space,
9588
      "required_nodes": self.required_nodes,
9589
      "relocate_from": self.relocate_from,
9590
      }
9591
    return request
9592

    
9593
  def _AddEvacuateNodes(self):
9594
    """Add evacuate nodes data to allocator structure.
9595

9596
    """
9597
    request = {
9598
      "evac_nodes": self.evac_nodes
9599
      }
9600
    return request
9601

    
9602
  def _BuildInputData(self, fn):
9603
    """Build input data structures.
9604

9605
    """
9606
    self._ComputeClusterData()
9607

    
9608
    request = fn()
9609
    request["type"] = self.mode
9610
    self.in_data["request"] = request
9611

    
9612
    self.in_text = serializer.Dump(self.in_data)
9613

    
9614
  def Run(self, name, validate=True, call_fn=None):
9615
    """Run an instance allocator and return the results.
9616

9617
    """
9618
    if call_fn is None:
9619
      call_fn = self.rpc.call_iallocator_runner
9620

    
9621
    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
9622
    result.Raise("Failure while running the iallocator script")
9623

    
9624
    self.out_text = result.payload
9625
    if validate:
9626
      self._ValidateResult()
9627

    
9628
  def _ValidateResult(self):
9629
    """Process the allocator results.
9630

9631
    This will process and if successful save the result in
9632
    self.out_data and the other parameters.
9633

9634
    """
    try:
9636
      rdict = serializer.Load(self.out_text)
9637
    except Exception, err:
9638
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
9639

    
9640
    if not isinstance(rdict, dict):
9641
      raise errors.OpExecError("Can't parse iallocator results: not a dict")
9642

    
9643
    # TODO: remove backwards compatibility in later versions
9644
    if "nodes" in rdict and "result" not in rdict:
9645
      rdict["result"] = rdict["nodes"]
9646
      del rdict["nodes"]
9647

    
9648
    for key in "success", "info", "result":
9649
      if key not in rdict:
9650
        raise errors.OpExecError("Can't parse iallocator results:"
9651
                                 " missing key '%s'" % key)
9652
      setattr(self, key, rdict[key])
9653

    
9654
    if not isinstance(rdict["result"], list):
9655
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
9656
                               " is not a list")
9657
    self.out_data = rdict
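
  # Example (illustrative) of an allocator reply that passes the checks above:
  # a dict with "success", "info" and "result", where "result" is a list:
  #   {
  #     "success": true,
  #     "info": "allocation successful",
  #     "result": ["node1.example.com", "node3.example.com"],
  #   }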


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  _OP_REQP = ["direction", "mode", "name"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode of
    the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["name", "mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      for row in self.op.nics:
        if (not isinstance(row, dict) or
            "mac" not in row or
            "ip" not in row or
            "bridge" not in row):
          raise errors.OpPrereqError("Invalid contents of the 'nics'"
                                     " parameter", errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      if not hasattr(self.op, "name"):
        raise errors.OpPrereqError("Missing attribute 'name' on opcode input",
                                   errors.ECODE_INVAL)
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if not hasattr(self.op, "allocator") or self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)
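
  # Illustrative values that would satisfy the checks above (names and MAC
  # are made up):
  #   nics:  [{"mac": "aa:00:00:dd:ac:fb", "ip": None, "bridge": "xen-br0"}]
  #   disks: [{"size": 1024, "mode": "w"}]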

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      raise errors.ProgrammerError("Uncaught mode %s in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result
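
# Possible driver for the LU above (sketch; the opcode class and its fields
# are assumed from _OP_REQP and CheckPrereq, not defined in this module):
#   op = opcodes.OpTestAllocator(direction=constants.IALLOCATOR_DIR_IN,
#                                mode=constants.IALLOCATOR_MODE_RELOC,
#                                name="instance1.example.com")
# With direction "in" the LU only returns the serialized allocator input;
# with direction "out" it also runs the allocator named by the "allocator"
# field and returns its raw, unvalidated output.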