root / lib / cmdlib.py @ 85a87e21


#
#

# Copyright (C) 2006, 2007, 2008 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable-msg=W0201

# W0201 since most LU attributes are defined in CheckPrereq or similar
# functions

import os
import os.path
import time
import re
import platform
import logging
import copy
import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd

import ganeti.masterd.instance # pylint: disable-msg=W0611


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  _OP_REQP = []
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    self.__ssh = None
    # logging
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    for attr_name in self._OP_REQP:
      attr_val = getattr(op, attr_name, None)
      if attr_val is None:
        raise errors.OpPrereqError("Required parameter '%s' missing" %
                                   attr_name, errors.ECODE_INVAL)

    self.CheckArguments()

  def __GetSSH(self):
    """Returns the SshRunner object

    """
    if not self.__ssh:
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
    return self.__ssh

  ssh = property(fget=__GetSSH)

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensure
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separate is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possible
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.tld'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS
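
    Example (an illustrative sketch; it simply mirrors the usage documented
    for the _LockInstancesNodes helper below)::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()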

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    This method should return a three-node tuple consisting of: a dict
    containing the environment that will be used for running the
    specific hook for this LU, a list of node names on which the hook
    should run before the execution, and a list of node names on which
    the hook should run after the execution.

    The keys of the dict must not have 'GANETI_' prefixed as this will
    be handled in the hooks runner. Also note additional keys will be
    added by the hooks runner. If the LU doesn't define any
    environment, an empty dict (and not None) should be returned.

    No nodes should be returned as an empty list (and not None).

    Note that if the HPATH for a LU class is None, this function will
    not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks.  By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the unused argument and could
    # be a function warnings
    # pylint: disable-msg=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]


class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLu.

    This just raises an error.

    """
    assert False, "BuildHooksEnv called for NoHooksLUs"


class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec
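
  A minimal subclass could look like this (an illustrative sketch only; the
  class name is hypothetical)::

    class _NoopTasklet(Tasklet):
      def CheckPrereq(self):
        pass

      def Exec(self, feedback_fn):
        feedback_fn("nothing to do")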

  """
  def __init__(self, lu):
    self.lu = lu

    # Shortcuts
    self.cfg = lu.cfg
    self.rpc = lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if not isinstance(nodes, list):
    raise errors.OpPrereqError("Invalid argument type 'nodes'",
                               errors.ECODE_INVAL)

  if not nodes:
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
      " non-empty list of nodes whose name is to be expanded.")

  wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
  return utils.NiceSort(wanted)


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if not isinstance(instances, list):
    raise errors.OpPrereqError("Invalid argument type 'instances'",
                               errors.ECODE_INVAL)

  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @rtype: dict
  @return: the new parameter dictionary
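
  Example (illustrative values)::

    _GetUpdatedParams({"a": 1, "b": 2},
                      {"a": constants.VALUE_DEFAULT, "c": 3})
    # returns {"b": 2, "c": 3}: "a" is reset (dropped), "c" is added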

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if val == constants.VALUE_DEFAULT:
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckBooleanOpField(op, name):
  """Validates boolean opcode parameters.

  This will ensure that an opcode parameter is either a boolean value,
  or None (but that it always exists).

  """
  val = getattr(op, name, None)
  if not (val is None or isinstance(val, bool)):
    raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
                               (name, str(val)), errors.ECODE_INVAL)
  setattr(op, name, val)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is offline

  """
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("Can't use offline node %s" % node,
                               errors.ECODE_INVAL)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_INVAL)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _RequireFileStorage():
  """Checks that file storage is enabled.

  @raise errors.OpPrereqError: when file storage is disabled

  """
  if not constants.ENABLE_FILE_STORAGE:
    raise errors.OpPrereqError("File storage disabled at configure time",
                               errors.ECODE_INVAL)


def _CheckDiskTemplate(template):
  """Ensure a given disk template is valid.

  """
  if template not in constants.DISK_TEMPLATES:
    msg = ("Invalid disk template name '%s', valid templates are: %s" %
           (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
  if template == constants.DT_FILE:
    _RequireFileStorage()


def _CheckStorageType(storage_type):
  """Ensure a given storage type is valid.

  """
  if storage_type not in constants.VALID_STORAGE_TYPES:
    raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
                               errors.ECODE_INVAL)
  if storage_type == constants.ST_FILE:
    _RequireFileStorage()


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceDown(lu, instance, reason):
  """Ensure that an instance is not running."""
  if instance.admin_up:
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
                               (instance.name, reason), errors.ECODE_STATE)

  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
              prereq=True, ecode=errors.ECODE_ENVIRON)

  if instance.name in ins_l.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, reason), errors.ECODE_STATE)


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name):
  """Builds instance related env variables for hooks

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @rtype: dict
  @return: the hook environment for this instance
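
  For example (illustrative), a single-NIC, single-disk instance yields,
  among others, the keys::

    INSTANCE_NIC_COUNT, INSTANCE_NIC0_MAC, INSTANCE_NIC0_MODE,
    INSTANCE_DISK_COUNT, INSTANCE_DISK0_SIZE, INSTANCE_DISK0_MODE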

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUQueryInstanceData.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.admin_up,
    'memory': bep[constants.BE_MEMORY],
    'vcpus': bep[constants.BE_VCPUS],
    'nics': _NICListToTuple(lu, instance.nics),
    'disk_template': instance.disk_template,
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
    'bep': bep,
    'hvp': hvp,
    'hypervisor_name': instance.hypervisor,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max with one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity
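
  An OS that declares variants must be given as "<os>+<variant>" (for
  example, illustratively, "myos+default"); the part after the first "+"
  is checked against the OS object's supported variants.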

  """
  if not os_obj.supported_variants:
    return
  try:
    variant = name.split("+", 1)[1]
  except IndexError:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """

  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  """Return the indices of the instance disks reported faulty on a node.

  """
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


class LUPostInitCluster(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    mn = self.cfg.GetMasterNode()
    return env, [], [mn]

  def CheckPrereq(self):
    """No prerequisites to check.

    """
    return True

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUDestroyCluster(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    return env, [], []

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Run post hooks on master node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % master)

    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    if modify_ssh_setup:
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
      utils.CreateBackup(priv_key)
      utils.CreateBackup(pub_key)

    return master


def _VerifyCertificate(filename):
  """Verifies a certificate for LUVerifyCluster.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable-msg=W0703
    return (LUVerifyCluster.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)


class LUVerifyCluster(LogicalUnit):
  """Verifies the cluster status.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["skip_checks", "verbose", "error_codes", "debug_simulate_errors"]
  REQ_BGL = False

  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dictionary of {secondary-node: list of instances} of all peers
        of this node (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)

    """
    def __init__(self, offline=False):
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.
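
    For illustration, with error_codes set a line looks like
    "ERROR:ENODESSH:node:node1.example.com:cannot connect", while without
    it the same error is reported as
    "ERROR: node node1.example.com: cannot connect".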
1172

1173
    """
1174
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1175
    itype, etxt = ecode
1176
    # first complete the msg
1177
    if args:
1178
      msg = msg % args
1179
    # then format the whole message
1180
    if self.op.error_codes:
1181
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1182
    else:
1183
      if item:
1184
        item = " " + item
1185
      else:
1186
        item = ""
1187
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1188
    # and finally report it via the feedback_fn
1189
    self._feedback_fn("  - %s" % msg)
1190

    
1191
  def _ErrorIf(self, cond, *args, **kwargs):
1192
    """Log an error message if the passed condition is True.
1193

1194
    """
1195
    cond = bool(cond) or self.op.debug_simulate_errors
1196
    if cond:
1197
      self._Error(*args, **kwargs)
1198
    # do not mark the operation as failed for WARN cases only
1199
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1200
      self.bad = self.bad or cond
1201

    
1202
  def _VerifyNode(self, ninfo, nresult):
1203
    """Run multiple tests against a node.
1204

1205
    Test list:
1206

1207
      - compares ganeti version
1208
      - checks vg existence and size > 20G
1209
      - checks config file checksum
1210
      - checks ssh to other nodes
1211

1212
    @type ninfo: L{objects.Node}
1213
    @param ninfo: the node to check
1214
    @param nresult: the results from the node
1215
    @rtype: boolean
1216
    @return: whether overall this call was successful (and we can expect
1217
         reasonable values in the respose)
1218

1219
    """
1220
    node = ninfo.name
1221
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1222

    
1223
    # main result, nresult should be a non-empty dict
1224
    test = not nresult or not isinstance(nresult, dict)
1225
    _ErrorIf(test, self.ENODERPC, node,
1226
                  "unable to verify node: no data returned")
1227
    if test:
1228
      return False
1229

    
1230
    # compares ganeti version
1231
    local_version = constants.PROTOCOL_VERSION
1232
    remote_version = nresult.get("version", None)
1233
    test = not (remote_version and
1234
                isinstance(remote_version, (list, tuple)) and
1235
                len(remote_version) == 2)
1236
    _ErrorIf(test, self.ENODERPC, node,
1237
             "connection to node returned invalid data")
1238
    if test:
1239
      return False
1240

    
1241
    test = local_version != remote_version[0]
1242
    _ErrorIf(test, self.ENODEVERSION, node,
1243
             "incompatible protocol versions: master %s,"
1244
             " node %s", local_version, remote_version[0])
1245
    if test:
1246
      return False
1247

    
1248
    # node seems compatible, we can actually try to look into its results
1249

    
1250
    # full package version
1251
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1252
                  self.ENODEVERSION, node,
1253
                  "software version mismatch: master %s, node %s",
1254
                  constants.RELEASE_VERSION, remote_version[1],
1255
                  code=self.ETYPE_WARNING)
1256

    
1257
    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1258
    if isinstance(hyp_result, dict):
1259
      for hv_name, hv_result in hyp_result.iteritems():
1260
        test = hv_result is not None
1261
        _ErrorIf(test, self.ENODEHV, node,
1262
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1263

    
1264

    
1265
    test = nresult.get(constants.NV_NODESETUP,
1266
                           ["Missing NODESETUP results"])
1267
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1268
             "; ".join(test))
1269

    
1270
    return True
1271

    
1272
  def _VerifyNodeTime(self, ninfo, nresult,
1273
                      nvinfo_starttime, nvinfo_endtime):
1274
    """Check the node time.
1275

1276
    @type ninfo: L{objects.Node}
1277
    @param ninfo: the node to check
1278
    @param nresult: the remote results for the node
1279
    @param nvinfo_starttime: the start time of the RPC call
1280
    @param nvinfo_endtime: the end time of the RPC call
1281

1282
    """
1283
    node = ninfo.name
1284
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1285

    
1286
    ntime = nresult.get(constants.NV_TIME, None)
1287
    try:
1288
      ntime_merged = utils.MergeTime(ntime)
1289
    except (ValueError, TypeError):
1290
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1291
      return
1292

    
1293
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1294
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1295
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1296
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1297
    else:
1298
      ntime_diff = None
1299

    
1300
    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1301
             "Node time diverges by at least %s from master node time",
1302
             ntime_diff)
1303

    
1304
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1305
    """Check the node time.
1306

1307
    @type ninfo: L{objects.Node}
1308
    @param ninfo: the node to check
1309
    @param nresult: the remote results for the node
1310
    @param vg_name: the configured VG name
1311

1312
    """
1313
    if vg_name is None:
1314
      return
1315

    
1316
    node = ninfo.name
1317
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1318

    
1319
    # checks vg existence and size > 20G
1320
    vglist = nresult.get(constants.NV_VGLIST, None)
1321
    test = not vglist
1322
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1323
    if not test:
1324
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1325
                                            constants.MIN_VG_SIZE)
1326
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1327

    
1328
    # check pv names
1329
    pvlist = nresult.get(constants.NV_PVLIST, None)
1330
    test = pvlist is None
1331
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1332
    if not test:
1333
      # check that ':' is not present in PV names, since it's a
1334
      # special character for lvcreate (denotes the range of PEs to
1335
      # use on the PV)
1336
      for _, pvname, owner_vg in pvlist:
1337
        test = ":" in pvname
1338
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1339
                 " '%s' of VG '%s'", pvname, owner_vg)
1340

    
1341
  def _VerifyNodeNetwork(self, ninfo, nresult):
1342
    """Check the node time.
1343

1344
    @type ninfo: L{objects.Node}
1345
    @param ninfo: the node to check
1346
    @param nresult: the remote results for the node
1347

1348
    """
1349
    node = ninfo.name
1350
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1351

    
1352
    test = constants.NV_NODELIST not in nresult
1353
    _ErrorIf(test, self.ENODESSH, node,
1354
             "node hasn't returned node ssh connectivity data")
1355
    if not test:
1356
      if nresult[constants.NV_NODELIST]:
1357
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1358
          _ErrorIf(True, self.ENODESSH, node,
1359
                   "ssh communication with node '%s': %s", a_node, a_msg)
1360

    
1361
    test = constants.NV_NODENETTEST not in nresult
1362
    _ErrorIf(test, self.ENODENET, node,
1363
             "node hasn't returned node tcp connectivity data")
1364
    if not test:
1365
      if nresult[constants.NV_NODENETTEST]:
1366
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1367
        for anode in nlist:
1368
          _ErrorIf(True, self.ENODENET, node,
1369
                   "tcp communication with node '%s': %s",
1370
                   anode, nresult[constants.NV_NODENETTEST][anode])
1371

    
1372
    test = constants.NV_MASTERIP not in nresult
1373
    _ErrorIf(test, self.ENODENET, node,
1374
             "node hasn't returned node master IP reachability data")
1375
    if not test:
1376
      if not nresult[constants.NV_MASTERIP]:
1377
        if node == self.master_node:
1378
          msg = "the master node cannot reach the master IP (not configured?)"
1379
        else:
1380
          msg = "cannot reach the master IP"
1381
        _ErrorIf(True, self.ENODENET, node, msg)
1382

    
1383

    
1384
  def _VerifyInstance(self, instance, instanceconfig, node_image):
1385
    """Verify an instance.
1386

1387
    This function checks to see if the required block devices are
1388
    available on the instance's node.
1389

1390
    """
1391
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1392
    node_current = instanceconfig.primary_node
1393

    
1394
    node_vol_should = {}
1395
    instanceconfig.MapLVsByNode(node_vol_should)
1396

    
1397
    for node in node_vol_should:
1398
      n_img = node_image[node]
1399
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1400
        # ignore missing volumes on offline or broken nodes
1401
        continue
1402
      for volume in node_vol_should[node]:
1403
        test = volume not in n_img.volumes
1404
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1405
                 "volume %s missing on node %s", volume, node)
1406

    
1407
    if instanceconfig.admin_up:
1408
      pri_img = node_image[node_current]
1409
      test = instance not in pri_img.instances and not pri_img.offline
1410
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
1411
               "instance not running on its primary node %s",
1412
               node_current)
1413

    
1414
    for node, n_img in node_image.items():
1415
      if (not node == node_current):
1416
        test = instance in n_img.instances
1417
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1418
                 "instance should not run on node %s", node)
1419

    
1420
  def _VerifyOrphanVolumes(self, node_vol_should, node_image):
1421
    """Verify if there are any unknown volumes in the cluster.
1422

1423
    The .os, .swap and backup volumes are ignored. All other volumes are
1424
    reported as unknown.
1425

1426
    """
1427
    for node, n_img in node_image.items():
1428
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1429
        # skip non-healthy nodes
1430
        continue
1431
      for volume in n_img.volumes:
1432
        test = (node not in node_vol_should or
1433
                volume not in node_vol_should[node])
1434
        self._ErrorIf(test, self.ENODEORPHANLV, node,
1435
                      "volume %s is unknown", volume)
1436

    
1437
  def _VerifyOrphanInstances(self, instancelist, node_image):
1438
    """Verify the list of running instances.
1439

1440
    This checks what instances are running but unknown to the cluster.
1441

1442
    """
1443
    for node, n_img in node_image.items():
1444
      for o_inst in n_img.instances:
1445
        test = o_inst not in instancelist
1446
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1447
                      "instance %s on node %s should not exist", o_inst, node)
1448

    
1449
  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1450
    """Verify N+1 Memory Resilience.
1451

1452
    Check that if one single node dies we can still start all the
1453
    instances it was primary for.
1454

1455
    """
1456
    for node, n_img in node_image.items():
1457
      # This code checks that every node which is now listed as
1458
      # secondary has enough memory to host all instances it is
1459
      # supposed to should a single other node in the cluster fail.
1460
      # FIXME: not ready for failover to an arbitrary node
1461
      # FIXME: does not support file-backed instances
1462
      # WARNING: we currently take into account down instances as well
1463
      # as up ones, considering that even if they're down someone
1464
      # might want to start them even in the event of a node failure.
1465
      for prinode, instances in n_img.sbp.items():
1466
        needed_mem = 0
1467
        for instance in instances:
1468
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1469
          if bep[constants.BE_AUTO_BALANCE]:
1470
            needed_mem += bep[constants.BE_MEMORY]
1471
        test = n_img.mfree < needed_mem
1472
        self._ErrorIf(test, self.ENODEN1, node,
1473
                      "not enough memory on to accommodate"
1474
                      " failovers should peer node %s fail", prinode)

  def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
                       master_files):
    """Verifies and computes the node required file checksums.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param file_list: required list of files
    @param local_cksum: dictionary of local files and their checksums
    @param master_files: list of files that only masters should have

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_cksum = nresult.get(constants.NV_FILELIST, None)
    test = not isinstance(remote_cksum, dict)
    _ErrorIf(test, self.ENODEFILECHECK, node,
             "node hasn't returned file checksum data")
    if test:
      return

    for file_name in file_list:
      node_is_mc = ninfo.master_candidate
      must_have = (file_name not in master_files) or node_is_mc
      # missing
      test1 = file_name not in remote_cksum
      # invalid checksum
      test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
      # existing and good
      test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
      _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
               "file '%s' missing", file_name)
      _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
               "file '%s' has wrong checksum", file_name)
      # not candidate and this is not a must-have file
      _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist on non master"
               " candidates (and the file is outdated)", file_name)
      # all good, except non-master/non-must have combination
      _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist"
               " on non master candidates", file_name)

  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_map):
    """Verifies the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # compute the DRBD minors
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, self.ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
      # ghost instance should not be running, but otherwise we
      # don't give double warnings (both ghost instance and
      # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name, instance.admin_up)

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, self.ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return

    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, self.ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, self.ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)

  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
    """Verifies and updates the node volume data.

    This function will update a L{NodeImage}'s internal structures
    with data from the remote call.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    if vg_name is None:
      pass
    elif isinstance(lvdata, basestring):
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
    else:
      nimg.volumes = lvdata
      nimg.lvm_fail = False

  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
    """Verifies and updates the node instance list.

    If the listing was successful, then updates this node's instance
    list. Otherwise, it marks the RPC call as failed for the instance
    list key.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    idata = nresult.get(constants.NV_INSTANCELIST, None)
    test = not isinstance(idata, list)
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
    if test:
      nimg.hyp_fail = True
    else:
      nimg.instances = idata

  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
    """Verifies and computes a node information map

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # try to read free memory (from the hypervisor)
    hv_info = nresult.get(constants.NV_HVINFO, None)
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
    if not test:
      try:
        nimg.mfree = int(hv_info["memory_free"])
      except (ValueError, TypeError):
        _ErrorIf(True, self.ENODERPC, node,
                 "node returned invalid nodeinfo, check hypervisor")

    # FIXME: devise a free space model for file based instances as well
    if vg_name is not None:
      test = (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST])
      _ErrorIf(test, self.ENODELVM, node,
               "node didn't return data for the volume group '%s'"
               " - it is either missing or broken", vg_name)
      if not test:
        try:
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
        except (ValueError, TypeError):
          _ErrorIf(True, self.ENODERPC, node,
                   "node returned invalid LVM info, check LVM status")

  def CheckPrereq(self):
    """Check prerequisites.

    Transform the list of checks we're going to skip into a set and check that
    all its members are valid.

    """
    self.skip_set = frozenset(self.op.skip_checks)
    if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
      raise errors.OpPrereqError("Invalid checks to be skipped specified",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks are run only in the post phase; their failure is
    logged in the verify output and makes the verification fail.

    """
    all_nodes = self.cfg.GetNodeList()
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
      }
    for node in self.cfg.GetAllNodesInfo().values():
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())

    return env, [], all_nodes

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    self.bad = False
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn
    feedback_fn("* Verifying global settings")
    for msg in self.cfg.VerifyConfig():
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)

    # Check the cluster certificates
    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)

    vg_name = self.cfg.GetVGName()
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
    cluster = self.cfg.GetClusterInfo()
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
                        for iname in instancelist)
    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = 0 # Count of offline nodes
    n_drained = 0 # Count of nodes being drained
    node_vol_should = {}

    # FIXME: verify OS list
    # do local checksums
    master_files = [constants.CLUSTER_CONF_FILE]
    master_node = self.master_node = self.cfg.GetMasterNode()
    master_ip = self.cfg.GetMasterIP()

    file_names = ssconf.SimpleStore().GetFileList()
    file_names.extend(constants.ALL_CERT_FILES)
    file_names.extend(master_files)
    if cluster.modify_etc_hosts:
      file_names.append(constants.ETC_HOSTS)

    local_checksums = utils.FingerprintFiles(file_names)

    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
    node_verify_param = {
      constants.NV_FILELIST: file_names,
      constants.NV_NODELIST: [node.name for node in nodeinfo
                              if not node.offline],
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
                                  node.secondary_ip) for node in nodeinfo
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
      constants.NV_MASTERIP: (master_node, master_ip),
      }
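    # Note (illustrative): each NV_* key above requests one check from every
    # node; the per-node reply payload is expected to be a dict keyed by the
    # same NV_* constants, which is what the _VerifyNode*/_UpdateNode* helpers
    # above read via nresult.get().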

    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]
      node_verify_param[constants.NV_DRBDLIST] = None

    # Build our expected cluster state
    node_image = dict((node.name, self.NodeImage(offline=node.offline))
                      for node in nodeinfo)

    for instance in instancelist:
      inst_config = instanceinfo[instance]

      for nname in inst_config.all_nodes:
        if nname not in node_image:
          # ghost node
          gnode = self.NodeImage()
          gnode.ghost = True
          node_image[nname] = gnode

      inst_config.MapLVsByNode(node_vol_should)

      pnode = inst_config.primary_node
      node_image[pnode].pinst.append(instance)

      for snode in inst_config.secondary_nodes:
        nimg = node_image[snode]
        nimg.sinst.append(instance)
        if pnode not in nimg.sbp:
          nimg.sbp[pnode] = []
        nimg.sbp[pnode].append(instance)
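      # Illustrative: for a DRBD instance with primary node A and secondary
      # node B, node_image[B].sinst gets the instance name and
      # node_image[B].sbp ends up as {"A": [<instance name>, ...]}, which is
      # exactly the per-peer mapping _VerifyNPlusOneMemory walks.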

    # At this point, we have the in-memory data structures complete,
    # except for the runtime information, which we'll gather next

    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()

    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Verifying node status")
    for node_i in nodeinfo:
      node = node_i.name
      nimg = node_image[node]

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline += 1
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained += 1
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
      if msg:
        nimg.rpc_fail = True
        continue

      nresult = all_nvinfo[node].payload

      nimg.call_ok = self._VerifyNode(node_i, nresult)
      self._VerifyNodeNetwork(node_i, nresult)
      self._VerifyNodeLVM(node_i, nresult, vg_name)
      self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
                            master_files)
      self._VerifyNodeDrbd(node_i, nresult, instanceinfo, all_drbd_map)
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)

      self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
      self._UpdateNodeInstances(node_i, nresult, nimg)
      self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
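      # After the three _UpdateNode* calls the node image carries this node's
      # runtime view (volumes, instance list, free memory and VG space), which
      # the per-instance and orphan checks below rely on.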

    feedback_fn("* Verifying instance status")
    for instance in instancelist:
      if verbose:
        feedback_fn("* Verifying instance %s" % instance)
      inst_config = instanceinfo[instance]
      self._VerifyInstance(instance, inst_config, node_image)
      inst_nodes_offline = []

      pnode = inst_config.primary_node
      pnode_img = node_image[pnode]
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
               self.ENODERPC, pnode, "instance %s, connection to"
               " primary node failed", instance)

      if pnode_img.offline:
        inst_nodes_offline.append(pnode)

      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
      if not inst_config.secondary_nodes:
        i_non_redundant.append(instance)
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
               instance, "instance has multiple secondary nodes: %s",
               utils.CommaJoin(inst_config.secondary_nodes),
               code=self.ETYPE_WARNING)

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        s_img = node_image[snode]
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
                 "instance %s, connection to secondary node failed", instance)

        if s_img.offline:
          inst_nodes_offline.append(snode)

      # warn that the instance lives on offline nodes
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
               "instance lives on offline node(s) %s",
               utils.CommaJoin(inst_nodes_offline))
      # ... or ghost nodes
      for node in inst_config.all_nodes:
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
                 "instance lives on ghost node %s", node)

    feedback_fn("* Verifying orphan volumes")
    self._VerifyOrphanVolumes(node_vol_should, node_image)

    feedback_fn("* Verifying orphan instances")
    self._VerifyOrphanInstances(instancelist, node_image)

    if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
      feedback_fn("* Verifying N+1 Memory redundancy")
      self._VerifyNPlusOneMemory(node_image, instanceinfo)

    feedback_fn("* Other Notes")
    if i_non_redundant:
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))

    if i_non_a_balanced:
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
                  % len(i_non_a_balanced))

    if n_offline:
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)

    if n_drained:
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)

    return not self.bad

  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, and are only interested in
    # their results
    if phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      indent_re = re.compile('^', re.M)
      feedback_fn("* Hooks Results")
      assert hooks_results, "invalid result from hooks"

      for node_name in hooks_results:
        res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
                      "Communication failure in hooks execution: %s", msg)
        if res.offline or msg:
          # No need to investigate payload if node is offline or gave an error.
          # override manually lu_result here as _ErrorIf only
          # overrides self.bad
          lu_result = 1
          continue
        for script, hkr, output in res.payload:
          test = hkr == constants.HKR_FAIL
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
                        "Script %s failed, output:", script)
          if test:
            output = indent_re.sub('      ', output)
            feedback_fn("%s" % output)
            lu_result = 0

      return lu_result


class LUVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def CheckPrereq(self):
    """Check prerequisites.

    This has no prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    result = res_nodes, res_instances, res_missing = {}, [], {}

    vg_name = self.cfg.GetVGName()
    nodes = utils.NiceSort(self.cfg.GetNodeList())
    instances = [self.cfg.GetInstanceInfo(name)
                 for name in self.cfg.GetInstanceList()]

    nv_dict = {}
    for inst in instances:
      inst_lvs = {}
      if (not inst.admin_up or
          inst.disk_template not in constants.DTS_NET_MIRROR):
        continue
      inst.MapLVsByNode(inst_lvs)
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
      for node, vol_list in inst_lvs.iteritems():
        for vol in vol_list:
          nv_dict[(node, vol)] = inst
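    # Note (illustrative): at this point nv_dict maps (node_name, lv_name)
    # pairs to the owning instance object for every LV that should exist on a
    # mirrored instance; entries found below via nv_dict.pop() are accounted
    # for, and whatever is left over is reported as missing.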

    if not nv_dict:
      return result

    node_lvs = self.rpc.call_lv_list(nodes, vg_name)

    for node in nodes:
      # node_volume
      node_res = node_lvs[node]
      if node_res.offline:
        continue
      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg
        continue

      lvs = node_res.payload
      for lv_name, (_, _, lv_online) in lvs.items():
        inst = nv_dict.pop((node, lv_name), None)
        if (not lv_online and inst is not None
            and inst.name not in res_instances):
          res_instances.append(inst.name)

    # any leftover items in nv_dict are missing LVs, let's arrange the
    # data better
    for key, inst in nv_dict.iteritems():
      if inst.name not in res_missing:
        res_missing[inst.name] = []
      res_missing[inst.name].append(key)

    return result


class LURepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  """
  _OP_REQP = ["instances"]
  REQ_BGL = False

  def ExpandNames(self):
    if not isinstance(self.op.instances, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'",
                                 errors.ECODE_INVAL)

    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = _ExpandInstanceName(self.cfg, name)
        self.wanted_names.append(full_name)
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))

    changed = []
    for node, dskl in per_node_disks.items():
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsizes(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsizes call to node"
                        " %s, ignoring", node)
        continue
      if len(result.data) != len(dskl):
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, result.data):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
        size = size >> 20
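        # blockdev_getsizes is assumed to report sizes in bytes here; the
        # shift by 20 converts to MiB, the unit disk.size is recorded in.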
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))
    return changed


class LURenameCluster(LogicalUnit):
2162
  """Rename the cluster.
2163

2164
  """
2165
  HPATH = "cluster-rename"
2166
  HTYPE = constants.HTYPE_CLUSTER
2167
  _OP_REQP = ["name"]
2168

    
2169
  def BuildHooksEnv(self):
2170
    """Build hooks env.
2171

2172
    """
2173
    env = {
2174
      "OP_TARGET": self.cfg.GetClusterName(),
2175
      "NEW_NAME": self.op.name,
2176
      }
2177
    mn = self.cfg.GetMasterNode()
2178
    all_nodes = self.cfg.GetNodeList()
2179
    return env, [mn], all_nodes
2180

    
2181
  def CheckPrereq(self):
2182
    """Verify that the passed name is a valid one.
2183

2184
    """
2185
    hostname = utils.GetHostInfo(self.op.name)
2186

    
2187
    new_name = hostname.name
2188
    self.ip = new_ip = hostname.ip
2189
    old_name = self.cfg.GetClusterName()
2190
    old_ip = self.cfg.GetMasterIP()
2191
    if new_name == old_name and new_ip == old_ip:
2192
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
2193
                                 " cluster has changed",
2194
                                 errors.ECODE_INVAL)
2195
    if new_ip != old_ip:
2196
      if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2197
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
2198
                                   " reachable on the network. Aborting." %
2199
                                   new_ip, errors.ECODE_NOTUNIQUE)
2200

    
2201
    self.op.name = new_name
2202

    
2203
  def Exec(self, feedback_fn):
2204
    """Rename the cluster.
2205

2206
    """
2207
    clustername = self.op.name
2208
    ip = self.ip
2209

    
2210
    # shutdown the master IP
2211
    master = self.cfg.GetMasterNode()
2212
    result = self.rpc.call_node_stop_master(master, False)
2213
    result.Raise("Could not disable the master role")
2214

    
2215
    try:
2216
      cluster = self.cfg.GetClusterInfo()
2217
      cluster.cluster_name = clustername
2218
      cluster.master_ip = ip
2219
      self.cfg.Update(cluster, feedback_fn)
2220

    
2221
      # update the known hosts file
2222
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2223
      node_list = self.cfg.GetNodeList()
2224
      try:
2225
        node_list.remove(master)
2226
      except ValueError:
2227
        pass
2228
      result = self.rpc.call_upload_file(node_list,
2229
                                         constants.SSH_KNOWN_HOSTS_FILE)
2230
      for to_node, to_result in result.iteritems():
2231
        msg = to_result.fail_msg
2232
        if msg:
2233
          msg = ("Copy of file %s to node %s failed: %s" %
2234
                 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
2235
          self.proc.LogWarning(msg)
2236

    
2237
    finally:
2238
      result = self.rpc.call_node_start_master(master, False, False)
2239
      msg = result.fail_msg
2240
      if msg:
2241
        self.LogWarning("Could not re-enable the master role on"
2242
                        " the master, please restart manually: %s", msg)
2243

    
2244

    
2245
def _RecursiveCheckIfLVMBased(disk):
2246
  """Check if the given disk or its children are lvm-based.
2247

2248
  @type disk: L{objects.Disk}
2249
  @param disk: the disk to check
2250
  @rtype: boolean
2251
  @return: boolean indicating whether a LD_LV dev_type was found or not
2252

2253
  """
2254
  if disk.children:
2255
    for chdisk in disk.children:
2256
      if _RecursiveCheckIfLVMBased(chdisk):
2257
        return True
2258
  return disk.dev_type == constants.LD_LV
2259

    
2260

    
2261
class LUSetClusterParams(LogicalUnit):
2262
  """Change the parameters of the cluster.
2263

2264
  """
2265
  HPATH = "cluster-modify"
2266
  HTYPE = constants.HTYPE_CLUSTER
2267
  _OP_REQP = []
2268
  REQ_BGL = False
2269

    
2270
  def CheckArguments(self):
2271
    """Check parameters
2272

2273
    """
2274
    for attr in ["candidate_pool_size",
2275
                 "uid_pool", "add_uids", "remove_uids"]:
2276
      if not hasattr(self.op, attr):
2277
        setattr(self.op, attr, None)
2278

    
2279
    if self.op.candidate_pool_size is not None:
2280
      try:
2281
        self.op.candidate_pool_size = int(self.op.candidate_pool_size)
2282
      except (ValueError, TypeError), err:
2283
        raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
2284
                                   str(err), errors.ECODE_INVAL)
2285
      if self.op.candidate_pool_size < 1:
2286
        raise errors.OpPrereqError("At least one master candidate needed",
2287
                                   errors.ECODE_INVAL)
2288

    
2289
    _CheckBooleanOpField(self.op, "maintain_node_health")
2290

    
2291
    if self.op.uid_pool:
2292
      uidpool.CheckUidPool(self.op.uid_pool)
2293

    
2294
    if self.op.add_uids:
2295
      uidpool.CheckUidPool(self.op.add_uids)
2296

    
2297
    if self.op.remove_uids:
2298
      uidpool.CheckUidPool(self.op.remove_uids)
2299

    
2300
  def ExpandNames(self):
2301
    # FIXME: in the future maybe other cluster params won't require checking on
2302
    # all nodes to be modified.
2303
    self.needed_locks = {
2304
      locking.LEVEL_NODE: locking.ALL_SET,
2305
    }
2306
    self.share_locks[locking.LEVEL_NODE] = 1
2307

    
2308
  def BuildHooksEnv(self):
2309
    """Build hooks env.
2310

2311
    """
2312
    env = {
2313
      "OP_TARGET": self.cfg.GetClusterName(),
2314
      "NEW_VG_NAME": self.op.vg_name,
2315
      }
2316
    mn = self.cfg.GetMasterNode()
2317
    return env, [mn], [mn]
2318

    
2319
  def CheckPrereq(self):
2320
    """Check prerequisites.
2321

2322
    This checks whether the given params don't conflict and
2323
    if the given volume group is valid.
2324

2325
    """
2326
    if self.op.vg_name is not None and not self.op.vg_name:
2327
      instances = self.cfg.GetAllInstancesInfo().values()
2328
      for inst in instances:
2329
        for disk in inst.disks:
2330
          if _RecursiveCheckIfLVMBased(disk):
2331
            raise errors.OpPrereqError("Cannot disable lvm storage while"
2332
                                       " lvm-based instances exist",
2333
                                       errors.ECODE_INVAL)
2334

    
2335
    node_list = self.acquired_locks[locking.LEVEL_NODE]
2336

    
2337
    # if vg_name not None, checks given volume group on all nodes
2338
    if self.op.vg_name:
2339
      vglist = self.rpc.call_vg_list(node_list)
2340
      for node in node_list:
2341
        msg = vglist[node].fail_msg
2342
        if msg:
2343
          # ignoring down node
2344
          self.LogWarning("Error while gathering data on node %s"
2345
                          " (ignoring node): %s", node, msg)
2346
          continue
2347
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2348
                                              self.op.vg_name,
2349
                                              constants.MIN_VG_SIZE)
2350
        if vgstatus:
2351
          raise errors.OpPrereqError("Error on node '%s': %s" %
2352
                                     (node, vgstatus), errors.ECODE_ENVIRON)
2353

    
2354
    self.cluster = cluster = self.cfg.GetClusterInfo()
2355
    # validate params changes
2356
    if self.op.beparams:
2357
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2358
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2359

    
2360
    if self.op.nicparams:
2361
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2362
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2363
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
2364
      nic_errors = []
2365

    
2366
      # check all instances for consistency
2367
      for instance in self.cfg.GetAllInstancesInfo().values():
2368
        for nic_idx, nic in enumerate(instance.nics):
2369
          params_copy = copy.deepcopy(nic.nicparams)
2370
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
2371

    
2372
          # check parameter syntax
2373
          try:
2374
            objects.NIC.CheckParameterSyntax(params_filled)
2375
          except errors.ConfigurationError, err:
2376
            nic_errors.append("Instance %s, nic/%d: %s" %
2377
                              (instance.name, nic_idx, err))
2378

    
2379
          # if we're moving instances to routed, check that they have an ip
2380
          target_mode = params_filled[constants.NIC_MODE]
2381
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2382
            nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2383
                              (instance.name, nic_idx))
2384
      if nic_errors:
2385
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2386
                                   "\n".join(nic_errors))
2387

    
2388
    # hypervisor list/parameters
2389
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2390
    if self.op.hvparams:
2391
      if not isinstance(self.op.hvparams, dict):
2392
        raise errors.OpPrereqError("Invalid 'hvparams' parameter on input",
2393
                                   errors.ECODE_INVAL)
2394
      for hv_name, hv_dict in self.op.hvparams.items():
2395
        if hv_name not in self.new_hvparams:
2396
          self.new_hvparams[hv_name] = hv_dict
2397
        else:
2398
          self.new_hvparams[hv_name].update(hv_dict)
2399

    
2400
    # os hypervisor parameters
2401
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2402
    if self.op.os_hvp:
2403
      if not isinstance(self.op.os_hvp, dict):
2404
        raise errors.OpPrereqError("Invalid 'os_hvp' parameter on input",
2405
                                   errors.ECODE_INVAL)
2406
      for os_name, hvs in self.op.os_hvp.items():
2407
        if not isinstance(hvs, dict):
2408
          raise errors.OpPrereqError(("Invalid 'os_hvp' parameter on"
2409
                                      " input"), errors.ECODE_INVAL)
2410
        if os_name not in self.new_os_hvp:
2411
          self.new_os_hvp[os_name] = hvs
2412
        else:
2413
          for hv_name, hv_dict in hvs.items():
2414
            if hv_name not in self.new_os_hvp[os_name]:
2415
              self.new_os_hvp[os_name][hv_name] = hv_dict
2416
            else:
2417
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
2418

    
2419
    # changes to the hypervisor list
2420
    if self.op.enabled_hypervisors is not None:
2421
      self.hv_list = self.op.enabled_hypervisors
2422
      if not self.hv_list:
2423
        raise errors.OpPrereqError("Enabled hypervisors list must contain at"
2424
                                   " least one member",
2425
                                   errors.ECODE_INVAL)
2426
      invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
2427
      if invalid_hvs:
2428
        raise errors.OpPrereqError("Enabled hypervisors contains invalid"
2429
                                   " entries: %s" %
2430
                                   utils.CommaJoin(invalid_hvs),
2431
                                   errors.ECODE_INVAL)
2432
      for hv in self.hv_list:
2433
        # if the hypervisor doesn't already exist in the cluster
2434
        # hvparams, we initialize it to empty, and then (in both
2435
        # cases) we make sure to fill the defaults, as we might not
2436
        # have a complete defaults list if the hypervisor wasn't
2437
        # enabled before
2438
        if hv not in new_hvp:
2439
          new_hvp[hv] = {}
2440
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2441
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2442
    else:
2443
      self.hv_list = cluster.enabled_hypervisors
2444

    
2445
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
2446
      # either the enabled list has changed, or the parameters have, validate
2447
      for hv_name, hv_params in self.new_hvparams.items():
2448
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
2449
            (self.op.enabled_hypervisors and
2450
             hv_name in self.op.enabled_hypervisors)):
2451
          # either this is a new hypervisor, or its parameters have changed
2452
          hv_class = hypervisor.GetHypervisor(hv_name)
2453
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2454
          hv_class.CheckParameterSyntax(hv_params)
2455
          _CheckHVParams(self, node_list, hv_name, hv_params)
2456

    
2457
    if self.op.os_hvp:
2458
      # no need to check any newly-enabled hypervisors, since the
2459
      # defaults have already been checked in the above code-block
2460
      for os_name, os_hvp in self.new_os_hvp.items():
2461
        for hv_name, hv_params in os_hvp.items():
2462
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2463
          # we need to fill in the new os_hvp on top of the actual hv_p
2464
          cluster_defaults = self.new_hvparams.get(hv_name, {})
2465
          new_osp = objects.FillDict(cluster_defaults, hv_params)
2466
          hv_class = hypervisor.GetHypervisor(hv_name)
2467
          hv_class.CheckParameterSyntax(new_osp)
2468
          _CheckHVParams(self, node_list, hv_name, new_osp)
2469

    
2470

    
2471
  def Exec(self, feedback_fn):
2472
    """Change the parameters of the cluster.
2473

2474
    """
2475
    if self.op.vg_name is not None:
2476
      new_volume = self.op.vg_name
2477
      if not new_volume:
2478
        new_volume = None
2479
      if new_volume != self.cfg.GetVGName():
2480
        self.cfg.SetVGName(new_volume)
2481
      else:
2482
        feedback_fn("Cluster LVM configuration already in desired"
2483
                    " state, not changing")
2484
    if self.op.hvparams:
2485
      self.cluster.hvparams = self.new_hvparams
2486
    if self.op.os_hvp:
2487
      self.cluster.os_hvp = self.new_os_hvp
2488
    if self.op.enabled_hypervisors is not None:
2489
      self.cluster.hvparams = self.new_hvparams
2490
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2491
    if self.op.beparams:
2492
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2493
    if self.op.nicparams:
2494
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2495

    
2496
    if self.op.candidate_pool_size is not None:
2497
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
2498
      # we need to update the pool size here, otherwise the save will fail
2499
      _AdjustCandidatePool(self, [])
2500

    
2501
    if self.op.maintain_node_health is not None:
2502
      self.cluster.maintain_node_health = self.op.maintain_node_health
2503

    
2504
    if self.op.add_uids is not None:
2505
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2506

    
2507
    if self.op.remove_uids is not None:
2508
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2509

    
2510
    if self.op.uid_pool is not None:
2511
      self.cluster.uid_pool = self.op.uid_pool
2512

    
2513
    self.cfg.Update(self.cluster, feedback_fn)
2514

    
2515

    
2516
def _RedistributeAncillaryFiles(lu, additional_nodes=None):
2517
  """Distribute additional files which are part of the cluster configuration.
2518

2519
  ConfigWriter takes care of distributing the config and ssconf files, but
2520
  there are more files which should be distributed to all nodes. This function
2521
  makes sure those are copied.
2522

2523
  @param lu: calling logical unit
2524
  @param additional_nodes: list of nodes not in the config to distribute to
2525

2526
  """
2527
  # 1. Gather target nodes
2528
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2529
  dist_nodes = lu.cfg.GetOnlineNodeList()
2530
  if additional_nodes is not None:
2531
    dist_nodes.extend(additional_nodes)
2532
  if myself.name in dist_nodes:
2533
    dist_nodes.remove(myself.name)
2534

    
2535
  # 2. Gather files to distribute
2536
  dist_files = set([constants.ETC_HOSTS,
2537
                    constants.SSH_KNOWN_HOSTS_FILE,
2538
                    constants.RAPI_CERT_FILE,
2539
                    constants.RAPI_USERS_FILE,
2540
                    constants.CONFD_HMAC_KEY,
2541
                    constants.CLUSTER_DOMAIN_SECRET_FILE,
2542
                   ])
2543

    
2544
  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2545
  for hv_name in enabled_hypervisors:
2546
    hv_class = hypervisor.GetHypervisor(hv_name)
2547
    dist_files.update(hv_class.GetAncillaryFiles())
2548

    
2549
  # 3. Perform the files upload
2550
  for fname in dist_files:
2551
    if os.path.exists(fname):
2552
      result = lu.rpc.call_upload_file(dist_nodes, fname)
2553
      for to_node, to_result in result.items():
2554
        msg = to_result.fail_msg
2555
        if msg:
2556
          msg = ("Copy of file %s to node %s failed: %s" %
2557
                 (fname, to_node, msg))
2558
          lu.proc.LogWarning(msg)
2559

    
2560

    
2561
class LURedistributeConfig(NoHooksLU):
2562
  """Force the redistribution of cluster configuration.
2563

2564
  This is a very simple LU.
2565

2566
  """
2567
  _OP_REQP = []
2568
  REQ_BGL = False
2569

    
2570
  def ExpandNames(self):
2571
    self.needed_locks = {
2572
      locking.LEVEL_NODE: locking.ALL_SET,
2573
    }
2574
    self.share_locks[locking.LEVEL_NODE] = 1
2575

    
2576
  def CheckPrereq(self):
2577
    """Check prerequisites.
2578

2579
    """
2580

    
2581
  def Exec(self, feedback_fn):
2582
    """Redistribute the configuration.
2583

2584
    """
2585
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
2586
    _RedistributeAncillaryFiles(self)
2587

    
2588

    
2589
def _WaitForSync(lu, instance, disks=None, oneshot=False):
2590
  """Sleep and poll for an instance's disk to sync.
2591

2592
  """
2593
  if not instance.disks or disks is not None and not disks:
2594
    return True
2595

    
2596
  disks = _ExpandCheckDisks(instance, disks)
2597

    
2598
  if not oneshot:
2599
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
2600

    
2601
  node = instance.primary_node
2602

    
2603
  for dev in disks:
2604
    lu.cfg.SetDiskID(dev, node)
2605

    
2606
  # TODO: Convert to utils.Retry
2607

    
2608
  retries = 0
2609
  degr_retries = 10 # in seconds, as we sleep 1 second each time
2610
  while True:
2611
    max_time = 0
2612
    done = True
2613
    cumul_degraded = False
2614
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
2615
    msg = rstats.fail_msg
2616
    if msg:
2617
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
2618
      retries += 1
2619
      if retries >= 10:
2620
        raise errors.RemoteError("Can't contact node %s for mirror data,"
2621
                                 " aborting." % node)
2622
      time.sleep(6)
2623
      continue
2624
    rstats = rstats.payload
2625
    retries = 0
2626
    for i, mstat in enumerate(rstats):
2627
      if mstat is None:
2628
        lu.LogWarning("Can't compute data for node %s/%s",
2629
                           node, disks[i].iv_name)
2630
        continue
2631

    
2632
      cumul_degraded = (cumul_degraded or
2633
                        (mstat.is_degraded and mstat.sync_percent is None))
2634
      if mstat.sync_percent is not None:
2635
        done = False
2636
        if mstat.estimated_time is not None:
2637
          rem_time = ("%s remaining (estimated)" %
2638
                      utils.FormatSeconds(mstat.estimated_time))
2639
          max_time = mstat.estimated_time
2640
        else:
2641
          rem_time = "no time estimate"
2642
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
2643
                        (disks[i].iv_name, mstat.sync_percent, rem_time))
2644

    
2645
    # if we're done but degraded, let's do a few small retries, to
2646
    # make sure we see a stable and not transient situation; therefore
2647
    # we force restart of the loop
2648
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
2649
      logging.info("Degraded disks found, %d retries left", degr_retries)
2650
      degr_retries -= 1
2651
      time.sleep(1)
2652
      continue
2653

    
2654
    if done or oneshot:
2655
      break
2656

    
2657
    time.sleep(min(60, max_time))
2658

    
2659
  if done:
2660
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
2661
  return not cumul_degraded
2662

    
2663

    
2664
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
2665
  """Check that mirrors are not degraded.
2666

2667
  The ldisk parameter, if True, will change the test from the
2668
  is_degraded attribute (which represents overall non-ok status for
2669
  the device(s)) to the ldisk (representing the local storage status).
2670

2671
  """
2672
  lu.cfg.SetDiskID(dev, node)
2673

    
2674
  result = True
2675

    
2676
  if on_primary or dev.AssembleOnSecondary():
2677
    rstats = lu.rpc.call_blockdev_find(node, dev)
2678
    msg = rstats.fail_msg
2679
    if msg:
2680
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
2681
      result = False
2682
    elif not rstats.payload:
2683
      lu.LogWarning("Can't find disk on node %s", node)
2684
      result = False
2685
    else:
2686
      if ldisk:
2687
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
2688
      else:
2689
        result = result and not rstats.payload.is_degraded
2690

    
2691
  if dev.children:
2692
    for child in dev.children:
2693
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
2694

    
2695
  return result
2696

    
2697

    
2698
class LUDiagnoseOS(NoHooksLU):
2699
  """Logical unit for OS diagnose/query.
2700

2701
  """
2702
  _OP_REQP = ["output_fields", "names"]
2703
  REQ_BGL = False
2704
  _FIELDS_STATIC = utils.FieldSet()
2705
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants")
2706
  # Fields that need calculation of global os validity
2707
  _FIELDS_NEEDVALID = frozenset(["valid", "variants"])
2708

    
2709
  def ExpandNames(self):
2710
    if self.op.names:
2711
      raise errors.OpPrereqError("Selective OS query not supported",
2712
                                 errors.ECODE_INVAL)
2713

    
2714
    _CheckOutputFields(static=self._FIELDS_STATIC,
2715
                       dynamic=self._FIELDS_DYNAMIC,
2716
                       selected=self.op.output_fields)
2717

    
2718
    # Lock all nodes, in shared mode
2719
    # Temporary removal of locks, should be reverted later
2720
    # TODO: reintroduce locks when they are lighter-weight
2721
    self.needed_locks = {}
2722
    #self.share_locks[locking.LEVEL_NODE] = 1
2723
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2724

    
2725
  def CheckPrereq(self):
2726
    """Check prerequisites.
2727

2728
    """
2729

    
2730
  @staticmethod
2731
  def _DiagnoseByOS(rlist):
2732
    """Remaps a per-node return list into an a per-os per-node dictionary
2733

2734
    @param rlist: a map with node names as keys and OS objects as values
2735

2736
    @rtype: dict
2737
    @return: a dictionary with osnames as keys and as value another map, with
2738
        nodes as keys and tuples of (path, status, diagnose) as values, eg::
2739

2740
          {"debian-etch": {"node1": [(/usr/lib/..., True, ""),
2741
                                     (/srv/..., False, "invalid api")],
2742
                           "node2": [(/srv/..., True, "")]}
2743
          }
2744

2745
    """
2746
    all_os = {}
2747
    # we build here the list of nodes that didn't fail the RPC (at RPC
2748
    # level), so that nodes with a non-responding node daemon don't
2749
    # make all OSes invalid
2750
    good_nodes = [node_name for node_name in rlist
2751
                  if not rlist[node_name].fail_msg]
2752
    for node_name, nr in rlist.items():
2753
      if nr.fail_msg or not nr.payload:
2754
        continue
2755
      for name, path, status, diagnose, variants in nr.payload:
2756
        if name not in all_os:
2757
          # build a list of nodes for this os containing empty lists
2758
          # for each node in node_list
2759
          all_os[name] = {}
2760
          for nname in good_nodes:
2761
            all_os[name][nname] = []
2762
        all_os[name][node_name].append((path, status, diagnose, variants))
2763
    return all_os
2764

    
2765
  def Exec(self, feedback_fn):
2766
    """Compute the list of OSes.
2767

2768
    """
2769
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
2770
    node_data = self.rpc.call_os_diagnose(valid_nodes)
2771
    pol = self._DiagnoseByOS(node_data)
2772
    output = []
2773
    calc_valid = self._FIELDS_NEEDVALID.intersection(self.op.output_fields)
2774
    calc_variants = "variants" in self.op.output_fields
2775

    
2776
    for os_name, os_data in pol.items():
2777
      row = []
2778
      if calc_valid:
2779
        valid = True
2780
        variants = None
2781
        for osl in os_data.values():
2782
          valid = bool(valid and osl and osl[0][1])
2783
          if not valid:
2784
            variants = set()
2785
            break
2786
          if calc_variants:
2787
            node_variants = osl[0][3]
2788
            if variants is None:
2789
              variants = set(node_variants)
2790
            else:
2791
              variants.intersection_update(node_variants)
2792

    
2793
      for field in self.op.output_fields:
2794
        if field == "name":
2795
          val = os_name
2796
        elif field == "valid":
2797
          val = valid
2798
        elif field == "node_status":
2799
          # this is just a copy of the dict
2800
          val = {}
2801
          for node_name, nos_list in os_data.items():
2802
            val[node_name] = nos_list
2803
        elif field == "variants":
2804
          val = list(variants)
2805
        else:
2806
          raise errors.ParameterError(field)
2807
        row.append(val)
2808
      output.append(row)
2809

    
2810
    return output
2811

    
2812

    
2813
class LURemoveNode(LogicalUnit):
2814
  """Logical unit for removing a node.
2815

2816
  """
2817
  HPATH = "node-remove"
2818
  HTYPE = constants.HTYPE_NODE
2819
  _OP_REQP = ["node_name"]
2820

    
2821
  def BuildHooksEnv(self):
2822
    """Build hooks env.
2823

2824
    This doesn't run on the target node in the pre phase as a failed
2825
    node would then be impossible to remove.
2826

2827
    """
2828
    env = {
2829
      "OP_TARGET": self.op.node_name,
2830
      "NODE_NAME": self.op.node_name,
2831
      }
2832
    all_nodes = self.cfg.GetNodeList()
2833
    try:
2834
      all_nodes.remove(self.op.node_name)
2835
    except ValueError:
2836
      logging.warning("Node %s which is about to be removed not found"
2837
                      " in the all nodes list", self.op.node_name)
2838
    return env, all_nodes, all_nodes
2839

    
2840
  def CheckPrereq(self):
2841
    """Check prerequisites.
2842

2843
    This checks:
2844
     - the node exists in the configuration
2845
     - it does not have primary or secondary instances
2846
     - it's not the master
2847

2848
    Any errors are signaled by raising errors.OpPrereqError.
2849

2850
    """
2851
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
2852
    node = self.cfg.GetNodeInfo(self.op.node_name)
2853
    assert node is not None
2854

    
2855
    instance_list = self.cfg.GetInstanceList()
2856

    
2857
    masternode = self.cfg.GetMasterNode()
2858
    if node.name == masternode:
2859
      raise errors.OpPrereqError("Node is the master node,"
2860
                                 " you need to failover first.",
2861
                                 errors.ECODE_INVAL)
2862

    
2863
    for instance_name in instance_list:
2864
      instance = self.cfg.GetInstanceInfo(instance_name)
2865
      if node.name in instance.all_nodes:
2866
        raise errors.OpPrereqError("Instance %s is still running on the node,"
2867
                                   " please remove first." % instance_name,
2868
                                   errors.ECODE_INVAL)
2869
    self.op.node_name = node.name
2870
    self.node = node
2871

    
2872
  def Exec(self, feedback_fn):
2873
    """Removes the node from the cluster.
2874

2875
    """
2876
    node = self.node
2877
    logging.info("Stopping the node daemon and removing configs from node %s",
2878
                 node.name)
2879

    
2880
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
2881

    
2882
    # Promote nodes to master candidate as needed
2883
    _AdjustCandidatePool(self, exceptions=[node.name])
2884
    self.context.RemoveNode(node.name)
2885

    
2886
    # Run post hooks on the node before it's removed
2887
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
2888
    try:
2889
      hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
2890
    except:
2891
      # pylint: disable-msg=W0702
2892
      self.LogWarning("Errors occurred running hooks on %s" % node.name)
2893

    
2894
    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
2895
    msg = result.fail_msg
2896
    if msg:
2897
      self.LogWarning("Errors encountered on the remote node while leaving"
2898
                      " the cluster: %s", msg)
2899

    
2900
    # Remove node from our /etc/hosts
2901
    if self.cfg.GetClusterInfo().modify_etc_hosts:
2902
      # FIXME: this should be done via an rpc call to node daemon
2903
      utils.RemoveHostFromEtcHosts(node.name)
2904
      _RedistributeAncillaryFiles(self)
2905

    
2906

    
2907
class LUQueryNodes(NoHooksLU):
2908
  """Logical unit for querying nodes.
2909

2910
  """
2911
  # pylint: disable-msg=W0142
2912
  _OP_REQP = ["output_fields", "names", "use_locking"]
2913
  REQ_BGL = False
2914

    
2915
  _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
2916
                    "master_candidate", "offline", "drained"]
2917

    
2918
  _FIELDS_DYNAMIC = utils.FieldSet(
2919
    "dtotal", "dfree",
2920
    "mtotal", "mnode", "mfree",
2921
    "bootid",
2922
    "ctotal", "cnodes", "csockets",
2923
    )
2924

    
2925
  _FIELDS_STATIC = utils.FieldSet(*[
2926
    "pinst_cnt", "sinst_cnt",
2927
    "pinst_list", "sinst_list",
2928
    "pip", "sip", "tags",
2929
    "master",
2930
    "role"] + _SIMPLE_FIELDS
2931
    )
2932

    
2933
  def ExpandNames(self):
2934
    _CheckOutputFields(static=self._FIELDS_STATIC,
2935
                       dynamic=self._FIELDS_DYNAMIC,
2936
                       selected=self.op.output_fields)
2937

    
2938
    self.needed_locks = {}
2939
    self.share_locks[locking.LEVEL_NODE] = 1
2940

    
2941
    if self.op.names:
2942
      self.wanted = _GetWantedNodes(self, self.op.names)
2943
    else:
2944
      self.wanted = locking.ALL_SET
2945

    
2946
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
2947
    self.do_locking = self.do_node_query and self.op.use_locking
2948
    if self.do_locking:
2949
      # if we don't request only static fields, we need to lock the nodes
2950
      self.needed_locks[locking.LEVEL_NODE] = self.wanted
2951

    
2952
  def CheckPrereq(self):
2953
    """Check prerequisites.
2954

2955
    """
2956
    # The validation of the node list is done in the _GetWantedNodes,
2957
    # if non empty, and if empty, there's no validation to do
2958
    pass
2959

    
2960
  def Exec(self, feedback_fn):
2961
    """Computes the list of nodes and their attributes.
2962

2963
    """
2964
    all_info = self.cfg.GetAllNodesInfo()
2965
    if self.do_locking:
2966
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
2967
    elif self.wanted != locking.ALL_SET:
2968
      nodenames = self.wanted
2969
      missing = set(nodenames).difference(all_info.keys())
2970
      if missing:
2971
        raise errors.OpExecError(
2972
          "Some nodes were removed before retrieving their data: %s" % missing)
2973
    else:
2974
      nodenames = all_info.keys()
2975

    
2976
    nodenames = utils.NiceSort(nodenames)
2977
    nodelist = [all_info[name] for name in nodenames]
2978

    
2979
    # begin data gathering
2980

    
2981
    if self.do_node_query:
2982
      live_data = {}
2983
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
2984
                                          self.cfg.GetHypervisorType())
2985
      for name in nodenames:
2986
        nodeinfo = node_data[name]
2987
        if not nodeinfo.fail_msg and nodeinfo.payload:
2988
          nodeinfo = nodeinfo.payload
2989
          fn = utils.TryConvert
2990
          live_data[name] = {
2991
            "mtotal": fn(int, nodeinfo.get('memory_total', None)),
2992
            "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
2993
            "mfree": fn(int, nodeinfo.get('memory_free', None)),
2994
            "dtotal": fn(int, nodeinfo.get('vg_size', None)),
2995
            "dfree": fn(int, nodeinfo.get('vg_free', None)),
2996
            "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
2997
            "bootid": nodeinfo.get('bootid', None),
2998
            "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
2999
            "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
3000
            }
3001
        else:
3002
          live_data[name] = {}
3003
    else:
3004
      live_data = dict.fromkeys(nodenames, {})
3005

    
3006
    node_to_primary = dict([(name, set()) for name in nodenames])
3007
    node_to_secondary = dict([(name, set()) for name in nodenames])
3008

    
3009
    inst_fields = frozenset(("pinst_cnt", "pinst_list",
3010
                             "sinst_cnt", "sinst_list"))
3011
    if inst_fields & frozenset(self.op.output_fields):
3012
      inst_data = self.cfg.GetAllInstancesInfo()
3013

    
3014
      for inst in inst_data.values():
3015
        if inst.primary_node in node_to_primary:
3016
          node_to_primary[inst.primary_node].add(inst.name)
3017
        for secnode in inst.secondary_nodes:
3018
          if secnode in node_to_secondary:
3019
            node_to_secondary[secnode].add(inst.name)
3020

    
3021
    master_node = self.cfg.GetMasterNode()
3022

    
3023
    # end data gathering
3024

    
3025
    output = []
3026
    for node in nodelist:
3027
      node_output = []
3028
      for field in self.op.output_fields:
3029
        if field in self._SIMPLE_FIELDS:
3030
          val = getattr(node, field)
3031
        elif field == "pinst_list":
3032
          val = list(node_to_primary[node.name])
3033
        elif field == "sinst_list":
3034
          val = list(node_to_secondary[node.name])
3035
        elif field == "pinst_cnt":
3036
          val = len(node_to_primary[node.name])
3037
        elif field == "sinst_cnt":
3038
          val = len(node_to_secondary[node.name])
3039
        elif field == "pip":
3040
          val = node.primary_ip
3041
        elif field == "sip":
3042
          val = node.secondary_ip
3043
        elif field == "tags":
3044
          val = list(node.GetTags())
3045
        elif field == "master":
3046
          val = node.name == master_node
3047
        elif self._FIELDS_DYNAMIC.Matches(field):
3048
          val = live_data[node.name].get(field, None)
3049
        elif field == "role":
3050
          if node.name == master_node:
3051
            val = "M"
3052
          elif node.master_candidate:
3053
            val = "C"
3054
          elif node.drained:
3055
            val = "D"
3056
          elif node.offline:
3057
            val = "O"
3058
          else:
3059
            val = "R"
3060
        else:
3061
          raise errors.ParameterError(field)
3062
        node_output.append(val)
3063
      output.append(node_output)
3064

    
3065
    return output
3066

    
3067

    
3068
class LUQueryNodeVolumes(NoHooksLU):
3069
  """Logical unit for getting volumes on node(s).
3070

3071
  """
3072
  _OP_REQP = ["nodes", "output_fields"]
3073
  REQ_BGL = False
3074
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3075
  _FIELDS_STATIC = utils.FieldSet("node")
3076

    
3077
  def ExpandNames(self):
3078
    _CheckOutputFields(static=self._FIELDS_STATIC,
3079
                       dynamic=self._FIELDS_DYNAMIC,
3080
                       selected=self.op.output_fields)
3081

    
3082
    self.needed_locks = {}
3083
    self.share_locks[locking.LEVEL_NODE] = 1
3084
    if not self.op.nodes:
3085
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3086
    else:
3087
      self.needed_locks[locking.LEVEL_NODE] = \
3088
        _GetWantedNodes(self, self.op.nodes)
3089

    
3090
  def CheckPrereq(self):
3091
    """Check prerequisites.
3092

3093
    This checks that the fields required are valid output fields.
3094

3095
    """
3096
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3097

    
3098
  def Exec(self, feedback_fn):
3099
    """Computes the list of nodes and their attributes.
3100

3101
    """
3102
    nodenames = self.nodes
3103
    volumes = self.rpc.call_node_volumes(nodenames)
3104

    
3105
    ilist = [self.cfg.GetInstanceInfo(iname) for iname
3106
             in self.cfg.GetInstanceList()]
3107

    
3108
    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3109

    
3110
    output = []
3111
    for node in nodenames:
3112
      nresult = volumes[node]
3113
      if nresult.offline:
3114
        continue
3115
      msg = nresult.fail_msg
3116
      if msg:
3117
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3118
        continue
3119

    
3120
      node_vols = nresult.payload[:]
3121
      node_vols.sort(key=lambda vol: vol['dev'])
3122

    
3123
      for vol in node_vols:
3124
        node_output = []
3125
        for field in self.op.output_fields:
3126
          if field == "node":
3127
            val = node
3128
          elif field == "phys":
3129
            val = vol['dev']
3130
          elif field == "vg":
3131
            val = vol['vg']
3132
          elif field == "name":
3133
            val = vol['name']
3134
          elif field == "size":
3135
            val = int(float(vol['size']))
3136
          elif field == "instance":
3137
            for inst in ilist:
3138
              if node not in lv_by_node[inst]:
3139
                continue
3140
              if vol['name'] in lv_by_node[inst][node]:
3141
                val = inst.name
3142
                break
3143
            else:
3144
              val = '-'
3145
          else:
3146
            raise errors.ParameterError(field)
3147
          node_output.append(str(val))
3148

    
3149
        output.append(node_output)
3150

    
3151
    return output
3152

    
3153

    
3154
class LUQueryNodeStorage(NoHooksLU):
3155
  """Logical unit for getting information on storage units on node(s).
3156

3157
  """
3158
  _OP_REQP = ["nodes", "storage_type", "output_fields"]
3159
  REQ_BGL = False
3160
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3161

    
3162
  def CheckArguments(self):
3163
    _CheckStorageType(self.op.storage_type)
3164

    
3165
    _CheckOutputFields(static=self._FIELDS_STATIC,
3166
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3167
                       selected=self.op.output_fields)
3168

    
3169
  def ExpandNames(self):
3170
    self.needed_locks = {}
3171
    self.share_locks[locking.LEVEL_NODE] = 1
3172

    
3173
    if self.op.nodes:
3174
      self.needed_locks[locking.LEVEL_NODE] = \
3175
        _GetWantedNodes(self, self.op.nodes)
3176
    else:
3177
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3178

    
3179
  def CheckPrereq(self):
3180
    """Check prerequisites.
3181

3182
    This checks that the fields required are valid output fields.
3183

3184
    """
3185
    self.op.name = getattr(self.op, "name", None)
3186

    
3187
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3188

    
3189
  def Exec(self, feedback_fn):
3190
    """Computes the list of nodes and their attributes.
3191

3192
    """
3193
    # Always get name to sort by
3194
    if constants.SF_NAME in self.op.output_fields:
3195
      fields = self.op.output_fields[:]
3196
    else:
3197
      fields = [constants.SF_NAME] + self.op.output_fields
3198

    
3199
    # Never ask for node or type as it's only known to the LU
3200
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
3201
      while extra in fields:
3202
        fields.remove(extra)
3203

    
3204
    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3205
    name_idx = field_idx[constants.SF_NAME]
3206

    
3207
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3208
    data = self.rpc.call_storage_list(self.nodes,
3209
                                      self.op.storage_type, st_args,
3210
                                      self.op.name, fields)
3211

    
3212
    result = []
3213

    
3214
    for node in utils.NiceSort(self.nodes):
3215
      nresult = data[node]
3216
      if nresult.offline:
3217
        continue
3218

    
3219
      msg = nresult.fail_msg
3220
      if msg:
3221
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3222
        continue
3223

    
3224
      rows = dict([(row[name_idx], row) for row in nresult.payload])
3225

    
3226
      for name in utils.NiceSort(rows.keys()):
3227
        row = rows[name]
3228

    
3229
        out = []
3230

    
3231
        for field in self.op.output_fields:
3232
          if field == constants.SF_NODE:
3233
            val = node
3234
          elif field == constants.SF_TYPE:
3235
            val = self.op.storage_type
3236
          elif field in field_idx:
3237
            val = row[field_idx[field]]
3238
          else:
3239
            raise errors.ParameterError(field)
3240

    
3241
          out.append(val)
3242

    
3243
        result.append(out)
3244

    
3245
    return result
3246

    
3247

    
3248
class LUModifyNodeStorage(NoHooksLU):
3249
  """Logical unit for modifying a storage volume on a node.
3250

3251
  """
3252
  _OP_REQP = ["node_name", "storage_type", "name", "changes"]
3253
  REQ_BGL = False
3254

    
3255
  def CheckArguments(self):
3256
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3257

    
3258
    _CheckStorageType(self.op.storage_type)
3259

    
3260
  def ExpandNames(self):
3261
    self.needed_locks = {
3262
      locking.LEVEL_NODE: self.op.node_name,
3263
      }
3264

    
3265
  def CheckPrereq(self):
3266
    """Check prerequisites.
3267

3268
    """
3269
    storage_type = self.op.storage_type
3270

    
3271
    try:
3272
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
3273
    except KeyError:
3274
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
3275
                                 " modified" % storage_type,
3276
                                 errors.ECODE_INVAL)
3277

    
3278
    diff = set(self.op.changes.keys()) - modifiable
3279
    if diff:
3280
      raise errors.OpPrereqError("The following fields can not be modified for"
3281
                                 " storage units of type '%s': %r" %
3282
                                 (storage_type, list(diff)),
3283
                                 errors.ECODE_INVAL)
3284

    
3285
  def Exec(self, feedback_fn):
3286
    """Computes the list of nodes and their attributes.
3287

3288
    """
3289
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3290
    result = self.rpc.call_storage_modify(self.op.node_name,
3291
                                          self.op.storage_type, st_args,
3292
                                          self.op.name, self.op.changes)
3293
    result.Raise("Failed to modify storage unit '%s' on %s" %
3294
                 (self.op.name, self.op.node_name))
3295

    
3296

    
3297
class LUAddNode(LogicalUnit):
3298
  """Logical unit for adding node to the cluster.
3299

3300
  """
3301
  HPATH = "node-add"
3302
  HTYPE = constants.HTYPE_NODE
3303
  _OP_REQP = ["node_name"]
3304

    
3305
  def CheckArguments(self):
3306
    # validate/normalize the node name
3307
    self.op.node_name = utils.HostInfo.NormalizeName(self.op.node_name)
3308

    
3309
  def BuildHooksEnv(self):
3310
    """Build hooks env.
3311

3312
    This will run on all nodes before, and on all nodes + the new node after.
3313

3314
    """
3315
    env = {
3316
      "OP_TARGET": self.op.node_name,
3317
      "NODE_NAME": self.op.node_name,
3318
      "NODE_PIP": self.op.primary_ip,
3319
      "NODE_SIP": self.op.secondary_ip,
3320
      }
3321
    nodes_0 = self.cfg.GetNodeList()
3322
    nodes_1 = nodes_0 + [self.op.node_name, ]
3323
    return env, nodes_0, nodes_1
3324

    
3325
  def CheckPrereq(self):
3326
    """Check prerequisites.
3327

3328
    This checks:
3329
     - the new node is not already in the config
3330
     - it is resolvable
3331
     - its parameters (single/dual homed) matches the cluster
3332

3333
    Any errors are signaled by raising errors.OpPrereqError.
3334

3335
    """
3336
    node_name = self.op.node_name
3337
    cfg = self.cfg
3338

    
3339
    dns_data = utils.GetHostInfo(node_name)
3340

    
3341
    node = dns_data.name
3342
    primary_ip = self.op.primary_ip = dns_data.ip
3343
    secondary_ip = getattr(self.op, "secondary_ip", None)
3344
    if secondary_ip is None:
3345
      secondary_ip = primary_ip
3346
    if not utils.IsValidIP(secondary_ip):
3347
      raise errors.OpPrereqError("Invalid secondary IP given",
3348
                                 errors.ECODE_INVAL)
3349
    self.op.secondary_ip = secondary_ip
3350

    
3351
    node_list = cfg.GetNodeList()
3352
    if not self.op.readd and node in node_list:
3353
      raise errors.OpPrereqError("Node %s is already in the configuration" %
3354
                                 node, errors.ECODE_EXISTS)
3355
    elif self.op.readd and node not in node_list:
3356
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3357
                                 errors.ECODE_NOENT)
3358

    
3359
    self.changed_primary_ip = False
3360

    
3361
    for existing_node_name in node_list:
3362
      existing_node = cfg.GetNodeInfo(existing_node_name)
3363

    
3364
      if self.op.readd and node == existing_node_name:
3365
        if existing_node.secondary_ip != secondary_ip:
3366
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
3367
                                     " address configuration as before",
3368
                                     errors.ECODE_INVAL)
3369
        if existing_node.primary_ip != primary_ip:
3370
          self.changed_primary_ip = True
3371

    
3372
        continue
3373

    
3374
      if (existing_node.primary_ip == primary_ip or
3375
          existing_node.secondary_ip == primary_ip or
3376
          existing_node.primary_ip == secondary_ip or
3377
          existing_node.secondary_ip == secondary_ip):
3378
        raise errors.OpPrereqError("New node ip address(es) conflict with"
3379
                                   " existing node %s" % existing_node.name,
3380
                                   errors.ECODE_NOTUNIQUE)
3381

    
3382
    # check that the type of the node (single versus dual homed) is the
3383
    # same as for the master
3384
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3385
    master_singlehomed = myself.secondary_ip == myself.primary_ip
3386
    newbie_singlehomed = secondary_ip == primary_ip
3387
    if master_singlehomed != newbie_singlehomed:
3388
      if master_singlehomed:
3389
        raise errors.OpPrereqError("The master has no private ip but the"
3390
                                   " new node has one",
3391
                                   errors.ECODE_INVAL)
3392
      else:
3393
        raise errors.OpPrereqError("The master has a private ip but the"
3394
                                   " new node doesn't have one",
3395
                                   errors.ECODE_INVAL)
3396

    
3397
    # checks reachability
3398
    if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3399
      raise errors.OpPrereqError("Node not reachable by ping",
3400
                                 errors.ECODE_ENVIRON)
3401

    
3402
    if not newbie_singlehomed:
3403
      # check reachability from my secondary ip to newbie's secondary ip
3404
      if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3405
                           source=myself.secondary_ip):
3406
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3407
                                   " based ping to noded port",
3408
                                   errors.ECODE_ENVIRON)
3409

    
3410
    if self.op.readd:
3411
      exceptions = [node]
3412
    else:
3413
      exceptions = []
3414

    
3415
    self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
3416

    
3417
    if self.op.readd:
3418
      self.new_node = self.cfg.GetNodeInfo(node)
3419
      assert self.new_node is not None, "Can't retrieve locked node %s" % node
3420
    else:
3421
      self.new_node = objects.Node(name=node,
3422
                                   primary_ip=primary_ip,
3423
                                   secondary_ip=secondary_ip,
3424
                                   master_candidate=self.master_candidate,
3425
                                   offline=False, drained=False)
3426

    
3427
  def Exec(self, feedback_fn):
3428
    """Adds the new node to the cluster.
3429

3430
    """
3431
    new_node = self.new_node
3432
    node = new_node.name
3433

    
3434
    # for re-adds, reset the offline/drained/master-candidate flags;
3435
    # we need to reset here, otherwise offline would prevent RPC calls
3436
    # later in the procedure; this also means that if the re-add
3437
    # fails, we are left with a non-offlined, broken node
3438
    if self.op.readd:
3439
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
3440
      self.LogInfo("Readding a node, the offline/drained flags were reset")
3441
      # if we demote the node, we do cleanup later in the procedure
3442
      new_node.master_candidate = self.master_candidate
3443
      if self.changed_primary_ip:
3444
        new_node.primary_ip = self.op.primary_ip
3445

    
3446
    # notify the user about any possible mc promotion
3447
    if new_node.master_candidate:
3448
      self.LogInfo("Node will be a master candidate")
3449

    
3450
    # check connectivity
3451
    result = self.rpc.call_version([node])[node]
3452
    result.Raise("Can't get version information from node %s" % node)
3453
    if constants.PROTOCOL_VERSION == result.payload:
3454
      logging.info("Communication to node %s fine, sw version %s match",
3455
                   node, result.payload)
3456
    else:
3457
      raise errors.OpExecError("Version mismatch master version %s,"
3458
                               " node version %s" %
3459
                               (constants.PROTOCOL_VERSION, result.payload))
3460

    
3461
    # setup ssh on node
3462
    if self.cfg.GetClusterInfo().modify_ssh_setup:
3463
      logging.info("Copy ssh key to node %s", node)
3464
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
3465
      keyarray = []
3466
      keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
3467
                  constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
3468
                  priv_key, pub_key]
3469

    
3470
      for i in keyfiles:
3471
        keyarray.append(utils.ReadFile(i))
3472

    
3473
      result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
3474
                                      keyarray[2], keyarray[3], keyarray[4],
3475
                                      keyarray[5])
3476
      result.Raise("Cannot transfer ssh keys to the new node")
3477

    
3478
    # Add node to our /etc/hosts, and add key to known_hosts
3479
    if self.cfg.GetClusterInfo().modify_etc_hosts:
3480
      # FIXME: this should be done via an rpc call to node daemon
3481
      utils.AddHostToEtcHosts(new_node.name)
3482

    
3483
    if new_node.secondary_ip != new_node.primary_ip:
3484
      result = self.rpc.call_node_has_ip_address(new_node.name,
3485
                                                 new_node.secondary_ip)
3486
      result.Raise("Failure checking secondary ip on node %s" % new_node.name,
3487
                   prereq=True, ecode=errors.ECODE_ENVIRON)
3488
      if not result.payload:
3489
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
3490
                                 " you gave (%s). Please fix and re-run this"
3491
                                 " command." % new_node.secondary_ip)
3492

    
3493
    node_verify_list = [self.cfg.GetMasterNode()]
3494
    node_verify_param = {
3495
      constants.NV_NODELIST: [node],
3496
      # TODO: do a node-net-test as well?
3497
    }
3498

    
3499
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3500
                                       self.cfg.GetClusterName())
3501
    for verifier in node_verify_list:
3502
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
3503
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
3504
      if nl_payload:
3505
        for failed in nl_payload:
3506
          feedback_fn("ssh/hostname verification failed"
3507
                      " (checking from %s): %s" %
3508
                      (verifier, nl_payload[failed]))
3509
        raise errors.OpExecError("ssh/hostname verification failed.")
3510

    
3511
    if self.op.readd:
3512
      _RedistributeAncillaryFiles(self)
3513
      self.context.ReaddNode(new_node)
3514
      # make sure we redistribute the config
3515
      self.cfg.Update(new_node, feedback_fn)
3516
      # and make sure the new node will not have old files around
3517
      if not new_node.master_candidate:
3518
        result = self.rpc.call_node_demote_from_mc(new_node.name)
3519
        msg = result.fail_msg
3520
        if msg:
3521
          self.LogWarning("Node failed to demote itself from master"
3522
                          " candidate status: %s" % msg)
3523
    else:
3524
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
3525
      self.context.AddNode(new_node, self.proc.GetECId())
3526

    
3527

    
3528
class LUSetNodeParams(LogicalUnit):
3529
  """Modifies the parameters of a node.
3530

3531
  """
3532
  HPATH = "node-modify"
3533
  HTYPE = constants.HTYPE_NODE
3534
  _OP_REQP = ["node_name"]
3535
  REQ_BGL = False
3536

    
3537
  def CheckArguments(self):
3538
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3539
    _CheckBooleanOpField(self.op, 'master_candidate')
3540
    _CheckBooleanOpField(self.op, 'offline')
3541
    _CheckBooleanOpField(self.op, 'drained')
3542
    _CheckBooleanOpField(self.op, 'auto_promote')
3543
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
3544
    if all_mods.count(None) == 3:
3545
      raise errors.OpPrereqError("Please pass at least one modification",
3546
                                 errors.ECODE_INVAL)
3547
    if all_mods.count(True) > 1:
3548
      raise errors.OpPrereqError("Can't set the node into more than one"
3549
                                 " state at the same time",
3550
                                 errors.ECODE_INVAL)
3551

    
3552
    # Boolean value that tells us whether we're offlining or draining the node
3553
    self.offline_or_drain = (self.op.offline == True or
3554
                             self.op.drained == True)
3555
    self.deoffline_or_drain = (self.op.offline == False or
3556
                               self.op.drained == False)
3557
    self.might_demote = (self.op.master_candidate == False or
3558
                         self.offline_or_drain)
3559

    
3560
    self.lock_all = self.op.auto_promote and self.might_demote
3561

    
3562

    
3563
  def ExpandNames(self):
3564
    if self.lock_all:
3565
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
3566
    else:
3567
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
3568

    
3569
  def BuildHooksEnv(self):
3570
    """Build hooks env.
3571

3572
    This runs on the master node.
3573

3574
    """
3575
    env = {
3576
      "OP_TARGET": self.op.node_name,
3577
      "MASTER_CANDIDATE": str(self.op.master_candidate),
3578
      "OFFLINE": str(self.op.offline),
3579
      "DRAINED": str(self.op.drained),
3580
      }
3581
    nl = [self.cfg.GetMasterNode(),
3582
          self.op.node_name]
3583
    return env, nl, nl
3584

    
3585
  def CheckPrereq(self):
3586
    """Check prerequisites.
3587

3588
    This only checks the instance list against the existing names.
3589

3590
    """
3591
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
3592

    
3593
    if (self.op.master_candidate is not None or
3594
        self.op.drained is not None or
3595
        self.op.offline is not None):
3596
      # we can't change the master's node flags
3597
      if self.op.node_name == self.cfg.GetMasterNode():
3598
        raise errors.OpPrereqError("The master role can be changed"
3599
                                   " only via masterfailover",
3600
                                   errors.ECODE_INVAL)
3601

    
3602

    
3603
    if node.master_candidate and self.might_demote and not self.lock_all:
3604
      assert not self.op.auto_promote, "auto-promote set but lock_all not"
3605
      # check if after removing the current node, we're missing master
3606
      # candidates
3607
      (mc_remaining, mc_should, _) = \
3608
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
3609
      if mc_remaining < mc_should:
3610
        raise errors.OpPrereqError("Not enough master candidates, please"
3611
                                   " pass auto_promote to allow promotion",
3612
                                   errors.ECODE_INVAL)
3613

    
3614
    if (self.op.master_candidate == True and
3615
        ((node.offline and not self.op.offline == False) or
3616
         (node.drained and not self.op.drained == False))):
3617
      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
3618
                                 " to master_candidate" % node.name,
3619
                                 errors.ECODE_INVAL)
3620

    
3621
    # If we're being deofflined/drained, we'll MC ourself if needed
3622
    if (self.deoffline_or_drain and not self.offline_or_drain and not
3623
        self.op.master_candidate == True and not node.master_candidate):
3624
      self.op.master_candidate = _DecideSelfPromotion(self)
3625
      if self.op.master_candidate:
3626
        self.LogInfo("Autopromoting node to master candidate")
3627

    
3628
    return
3629

    
3630
  def Exec(self, feedback_fn):
3631
    """Modifies a node.
3632

3633
    """
3634
    node = self.node
3635

    
3636
    result = []
3637
    changed_mc = False
3638

    
3639
    if self.op.offline is not None:
3640
      node.offline = self.op.offline
3641
      result.append(("offline", str(self.op.offline)))
3642
      if self.op.offline == True:
3643
        if node.master_candidate:
3644
          node.master_candidate = False
3645
          changed_mc = True
3646
          result.append(("master_candidate", "auto-demotion due to offline"))
3647
        if node.drained:
3648
          node.drained = False
3649
          result.append(("drained", "clear drained status due to offline"))
3650

    
3651
    if self.op.master_candidate is not None:
3652
      node.master_candidate = self.op.master_candidate
3653
      changed_mc = True
3654
      result.append(("master_candidate", str(self.op.master_candidate)))
3655
      if self.op.master_candidate == False:
3656
        rrc = self.rpc.call_node_demote_from_mc(node.name)
3657
        msg = rrc.fail_msg
3658
        if msg:
3659
          self.LogWarning("Node failed to demote itself: %s" % msg)
3660

    
3661
    if self.op.drained is not None:
3662
      node.drained = self.op.drained
3663
      result.append(("drained", str(self.op.drained)))
3664
      if self.op.drained == True:
3665
        if node.master_candidate:
3666
          node.master_candidate = False
3667
          changed_mc = True
3668
          result.append(("master_candidate", "auto-demotion due to drain"))
3669
          rrc = self.rpc.call_node_demote_from_mc(node.name)
3670
          msg = rrc.fail_msg
3671
          if msg:
3672
            self.LogWarning("Node failed to demote itself: %s" % msg)
3673
        if node.offline:
3674
          node.offline = False
3675
          result.append(("offline", "clear offline status due to drain"))
3676

    
3677
    # we locked all nodes, we adjust the CP before updating this node
3678
    if self.lock_all:
3679
      _AdjustCandidatePool(self, [node.name])
3680

    
3681
    # this will trigger configuration file update, if needed
3682
    self.cfg.Update(node, feedback_fn)
3683

    
3684
    # this will trigger job queue propagation or cleanup
3685
    if changed_mc:
3686
      self.context.ReaddNode(node)
3687

    
3688
    return result
3689

    
3690

    
3691
class LUPowercycleNode(NoHooksLU):
3692
  """Powercycles a node.
3693

3694
  """
3695
  _OP_REQP = ["node_name", "force"]
3696
  REQ_BGL = False
3697

    
3698
  def CheckArguments(self):
3699
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3700
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
3701
      raise errors.OpPrereqError("The node is the master and the force"
3702
                                 " parameter was not set",
3703
                                 errors.ECODE_INVAL)
3704

    
3705
  def ExpandNames(self):
3706
    """Locking for PowercycleNode.
3707

3708
    This is a last-resort option and shouldn't block on other
3709
    jobs. Therefore, we grab no locks.
3710

3711
    """
3712
    self.needed_locks = {}
3713

    
3714
  def CheckPrereq(self):
3715
    """Check prerequisites.
3716

3717
    This LU has no prereqs.
3718

3719
    """
3720
    pass
3721

    
3722
  def Exec(self, feedback_fn):
3723
    """Reboots a node.
3724

3725
    """
3726
    result = self.rpc.call_node_powercycle(self.op.node_name,
3727
                                           self.cfg.GetHypervisorType())
3728
    result.Raise("Failed to schedule the reboot")
3729
    return result.payload
3730

    
3731

    
3732
class LUQueryClusterInfo(NoHooksLU):
3733
  """Query cluster configuration.
3734

3735
  """
3736
  _OP_REQP = []
3737
  REQ_BGL = False
3738

    
3739
  def ExpandNames(self):
3740
    self.needed_locks = {}
3741

    
3742
  def CheckPrereq(self):
3743
    """No prerequsites needed for this LU.
3744

3745
    """
3746
    pass
3747

    
3748
  def Exec(self, feedback_fn):
3749
    """Return cluster config.
3750

3751
    """
3752
    cluster = self.cfg.GetClusterInfo()
3753
    os_hvp = {}
3754

    
3755
    # Filter just for enabled hypervisors
3756
    for os_name, hv_dict in cluster.os_hvp.items():
3757
      os_hvp[os_name] = {}
3758
      for hv_name, hv_params in hv_dict.items():
3759
        if hv_name in cluster.enabled_hypervisors:
3760
          os_hvp[os_name][hv_name] = hv_params
3761

    
3762
    result = {
3763
      "software_version": constants.RELEASE_VERSION,
3764
      "protocol_version": constants.PROTOCOL_VERSION,
3765
      "config_version": constants.CONFIG_VERSION,
3766
      "os_api_version": max(constants.OS_API_VERSIONS),
3767
      "export_version": constants.EXPORT_VERSION,
3768
      "architecture": (platform.architecture()[0], platform.machine()),
3769
      "name": cluster.cluster_name,
3770
      "master": cluster.master_node,
3771
      "default_hypervisor": cluster.enabled_hypervisors[0],
3772
      "enabled_hypervisors": cluster.enabled_hypervisors,
3773
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
3774
                        for hypervisor_name in cluster.enabled_hypervisors]),
3775
      "os_hvp": os_hvp,
3776
      "beparams": cluster.beparams,
3777
      "nicparams": cluster.nicparams,
3778
      "candidate_pool_size": cluster.candidate_pool_size,
3779
      "master_netdev": cluster.master_netdev,
3780
      "volume_group_name": cluster.volume_group_name,
3781
      "file_storage_dir": cluster.file_storage_dir,
3782
      "maintain_node_health": cluster.maintain_node_health,
3783
      "ctime": cluster.ctime,
3784
      "mtime": cluster.mtime,
3785
      "uuid": cluster.uuid,
3786
      "tags": list(cluster.GetTags()),
3787
      "uid_pool": cluster.uid_pool,
3788
      }
3789

    
3790
    return result
3791

    
3792

    
3793
class LUQueryConfigValues(NoHooksLU):
3794
  """Return configuration values.
3795

3796
  """
3797
  _OP_REQP = []
3798
  REQ_BGL = False
3799
  _FIELDS_DYNAMIC = utils.FieldSet()
3800
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
3801
                                  "watcher_pause")
3802

    
3803
  def ExpandNames(self):
3804
    self.needed_locks = {}
3805

    
3806
    _CheckOutputFields(static=self._FIELDS_STATIC,
3807
                       dynamic=self._FIELDS_DYNAMIC,
3808
                       selected=self.op.output_fields)
3809

    
3810
  def CheckPrereq(self):
3811
    """No prerequisites.
3812

3813
    """
3814
    pass
3815

    
3816
  def Exec(self, feedback_fn):
3817
    """Dump a representation of the cluster config to the standard output.
3818

3819
    """
3820
    values = []
3821
    for field in self.op.output_fields:
3822
      if field == "cluster_name":
3823
        entry = self.cfg.GetClusterName()
3824
      elif field == "master_node":
3825
        entry = self.cfg.GetMasterNode()
3826
      elif field == "drain_flag":
3827
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
3828
      elif field == "watcher_pause":
3829
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
3830
      else:
3831
        raise errors.ParameterError(field)
3832
      values.append(entry)
3833
    return values
3834

    
3835

    
3836
class LUActivateInstanceDisks(NoHooksLU):
3837
  """Bring up an instance's disks.
3838

3839
  """
3840
  _OP_REQP = ["instance_name"]
3841
  REQ_BGL = False
3842

    
3843
  def ExpandNames(self):
3844
    self._ExpandAndLockInstance()
3845
    self.needed_locks[locking.LEVEL_NODE] = []
3846
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3847

    
3848
  def DeclareLocks(self, level):
3849
    if level == locking.LEVEL_NODE:
3850
      self._LockInstancesNodes()
3851

    
3852
  def CheckPrereq(self):
3853
    """Check prerequisites.
3854

3855
    This checks that the instance is in the cluster.
3856

3857
    """
3858
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3859
    assert self.instance is not None, \
3860
      "Cannot retrieve locked instance %s" % self.op.instance_name
3861
    _CheckNodeOnline(self, self.instance.primary_node)
3862
    if not hasattr(self.op, "ignore_size"):
3863
      self.op.ignore_size = False
3864

    
3865
  def Exec(self, feedback_fn):
3866
    """Activate the disks.
3867

3868
    """
3869
    disks_ok, disks_info = \
3870
              _AssembleInstanceDisks(self, self.instance,
3871
                                     ignore_size=self.op.ignore_size)
3872
    if not disks_ok:
3873
      raise errors.OpExecError("Cannot activate block devices")
3874

    
3875
    return disks_info
3876

    
3877

    
3878
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
3879
                           ignore_size=False):
3880
  """Prepare the block devices for an instance.
3881

3882
  This sets up the block devices on all nodes.
3883

3884
  @type lu: L{LogicalUnit}
3885
  @param lu: the logical unit on whose behalf we execute
3886
  @type instance: L{objects.Instance}
3887
  @param instance: the instance for whose disks we assemble
3888
  @type disks: list of L{objects.Disk} or None
3889
  @param disks: which disks to assemble (or all, if None)
3890
  @type ignore_secondaries: boolean
3891
  @param ignore_secondaries: if true, errors on secondary nodes
3892
      won't result in an error return from the function
3893
  @type ignore_size: boolean
3894
  @param ignore_size: if true, the current known size of the disk
3895
      will not be used during the disk activation, useful for cases
3896
      when the size is wrong
3897
  @return: False if the operation failed, otherwise a list of
3898
      (host, instance_visible_name, node_visible_name)
3899
      with the mapping from node devices to instance devices
3900

3901
  """
3902
  device_info = []
3903
  disks_ok = True
3904
  iname = instance.name
3905
  disks = _ExpandCheckDisks(instance, disks)
3906

    
3907
  # With the two passes mechanism we try to reduce the window of
3908
  # opportunity for the race condition of switching DRBD to primary
3909
  # before handshaking occured, but we do not eliminate it
3910

    
3911
  # The proper fix would be to wait (with some limits) until the
3912
  # connection has been made and drbd transitions from WFConnection
3913
  # into any other network-connected state (Connected, SyncTarget,
3914
  # SyncSource, etc.)
3915

    
3916
  # 1st pass, assemble on all nodes in secondary mode
3917
  for inst_disk in disks:
3918
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3919
      if ignore_size:
3920
        node_disk = node_disk.Copy()
3921
        node_disk.UnsetSize()
3922
      lu.cfg.SetDiskID(node_disk, node)
3923
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
3924
      msg = result.fail_msg
3925
      if msg:
3926
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
3927
                           " (is_primary=False, pass=1): %s",
3928
                           inst_disk.iv_name, node, msg)
3929
        if not ignore_secondaries:
3930
          disks_ok = False
3931

    
3932
  # FIXME: race condition on drbd migration to primary
3933

    
3934
  # 2nd pass, do only the primary node
3935
  for inst_disk in disks:
3936
    dev_path = None
3937

    
3938
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3939
      if node != instance.primary_node:
3940
        continue
3941
      if ignore_size:
3942
        node_disk = node_disk.Copy()
3943
        node_disk.UnsetSize()
3944
      lu.cfg.SetDiskID(node_disk, node)
3945
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
3946
      msg = result.fail_msg
3947
      if msg:
3948
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
3949
                           " (is_primary=True, pass=2): %s",
3950
                           inst_disk.iv_name, node, msg)
3951
        disks_ok = False
3952
      else:
3953
        dev_path = result.payload
3954

    
3955
    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
3956

    
3957
  # leave the disks configured for the primary node
3958
  # this is a workaround that would be fixed better by
3959
  # improving the logical/physical id handling
3960
  for disk in disks:
3961
    lu.cfg.SetDiskID(disk, instance.primary_node)
3962

    
3963
  return disks_ok, device_info
3964

    
3965

    
3966
def _StartInstanceDisks(lu, instance, force):
3967
  """Start the disks of an instance.
3968

3969
  """
3970
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
3971
                                           ignore_secondaries=force)
3972
  if not disks_ok:
3973
    _ShutdownInstanceDisks(lu, instance)
3974
    if force is not None and not force:
3975
      lu.proc.LogWarning("", hint="If the message above refers to a"
3976
                         " secondary node,"
3977
                         " you can retry the operation using '--force'.")
3978
    raise errors.OpExecError("Disk consistency error")
3979

    
3980

    
3981
class LUDeactivateInstanceDisks(NoHooksLU):
3982
  """Shutdown an instance's disks.
3983

3984
  """
3985
  _OP_REQP = ["instance_name"]
3986
  REQ_BGL = False
3987

    
3988
  def ExpandNames(self):
3989
    self._ExpandAndLockInstance()
3990
    self.needed_locks[locking.LEVEL_NODE] = []
3991
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3992

    
3993
  def DeclareLocks(self, level):
3994
    if level == locking.LEVEL_NODE:
3995
      self._LockInstancesNodes()
3996

    
3997
  def CheckPrereq(self):
3998
    """Check prerequisites.
3999

4000
    This checks that the instance is in the cluster.
4001

4002
    """
4003
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4004
    assert self.instance is not None, \
4005
      "Cannot retrieve locked instance %s" % self.op.instance_name
4006

    
4007
  def Exec(self, feedback_fn):
4008
    """Deactivate the disks
4009

4010
    """
4011
    instance = self.instance
4012
    _SafeShutdownInstanceDisks(self, instance)
4013

    
4014

    
4015
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4016
  """Shutdown block devices of an instance.
4017

4018
  This function checks if an instance is running, before calling
4019
  _ShutdownInstanceDisks.
4020

4021
  """
4022
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4023
  _ShutdownInstanceDisks(lu, instance, disks=disks)
4024

    
4025

    
4026
def _ExpandCheckDisks(instance, disks):
4027
  """Return the instance disks selected by the disks list
4028

4029
  @type disks: list of L{objects.Disk} or None
4030
  @param disks: selected disks
4031
  @rtype: list of L{objects.Disk}
4032
  @return: selected instance disks to act on
4033

4034
  """
4035
  if disks is None:
4036
    return instance.disks
4037
  else:
4038
    if not set(disks).issubset(instance.disks):
4039
      raise errors.ProgrammerError("Can only act on disks belonging to the"
4040
                                   " target instance")
4041
    return disks
4042

    
4043

    
4044
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4045
  """Shutdown block devices of an instance.
4046

4047
  This does the shutdown on all nodes of the instance.
4048

4049
  If the ignore_primary is false, errors on the primary node are
4050
  ignored.
4051

4052
  """
4053
  all_result = True
4054
  disks = _ExpandCheckDisks(instance, disks)
4055

    
4056
  for disk in disks:
4057
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4058
      lu.cfg.SetDiskID(top_disk, node)
4059
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4060
      msg = result.fail_msg
4061
      if msg:
4062
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4063
                      disk.iv_name, node, msg)
4064
        if not ignore_primary or node != instance.primary_node:
4065
          all_result = False
4066
  return all_result
4067

    
4068

    
4069
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4070
  """Checks if a node has enough free memory.
4071

4072
  This function check if a given node has the needed amount of free
4073
  memory. In case the node has less memory or we cannot get the
4074
  information from the node, this function raise an OpPrereqError
4075
  exception.
4076

4077
  @type lu: C{LogicalUnit}
4078
  @param lu: a logical unit from which we get configuration data
4079
  @type node: C{str}
4080
  @param node: the node to check
4081
  @type reason: C{str}
4082
  @param reason: string to use in the error message
4083
  @type requested: C{int}
4084
  @param requested: the amount of memory in MiB to check for
4085
  @type hypervisor_name: C{str}
4086
  @param hypervisor_name: the hypervisor to ask for memory stats
4087
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4088
      we cannot check the node
4089

4090
  """
4091
  nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
4092
  nodeinfo[node].Raise("Can't get data from node %s" % node,
4093
                       prereq=True, ecode=errors.ECODE_ENVIRON)
4094
  free_mem = nodeinfo[node].payload.get('memory_free', None)
4095
  if not isinstance(free_mem, int):
4096
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4097
                               " was '%s'" % (node, free_mem),
4098
                               errors.ECODE_ENVIRON)
4099
  if requested > free_mem:
4100
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4101
                               " needed %s MiB, available %s MiB" %
4102
                               (node, reason, requested, free_mem),
4103
                               errors.ECODE_NORES)
4104

    
4105

    
4106
def _CheckNodesFreeDisk(lu, nodenames, requested):
4107
  """Checks if nodes have enough free disk space in the default VG.
4108

4109
  This function check if all given nodes have the needed amount of
4110
  free disk. In case any node has less disk or we cannot get the
4111
  information from the node, this function raise an OpPrereqError
4112
  exception.
4113

4114
  @type lu: C{LogicalUnit}
4115
  @param lu: a logical unit from which we get configuration data
4116
  @type nodenames: C{list}
4117
  @param nodenames: the list of node names to check
4118
  @type requested: C{int}
4119
  @param requested: the amount of disk in MiB to check for
4120
  @raise errors.OpPrereqError: if the node doesn't have enough disk, or
4121
      we cannot check the node
4122

4123
  """
4124
  nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
4125
                                   lu.cfg.GetHypervisorType())
4126
  for node in nodenames:
4127
    info = nodeinfo[node]
4128
    info.Raise("Cannot get current information from node %s" % node,
4129
               prereq=True, ecode=errors.ECODE_ENVIRON)
4130
    vg_free = info.payload.get("vg_free", None)
4131
    if not isinstance(vg_free, int):
4132
      raise errors.OpPrereqError("Can't compute free disk space on node %s,"
4133
                                 " result was '%s'" % (node, vg_free),
4134
                                 errors.ECODE_ENVIRON)
4135
    if requested > vg_free:
4136
      raise errors.OpPrereqError("Not enough disk space on target node %s:"
4137
                                 " required %d MiB, available %d MiB" %
4138
                                 (node, requested, vg_free),
4139
                                 errors.ECODE_NORES)
4140

    
4141

    
4142
class LUStartupInstance(LogicalUnit):
4143
  """Starts an instance.
4144

4145
  """
4146
  HPATH = "instance-start"
4147
  HTYPE = constants.HTYPE_INSTANCE
4148
  _OP_REQP = ["instance_name", "force"]
4149
  REQ_BGL = False
4150

    
4151
  def ExpandNames(self):
4152
    self._ExpandAndLockInstance()
4153

    
4154
  def BuildHooksEnv(self):
4155
    """Build hooks env.
4156

4157
    This runs on master, primary and secondary nodes of the instance.
4158

4159
    """
4160
    env = {
4161
      "FORCE": self.op.force,
4162
      }
4163
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4164
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4165
    return env, nl, nl
4166

    
4167
  def CheckPrereq(self):
4168
    """Check prerequisites.
4169

4170
    This checks that the instance is in the cluster.
4171

4172
    """
4173
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4174
    assert self.instance is not None, \
4175
      "Cannot retrieve locked instance %s" % self.op.instance_name
4176

    
4177
    # extra beparams
4178
    self.beparams = getattr(self.op, "beparams", {})
4179
    if self.beparams:
4180
      if not isinstance(self.beparams, dict):
4181
        raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
4182
                                   " dict" % (type(self.beparams), ),
4183
                                   errors.ECODE_INVAL)
4184
      # fill the beparams dict
4185
      utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
4186
      self.op.beparams = self.beparams
4187

    
4188
    # extra hvparams
4189
    self.hvparams = getattr(self.op, "hvparams", {})
4190
    if self.hvparams:
4191
      if not isinstance(self.hvparams, dict):
4192
        raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
4193
                                   " dict" % (type(self.hvparams), ),
4194
                                   errors.ECODE_INVAL)
4195

    
4196
      # check hypervisor parameter syntax (locally)
4197
      cluster = self.cfg.GetClusterInfo()
4198
      utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
4199
      filled_hvp = cluster.FillHV(instance)
4200
      filled_hvp.update(self.hvparams)
4201
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
4202
      hv_type.CheckParameterSyntax(filled_hvp)
4203
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
4204
      self.op.hvparams = self.hvparams
4205

    
4206
    _CheckNodeOnline(self, instance.primary_node)
4207

    
4208
    bep = self.cfg.GetClusterInfo().FillBE(instance)
4209
    # check bridges existence
4210
    _CheckInstanceBridgesExist(self, instance)
4211

    
4212
    remote_info = self.rpc.call_instance_info(instance.primary_node,
4213
                                              instance.name,
4214
                                              instance.hypervisor)
4215
    remote_info.Raise("Error checking node %s" % instance.primary_node,
4216
                      prereq=True, ecode=errors.ECODE_ENVIRON)
4217
    if not remote_info.payload: # not running already
4218
      _CheckNodeFreeMemory(self, instance.primary_node,
4219
                           "starting instance %s" % instance.name,
4220
                           bep[constants.BE_MEMORY], instance.hypervisor)
4221

    
4222
  def Exec(self, feedback_fn):
4223
    """Start the instance.
4224

4225
    """
4226
    instance = self.instance
4227
    force = self.op.force
4228

    
4229
    self.cfg.MarkInstanceUp(instance.name)
4230

    
4231
    node_current = instance.primary_node
4232

    
4233
    _StartInstanceDisks(self, instance, force)
4234

    
4235
    result = self.rpc.call_instance_start(node_current, instance,
4236
                                          self.hvparams, self.beparams)
4237
    msg = result.fail_msg
4238
    if msg:
4239
      _ShutdownInstanceDisks(self, instance)
4240
      raise errors.OpExecError("Could not start instance: %s" % msg)
4241

    
4242

    
4243
class LURebootInstance(LogicalUnit):
4244
  """Reboot an instance.
4245

4246
  """
4247
  HPATH = "instance-reboot"
4248
  HTYPE = constants.HTYPE_INSTANCE
4249
  _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
4250
  REQ_BGL = False
4251

    
4252
  def CheckArguments(self):
4253
    """Check the arguments.
4254

4255
    """
4256
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4257
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
4258

    
4259
  def ExpandNames(self):
4260
    if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
4261
                                   constants.INSTANCE_REBOOT_HARD,
4262
                                   constants.INSTANCE_REBOOT_FULL]:
4263
      raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
4264
                                  (constants.INSTANCE_REBOOT_SOFT,
4265
                                   constants.INSTANCE_REBOOT_HARD,
4266
                                   constants.INSTANCE_REBOOT_FULL))
4267
    self._ExpandAndLockInstance()
4268

    
4269
  def BuildHooksEnv(self):
4270
    """Build hooks env.
4271

4272
    This runs on master, primary and secondary nodes of the instance.
4273

4274
    """
4275
    env = {
4276
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
4277
      "REBOOT_TYPE": self.op.reboot_type,
4278
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
4279
      }
4280
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4281
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4282
    return env, nl, nl
4283

    
4284
  def CheckPrereq(self):
4285
    """Check prerequisites.
4286

4287
    This checks that the instance is in the cluster.
4288

4289
    """
4290
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4291
    assert self.instance is not None, \
4292
      "Cannot retrieve locked instance %s" % self.op.instance_name
4293

    
4294
    _CheckNodeOnline(self, instance.primary_node)
4295

    
4296
    # check bridges existence
4297
    _CheckInstanceBridgesExist(self, instance)
4298

    
4299
  def Exec(self, feedback_fn):
4300
    """Reboot the instance.
4301

4302
    """
4303
    instance = self.instance
4304
    ignore_secondaries = self.op.ignore_secondaries
4305
    reboot_type = self.op.reboot_type
4306

    
4307
    node_current = instance.primary_node
4308

    
4309
    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
4310
                       constants.INSTANCE_REBOOT_HARD]:
4311
      for disk in instance.disks:
4312
        self.cfg.SetDiskID(disk, node_current)
4313
      result = self.rpc.call_instance_reboot(node_current, instance,
4314
                                             reboot_type,
4315
                                             self.shutdown_timeout)
4316
      result.Raise("Could not reboot instance")
4317
    else:
4318
      result = self.rpc.call_instance_shutdown(node_current, instance,
4319
                                               self.shutdown_timeout)
4320
      result.Raise("Could not shutdown instance for full reboot")
4321
      _ShutdownInstanceDisks(self, instance)
4322
      _StartInstanceDisks(self, instance, ignore_secondaries)
4323
      result = self.rpc.call_instance_start(node_current, instance, None, None)
4324
      msg = result.fail_msg
4325
      if msg:
4326
        _ShutdownInstanceDisks(self, instance)
4327
        raise errors.OpExecError("Could not start instance for"
4328
                                 " full reboot: %s" % msg)
4329

    
4330
    self.cfg.MarkInstanceUp(instance.name)
4331

    
4332

    
4333
class LUShutdownInstance(LogicalUnit):
4334
  """Shutdown an instance.
4335

4336
  """
4337
  HPATH = "instance-stop"
4338
  HTYPE = constants.HTYPE_INSTANCE
4339
  _OP_REQP = ["instance_name"]
4340
  REQ_BGL = False
4341

    
4342
  def CheckArguments(self):
4343
    """Check the arguments.
4344

4345
    """
4346
    self.timeout = getattr(self.op, "timeout",
4347
                           constants.DEFAULT_SHUTDOWN_TIMEOUT)
4348

    
4349
  def ExpandNames(self):
4350
    self._ExpandAndLockInstance()
4351

    
4352
  def BuildHooksEnv(self):
4353
    """Build hooks env.
4354

4355
    This runs on master, primary and secondary nodes of the instance.
4356

4357
    """
4358
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4359
    env["TIMEOUT"] = self.timeout
4360
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4361
    return env, nl, nl
4362

    
4363
  def CheckPrereq(self):
4364
    """Check prerequisites.
4365

4366
    This checks that the instance is in the cluster.
4367

4368
    """
4369
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4370
    assert self.instance is not None, \
4371
      "Cannot retrieve locked instance %s" % self.op.instance_name
4372
    _CheckNodeOnline(self, self.instance.primary_node)
4373

    
4374
  def Exec(self, feedback_fn):
4375
    """Shutdown the instance.
4376

4377
    """
4378
    instance = self.instance
4379
    node_current = instance.primary_node
4380
    timeout = self.timeout
4381
    self.cfg.MarkInstanceDown(instance.name)
4382
    result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
4383
    msg = result.fail_msg
4384
    if msg:
4385
      self.proc.LogWarning("Could not shutdown instance: %s" % msg)
4386

    
4387
    _ShutdownInstanceDisks(self, instance)
4388

    
4389

    
4390
class LUReinstallInstance(LogicalUnit):
4391
  """Reinstall an instance.
4392

4393
  """
4394
  HPATH = "instance-reinstall"
4395
  HTYPE = constants.HTYPE_INSTANCE
4396
  _OP_REQP = ["instance_name"]
4397
  REQ_BGL = False
4398

    
4399
  def ExpandNames(self):
4400
    self._ExpandAndLockInstance()
4401

    
4402
  def BuildHooksEnv(self):
4403
    """Build hooks env.
4404

4405
    This runs on master, primary and secondary nodes of the instance.
4406

4407
    """
4408
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4409
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4410
    return env, nl, nl
4411

    
4412
  def CheckPrereq(self):
4413
    """Check prerequisites.
4414

4415
    This checks that the instance is in the cluster and is not running.
4416

4417
    """
4418
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4419
    assert instance is not None, \
4420
      "Cannot retrieve locked instance %s" % self.op.instance_name
4421
    _CheckNodeOnline(self, instance.primary_node)
4422

    
4423
    if instance.disk_template == constants.DT_DISKLESS:
4424
      raise errors.OpPrereqError("Instance '%s' has no disks" %
4425
                                 self.op.instance_name,
4426
                                 errors.ECODE_INVAL)
4427
    _CheckInstanceDown(self, instance, "cannot reinstall")
4428

    
4429
    self.op.os_type = getattr(self.op, "os_type", None)
4430
    self.op.force_variant = getattr(self.op, "force_variant", False)
4431
    if self.op.os_type is not None:
4432
      # OS verification
4433
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
4434
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
4435

    
4436
    self.instance = instance
4437

    
4438
  def Exec(self, feedback_fn):
4439
    """Reinstall the instance.
4440

4441
    """
4442
    inst = self.instance
4443

    
4444
    if self.op.os_type is not None:
4445
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
4446
      inst.os = self.op.os_type
4447
      self.cfg.Update(inst, feedback_fn)
4448

    
4449
    _StartInstanceDisks(self, inst, None)
4450
    try:
4451
      feedback_fn("Running the instance OS create scripts...")
4452
      # FIXME: pass debug option from opcode to backend
4453
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
4454
                                             self.op.debug_level)
4455
      result.Raise("Could not install OS for instance %s on node %s" %
4456
                   (inst.name, inst.primary_node))
4457
    finally:
4458
      _ShutdownInstanceDisks(self, inst)
4459

    
4460

    
4461
class LURecreateInstanceDisks(LogicalUnit):
4462
  """Recreate an instance's missing disks.
4463

4464
  """
4465
  HPATH = "instance-recreate-disks"
4466
  HTYPE = constants.HTYPE_INSTANCE
4467
  _OP_REQP = ["instance_name", "disks"]
4468
  REQ_BGL = False
4469

    
4470
  def CheckArguments(self):
4471
    """Check the arguments.
4472

4473
    """
4474
    if not isinstance(self.op.disks, list):
4475
      raise errors.OpPrereqError("Invalid disks parameter", errors.ECODE_INVAL)
4476
    for item in self.op.disks:
4477
      if (not isinstance(item, int) or
4478
          item < 0):
4479
        raise errors.OpPrereqError("Invalid disk specification '%s'" %
4480
                                   str(item), errors.ECODE_INVAL)
4481

    
4482
  def ExpandNames(self):
4483
    self._ExpandAndLockInstance()
4484

    
4485
  def BuildHooksEnv(self):
4486
    """Build hooks env.
4487

4488
    This runs on master, primary and secondary nodes of the instance.
4489

4490
    """
4491
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4492
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4493
    return env, nl, nl
4494

    
4495
  def CheckPrereq(self):
4496
    """Check prerequisites.
4497

4498
    This checks that the instance is in the cluster and is not running.
4499

4500
    """
4501
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4502
    assert instance is not None, \
4503
      "Cannot retrieve locked instance %s" % self.op.instance_name
4504
    _CheckNodeOnline(self, instance.primary_node)
4505

    
4506
    if instance.disk_template == constants.DT_DISKLESS:
4507
      raise errors.OpPrereqError("Instance '%s' has no disks" %
4508
                                 self.op.instance_name, errors.ECODE_INVAL)
4509
    _CheckInstanceDown(self, instance, "cannot recreate disks")
4510

    
4511
    if not self.op.disks:
4512
      self.op.disks = range(len(instance.disks))
4513
    else:
4514
      for idx in self.op.disks:
4515
        if idx >= len(instance.disks):
4516
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
4517
                                     errors.ECODE_INVAL)
4518

    
4519
    self.instance = instance
4520

    
4521
  def Exec(self, feedback_fn):
4522
    """Recreate the disks.
4523

4524
    """
4525
    to_skip = []
4526
    for idx, _ in enumerate(self.instance.disks):
4527
      if idx not in self.op.disks: # disk idx has not been passed in
4528
        to_skip.append(idx)
4529
        continue
4530

    
4531
    _CreateDisks(self, self.instance, to_skip=to_skip)
4532

    
4533

    
4534
class LURenameInstance(LogicalUnit):
4535
  """Rename an instance.
4536

4537
  """
4538
  HPATH = "instance-rename"
4539
  HTYPE = constants.HTYPE_INSTANCE
4540
  _OP_REQP = ["instance_name", "new_name"]
4541

    
4542
  def BuildHooksEnv(self):
4543
    """Build hooks env.
4544

4545
    This runs on master, primary and secondary nodes of the instance.
4546

4547
    """
4548
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4549
    env["INSTANCE_NEW_NAME"] = self.op.new_name
4550
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4551
    return env, nl, nl
4552

    
4553
  def CheckPrereq(self):
4554
    """Check prerequisites.
4555

4556
    This checks that the instance is in the cluster and is not running.
4557

4558
    """
4559
    self.op.instance_name = _ExpandInstanceName(self.cfg,
4560
                                                self.op.instance_name)
4561
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4562
    assert instance is not None
4563
    _CheckNodeOnline(self, instance.primary_node)
4564
    _CheckInstanceDown(self, instance, "cannot rename")
4565
    self.instance = instance
4566

    
4567
    # new name verification
4568
    name_info = utils.GetHostInfo(self.op.new_name)
4569

    
4570
    self.op.new_name = new_name = name_info.name
4571
    instance_list = self.cfg.GetInstanceList()
4572
    if new_name in instance_list:
4573
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4574
                                 new_name, errors.ECODE_EXISTS)
4575

    
4576
    if not getattr(self.op, "ignore_ip", False):
4577
      if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
4578
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
4579
                                   (name_info.ip, new_name),
4580
                                   errors.ECODE_NOTUNIQUE)
4581

    
4582

    
4583
  def Exec(self, feedback_fn):
4584
    """Reinstall the instance.
4585

4586
    """
4587
    inst = self.instance
4588
    old_name = inst.name
4589

    
4590
    if inst.disk_template == constants.DT_FILE:
4591
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4592

    
4593
    self.cfg.RenameInstance(inst.name, self.op.new_name)
4594
    # Change the instance lock. This is definitely safe while we hold the BGL
4595
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
4596
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
4597

    
4598
    # re-read the instance from the configuration after rename
4599
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
4600

    
4601
    if inst.disk_template == constants.DT_FILE:
4602
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4603
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
4604
                                                     old_file_storage_dir,
4605
                                                     new_file_storage_dir)
4606
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
4607
                   " (but the instance has been renamed in Ganeti)" %
4608
                   (inst.primary_node, old_file_storage_dir,
4609
                    new_file_storage_dir))
4610

    
4611
    _StartInstanceDisks(self, inst, None)
4612
    try:
4613
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
4614
                                                 old_name, self.op.debug_level)
4615
      msg = result.fail_msg
4616
      if msg:
4617
        msg = ("Could not run OS rename script for instance %s on node %s"
4618
               " (but the instance has been renamed in Ganeti): %s" %
4619
               (inst.name, inst.primary_node, msg))
4620
        self.proc.LogWarning(msg)
4621
    finally:
4622
      _ShutdownInstanceDisks(self, inst)
4623

    
4624

    
4625
class LURemoveInstance(LogicalUnit):
4626
  """Remove an instance.
4627

4628
  """
4629
  HPATH = "instance-remove"
4630
  HTYPE = constants.HTYPE_INSTANCE
4631
  _OP_REQP = ["instance_name", "ignore_failures"]
4632
  REQ_BGL = False
4633

    
4634
  def CheckArguments(self):
4635
    """Check the arguments.
4636

4637
    """
4638
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4639
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
4640

    
4641
  def ExpandNames(self):
4642
    self._ExpandAndLockInstance()
4643
    self.needed_locks[locking.LEVEL_NODE] = []
4644
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4645

    
4646
  def DeclareLocks(self, level):
4647
    if level == locking.LEVEL_NODE:
4648
      self._LockInstancesNodes()
4649

    
4650
  def BuildHooksEnv(self):
4651
    """Build hooks env.
4652

4653
    This runs on master, primary and secondary nodes of the instance.
4654

4655
    """
4656
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4657
    env["SHUTDOWN_TIMEOUT"] = self.shutdown_timeout
4658
    nl = [self.cfg.GetMasterNode()]
4659
    nl_post = list(self.instance.all_nodes) + nl
4660
    return env, nl, nl_post
4661

    
4662
  def CheckPrereq(self):
4663
    """Check prerequisites.
4664

4665
    This checks that the instance is in the cluster.
4666

4667
    """
4668
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4669
    assert self.instance is not None, \
4670
      "Cannot retrieve locked instance %s" % self.op.instance_name
4671

    
4672
  def Exec(self, feedback_fn):
4673
    """Remove the instance.
4674

4675
    """
4676
    instance = self.instance
4677
    logging.info("Shutting down instance %s on node %s",
4678
                 instance.name, instance.primary_node)
4679

    
4680
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
4681
                                             self.shutdown_timeout)
4682
    msg = result.fail_msg
4683
    if msg:
4684
      if self.op.ignore_failures:
4685
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
4686
      else:
4687
        raise errors.OpExecError("Could not shutdown instance %s on"
4688
                                 " node %s: %s" %
4689
                                 (instance.name, instance.primary_node, msg))
4690

    
4691
    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
4692

    
4693

    
4694
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
4695
  """Utility function to remove an instance.
4696

4697
  """
4698
  logging.info("Removing block devices for instance %s", instance.name)
4699

    
4700
  if not _RemoveDisks(lu, instance):
4701
    if not ignore_failures:
4702
      raise errors.OpExecError("Can't remove instance's disks")
4703
    feedback_fn("Warning: can't remove instance's disks")
4704

    
4705
  logging.info("Removing instance %s out of cluster config", instance.name)
4706

    
4707
  lu.cfg.RemoveInstance(instance.name)
4708

    
4709
  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
4710
    "Instance lock removal conflict"
4711

    
4712
  # Remove lock for the instance
4713
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
4714

    
4715

    
4716
class LUQueryInstances(NoHooksLU):
4717
  """Logical unit for querying instances.
4718

4719
  """
4720
  # pylint: disable-msg=W0142
4721
  _OP_REQP = ["output_fields", "names", "use_locking"]
4722
  REQ_BGL = False
4723
  _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
4724
                    "serial_no", "ctime", "mtime", "uuid"]
4725
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
4726
                                    "admin_state",
4727
                                    "disk_template", "ip", "mac", "bridge",
4728
                                    "nic_mode", "nic_link",
4729
                                    "sda_size", "sdb_size", "vcpus", "tags",
4730
                                    "network_port", "beparams",
4731
                                    r"(disk)\.(size)/([0-9]+)",
4732
                                    r"(disk)\.(sizes)", "disk_usage",
4733
                                    r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
4734
                                    r"(nic)\.(bridge)/([0-9]+)",
4735
                                    r"(nic)\.(macs|ips|modes|links|bridges)",
4736
                                    r"(disk|nic)\.(count)",
4737
                                    "hvparams",
4738
                                    ] + _SIMPLE_FIELDS +
4739
                                  ["hv/%s" % name
4740
                                   for name in constants.HVS_PARAMETERS
4741
                                   if name not in constants.HVC_GLOBALS] +
4742
                                  ["be/%s" % name
4743
                                   for name in constants.BES_PARAMETERS])
4744
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
4745

    
4746

    
4747
  def ExpandNames(self):
4748
    _CheckOutputFields(static=self._FIELDS_STATIC,
4749
                       dynamic=self._FIELDS_DYNAMIC,
4750
                       selected=self.op.output_fields)
4751

    
4752
    self.needed_locks = {}
4753
    self.share_locks[locking.LEVEL_INSTANCE] = 1
4754
    self.share_locks[locking.LEVEL_NODE] = 1
4755

    
4756
    if self.op.names:
4757
      self.wanted = _GetWantedInstances(self, self.op.names)
4758
    else:
4759
      self.wanted = locking.ALL_SET
4760

    
4761
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
4762
    self.do_locking = self.do_node_query and self.op.use_locking
4763
    if self.do_locking:
4764
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4765
      self.needed_locks[locking.LEVEL_NODE] = []
4766
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4767

    
4768
  def DeclareLocks(self, level):
4769
    if level == locking.LEVEL_NODE and self.do_locking:
4770
      self._LockInstancesNodes()
4771

    
4772
  def CheckPrereq(self):
4773
    """Check prerequisites.
4774

4775
    """
4776
    pass
4777

    
4778
  def Exec(self, feedback_fn):
4779
    """Computes the list of nodes and their attributes.
4780

4781
    """
4782
    # pylint: disable-msg=R0912
4783
    # way too many branches here
4784
    all_info = self.cfg.GetAllInstancesInfo()
4785
    if self.wanted == locking.ALL_SET:
4786
      # caller didn't specify instance names, so ordering is not important
4787
      if self.do_locking:
4788
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
4789
      else:
4790
        instance_names = all_info.keys()
4791
      instance_names = utils.NiceSort(instance_names)
4792
    else:
4793
      # caller did specify names, so we must keep the ordering
4794
      if self.do_locking:
4795
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
4796
      else:
4797
        tgt_set = all_info.keys()
4798
      missing = set(self.wanted).difference(tgt_set)
4799
      if missing:
4800
        raise errors.OpExecError("Some instances were removed before"
4801
                                 " retrieving their data: %s" % missing)
4802
      instance_names = self.wanted
4803

    
4804
    instance_list = [all_info[iname] for iname in instance_names]
4805

    
4806
    # begin data gathering
4807

    
4808
    nodes = frozenset([inst.primary_node for inst in instance_list])
4809
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
4810

    
4811
    bad_nodes = []
4812
    off_nodes = []
4813
    if self.do_node_query:
4814
      live_data = {}
4815
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
4816
      for name in nodes:
4817
        result = node_data[name]
4818
        if result.offline:
4819
          # offline nodes will be in both lists
4820
          off_nodes.append(name)
4821
        if result.fail_msg:
4822
          bad_nodes.append(name)
4823
        else:
4824
          if result.payload:
4825
            live_data.update(result.payload)
4826
          # else no instance is alive
4827
    else:
4828
      live_data = dict([(name, {}) for name in instance_names])
4829

    
4830
    # end data gathering
4831

    
4832
    HVPREFIX = "hv/"
4833
    BEPREFIX = "be/"
4834
    output = []
4835
    cluster = self.cfg.GetClusterInfo()
4836
    for instance in instance_list:
4837
      iout = []
4838
      i_hv = cluster.FillHV(instance, skip_globals=True)
4839
      i_be = cluster.FillBE(instance)
4840
      i_nicp = [cluster.SimpleFillNIC(nic.nicparams) for nic in instance.nics]
4841
      for field in self.op.output_fields:
4842
        st_match = self._FIELDS_STATIC.Matches(field)
4843
        if field in self._SIMPLE_FIELDS:
4844
          val = getattr(instance, field)
4845
        elif field == "pnode":
4846
          val = instance.primary_node
4847
        elif field == "snodes":
4848
          val = list(instance.secondary_nodes)
4849
        elif field == "admin_state":
4850
          val = instance.admin_up
4851
        elif field == "oper_state":
4852
          if instance.primary_node in bad_nodes:
4853
            val = None
4854
          else:
4855
            val = bool(live_data.get(instance.name))
4856
        elif field == "status":
4857
          if instance.primary_node in off_nodes:
4858
            val = "ERROR_nodeoffline"
4859
          elif instance.primary_node in bad_nodes:
4860
            val = "ERROR_nodedown"
4861
          else:
4862
            running = bool(live_data.get(instance.name))
4863
            if running:
4864
              if instance.admin_up:
4865
                val = "running"
4866
              else:
4867
                val = "ERROR_up"
4868
            else:
4869
              if instance.admin_up:
4870
                val = "ERROR_down"
4871
              else:
4872
                val = "ADMIN_down"
4873
        elif field == "oper_ram":
4874
          if instance.primary_node in bad_nodes:
4875
            val = None
4876
          elif instance.name in live_data:
4877
            val = live_data[instance.name].get("memory", "?")
4878
          else:
4879
            val = "-"
4880
        elif field == "vcpus":
4881
          val = i_be[constants.BE_VCPUS]
4882
        elif field == "disk_template":
4883
          val = instance.disk_template
4884
        elif field == "ip":
4885
          if instance.nics:
4886
            val = instance.nics[0].ip
4887
          else:
4888
            val = None
4889
        elif field == "nic_mode":
4890
          if instance.nics:
4891
            val = i_nicp[0][constants.NIC_MODE]
4892
          else:
4893
            val = None
4894
        elif field == "nic_link":
4895
          if instance.nics:
4896
            val = i_nicp[0][constants.NIC_LINK]
4897
          else:
4898
            val = None
4899
        elif field == "bridge":
4900
          if (instance.nics and
4901
              i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
4902
            val = i_nicp[0][constants.NIC_LINK]
4903
          else:
4904
            val = None
4905
        elif field == "mac":
4906
          if instance.nics:
4907
            val = instance.nics[0].mac
4908
          else:
4909
            val = None
4910
        elif field == "sda_size" or field == "sdb_size":
4911
          idx = ord(field[2]) - ord('a')
4912
          try:
4913
            val = instance.FindDisk(idx).size
4914
          except errors.OpPrereqError:
4915
            val = None
4916
        elif field == "disk_usage": # total disk usage per node
4917
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
4918
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
4919
        elif field == "tags":
4920
          val = list(instance.GetTags())
4921
        elif field == "hvparams":
4922
          val = i_hv
4923
        elif (field.startswith(HVPREFIX) and
4924
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
4925
              field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
4926
          val = i_hv.get(field[len(HVPREFIX):], None)
4927
        elif field == "beparams":
4928
          val = i_be
4929
        elif (field.startswith(BEPREFIX) and
4930
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
4931
          val = i_be.get(field[len(BEPREFIX):], None)
4932
        elif st_match and st_match.groups():
4933
          # matches a variable list
4934
          st_groups = st_match.groups()
4935
          if st_groups and st_groups[0] == "disk":
4936
            if st_groups[1] == "count":
4937
              val = len(instance.disks)
4938
            elif st_groups[1] == "sizes":
4939
              val = [disk.size for disk in instance.disks]
4940
            elif st_groups[1] == "size":
4941
              try:
4942
                val = instance.FindDisk(st_groups[2]).size
4943
              except errors.OpPrereqError:
4944
                val = None
4945
            else:
4946
              assert False, "Unhandled disk parameter"
4947
          elif st_groups[0] == "nic":
4948
            if st_groups[1] == "count":
4949
              val = len(instance.nics)
4950
            elif st_groups[1] == "macs":
4951
              val = [nic.mac for nic in instance.nics]
4952
            elif st_groups[1] == "ips":
4953
              val = [nic.ip for nic in instance.nics]
4954
            elif st_groups[1] == "modes":
4955
              val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
4956
            elif st_groups[1] == "links":
4957
              val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
4958
            elif st_groups[1] == "bridges":
4959
              val = []
4960
              for nicp in i_nicp:
4961
                if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
4962
                  val.append(nicp[constants.NIC_LINK])
4963
                else:
4964
                  val.append(None)
4965
            else:
4966
              # index-based item
4967
              nic_idx = int(st_groups[2])
4968
              if nic_idx >= len(instance.nics):
4969
                val = None
4970
              else:
4971
                if st_groups[1] == "mac":
4972
                  val = instance.nics[nic_idx].mac
4973
                elif st_groups[1] == "ip":
4974
                  val = instance.nics[nic_idx].ip
4975
                elif st_groups[1] == "mode":
4976
                  val = i_nicp[nic_idx][constants.NIC_MODE]
4977
                elif st_groups[1] == "link":
4978
                  val = i_nicp[nic_idx][constants.NIC_LINK]
4979
                elif st_groups[1] == "bridge":
4980
                  nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
4981
                  if nic_mode == constants.NIC_MODE_BRIDGED:
4982
                    val = i_nicp[nic_idx][constants.NIC_LINK]
4983
                  else:
4984
                    val = None
4985
                else:
4986
                  assert False, "Unhandled NIC parameter"
4987
          else:
4988
            assert False, ("Declared but unhandled variable parameter '%s'" %
4989
                           field)
4990
        else:
4991
          assert False, "Declared but unhandled parameter '%s'" % field
4992
        iout.append(val)
4993
      output.append(iout)
4994

    
4995
    return output
4996

    
4997

    
4998
class LUFailoverInstance(LogicalUnit):
4999
  """Failover an instance.
5000

5001
  """
5002
  HPATH = "instance-failover"
5003
  HTYPE = constants.HTYPE_INSTANCE
5004
  _OP_REQP = ["instance_name", "ignore_consistency"]
5005
  REQ_BGL = False
5006

    
5007
  def CheckArguments(self):
5008
    """Check the arguments.
5009

5010
    """
5011
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
5012
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
5013

    
5014
  def ExpandNames(self):
5015
    self._ExpandAndLockInstance()
5016
    self.needed_locks[locking.LEVEL_NODE] = []
5017
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5018

    
5019
  def DeclareLocks(self, level):
5020
    if level == locking.LEVEL_NODE:
5021
      self._LockInstancesNodes()
5022

    
5023
  def BuildHooksEnv(self):
5024
    """Build hooks env.
5025

5026
    This runs on master, primary and secondary nodes of the instance.
5027

5028
    """
5029
    instance = self.instance
5030
    source_node = instance.primary_node
5031
    target_node = instance.secondary_nodes[0]
5032
    env = {
5033
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5034
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
5035
      "OLD_PRIMARY": source_node,
5036
      "OLD_SECONDARY": target_node,
5037
      "NEW_PRIMARY": target_node,
5038
      "NEW_SECONDARY": source_node,
5039
      }
5040
    env.update(_BuildInstanceHookEnvByObject(self, instance))
5041
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5042
    nl_post = list(nl)
5043
    nl_post.append(source_node)
5044
    return env, nl, nl_post
5045

    
5046
  def CheckPrereq(self):
5047
    """Check prerequisites.
5048

5049
    This checks that the instance is in the cluster.
5050

5051
    """
5052
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5053
    assert self.instance is not None, \
5054
      "Cannot retrieve locked instance %s" % self.op.instance_name
5055

    
5056
    bep = self.cfg.GetClusterInfo().FillBE(instance)
5057
    if instance.disk_template not in constants.DTS_NET_MIRROR:
5058
      raise errors.OpPrereqError("Instance's disk layout is not"
5059
                                 " network mirrored, cannot failover.",
5060
                                 errors.ECODE_STATE)
5061

    
5062
    secondary_nodes = instance.secondary_nodes
5063
    if not secondary_nodes:
5064
      raise errors.ProgrammerError("no secondary node but using "
5065
                                   "a mirrored disk template")
5066

    
5067
    target_node = secondary_nodes[0]
5068
    _CheckNodeOnline(self, target_node)
5069
    _CheckNodeNotDrained(self, target_node)
5070
    if instance.admin_up:
5071
      # check memory requirements on the secondary node
5072
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5073
                           instance.name, bep[constants.BE_MEMORY],
5074
                           instance.hypervisor)
5075
    else:
5076
      self.LogInfo("Not checking memory on the secondary node as"
5077
                   " instance will not be started")
5078

    
5079
    # check bridge existance
5080
    _CheckInstanceBridgesExist(self, instance, node=target_node)
5081

    
5082
  def Exec(self, feedback_fn):
5083
    """Failover an instance.
5084

5085
    The failover is done by shutting it down on its present node and
5086
    starting it on the secondary.
5087

5088
    """
5089
    instance = self.instance
5090

    
5091
    source_node = instance.primary_node
5092
    target_node = instance.secondary_nodes[0]
5093

    
5094
    if instance.admin_up:
5095
      feedback_fn("* checking disk consistency between source and target")
5096
      for dev in instance.disks:
5097
        # for drbd, these are drbd over lvm
5098
        if not _CheckDiskConsistency(self, dev, target_node, False):
5099
          if not self.op.ignore_consistency:
5100
            raise errors.OpExecError("Disk %s is degraded on target node,"
5101
                                     " aborting failover." % dev.iv_name)
5102
    else:
5103
      feedback_fn("* not checking disk consistency as instance is not running")
5104

    
5105
    feedback_fn("* shutting down instance on source node")
5106
    logging.info("Shutting down instance %s on node %s",
5107
                 instance.name, source_node)
5108

    
5109
    result = self.rpc.call_instance_shutdown(source_node, instance,
5110
                                             self.shutdown_timeout)
5111
    msg = result.fail_msg
5112
    if msg:
5113
      if self.op.ignore_consistency:
5114
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
5115
                             " Proceeding anyway. Please make sure node"
5116
                             " %s is down. Error details: %s",
5117
                             instance.name, source_node, source_node, msg)
5118
      else:
5119
        raise errors.OpExecError("Could not shutdown instance %s on"
5120
                                 " node %s: %s" %
5121
                                 (instance.name, source_node, msg))
5122

    
5123
    feedback_fn("* deactivating the instance's disks on source node")
5124
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5125
      raise errors.OpExecError("Can't shut down the instance's disks.")
5126

    
5127
    instance.primary_node = target_node
5128
    # distribute new instance config to the other nodes
5129
    self.cfg.Update(instance, feedback_fn)
5130

    
5131
    # Only start the instance if it's marked as up
5132
    if instance.admin_up:
5133
      feedback_fn("* activating the instance's disks on target node")
5134
      logging.info("Starting instance %s on node %s",
5135
                   instance.name, target_node)
5136

    
5137
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
5138
                                               ignore_secondaries=True)
5139
      if not disks_ok:
5140
        _ShutdownInstanceDisks(self, instance)
5141
        raise errors.OpExecError("Can't activate the instance's disks")
5142

    
5143
      feedback_fn("* starting the instance on the target node")
5144
      result = self.rpc.call_instance_start(target_node, instance, None, None)
5145
      msg = result.fail_msg
5146
      if msg:
5147
        _ShutdownInstanceDisks(self, instance)
5148
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5149
                                 (instance.name, target_node, msg))
5150

    
5151

    
5152
class LUMigrateInstance(LogicalUnit):
5153
  """Migrate an instance.
5154

5155
  This is migration without shutting down, compared to the failover,
5156
  which is done with shutdown.
5157

5158
  """
5159
  HPATH = "instance-migrate"
5160
  HTYPE = constants.HTYPE_INSTANCE
5161
  _OP_REQP = ["instance_name", "live", "cleanup"]
5162

    
5163
  REQ_BGL = False
5164

    
5165
  def ExpandNames(self):
5166
    self._ExpandAndLockInstance()
5167

    
5168
    self.needed_locks[locking.LEVEL_NODE] = []
5169
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5170

    
5171
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
5172
                                       self.op.live, self.op.cleanup)
5173
    self.tasklets = [self._migrater]
5174

    
5175
  def DeclareLocks(self, level):
5176
    if level == locking.LEVEL_NODE:
5177
      self._LockInstancesNodes()
5178

    
5179
  def BuildHooksEnv(self):
5180
    """Build hooks env.
5181

5182
    This runs on master, primary and secondary nodes of the instance.
5183

5184
    """
5185
    instance = self._migrater.instance
5186
    source_node = instance.primary_node
5187
    target_node = instance.secondary_nodes[0]
5188
    env = _BuildInstanceHookEnvByObject(self, instance)
5189
    env["MIGRATE_LIVE"] = self.op.live
5190
    env["MIGRATE_CLEANUP"] = self.op.cleanup
5191
    env.update({
5192
        "OLD_PRIMARY": source_node,
5193
        "OLD_SECONDARY": target_node,
5194
        "NEW_PRIMARY": target_node,
5195
        "NEW_SECONDARY": source_node,
5196
        })
5197
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5198
    nl_post = list(nl)
5199
    nl_post.append(source_node)
5200
    return env, nl, nl_post
5201

    
5202

    
5203
class LUMoveInstance(LogicalUnit):
5204
  """Move an instance by data-copying.
5205

5206
  """
5207
  HPATH = "instance-move"
5208
  HTYPE = constants.HTYPE_INSTANCE
5209
  _OP_REQP = ["instance_name", "target_node"]
5210
  REQ_BGL = False
5211

    
5212
  def CheckArguments(self):
5213
    """Check the arguments.
5214

5215
    """
5216
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
5217
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
5218

    
5219
  def ExpandNames(self):
5220
    self._ExpandAndLockInstance()
5221
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5222
    self.op.target_node = target_node
5223
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
5224
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5225

    
5226
  def DeclareLocks(self, level):
5227
    if level == locking.LEVEL_NODE:
5228
      self._LockInstancesNodes(primary_only=True)
5229

    
5230
  def BuildHooksEnv(self):
5231
    """Build hooks env.
5232

5233
    This runs on master, primary and secondary nodes of the instance.
5234

5235
    """
5236
    env = {
5237
      "TARGET_NODE": self.op.target_node,
5238
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
5239
      }
5240
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5241
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5242
                                       self.op.target_node]
5243
    return env, nl, nl
5244

    
5245
  def CheckPrereq(self):
5246
    """Check prerequisites.
5247

5248
    This checks that the instance is in the cluster.
5249

5250
    """
5251
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5252
    assert self.instance is not None, \
5253
      "Cannot retrieve locked instance %s" % self.op.instance_name
5254

    
5255
    node = self.cfg.GetNodeInfo(self.op.target_node)
5256
    assert node is not None, \
5257
      "Cannot retrieve locked node %s" % self.op.target_node
5258

    
5259
    self.target_node = target_node = node.name
5260

    
5261
    if target_node == instance.primary_node:
5262
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
5263
                                 (instance.name, target_node),
5264
                                 errors.ECODE_STATE)
5265

    
5266
    bep = self.cfg.GetClusterInfo().FillBE(instance)
5267

    
5268
    for idx, dsk in enumerate(instance.disks):
5269
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5270
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5271
                                   " cannot copy" % idx, errors.ECODE_STATE)
5272

    
5273
    _CheckNodeOnline(self, target_node)
5274
    _CheckNodeNotDrained(self, target_node)
5275

    
5276
    if instance.admin_up:
5277
      # check memory requirements on the secondary node
5278
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5279
                           instance.name, bep[constants.BE_MEMORY],
5280
                           instance.hypervisor)
5281
    else:
5282
      self.LogInfo("Not checking memory on the secondary node as"
5283
                   " instance will not be started")
5284

    
5285
    # check bridge existance
5286
    _CheckInstanceBridgesExist(self, instance, node=target_node)
5287

    
5288
  def Exec(self, feedback_fn):
5289
    """Move an instance.
5290

5291
    The move is done by shutting it down on its present node, copying
5292
    the data over (slow) and starting it on the new node.
5293

5294
    """
5295
    instance = self.instance
5296

    
5297
    source_node = instance.primary_node
5298
    target_node = self.target_node
5299

    
5300
    self.LogInfo("Shutting down instance %s on source node %s",
5301
                 instance.name, source_node)
5302

    
5303
    result = self.rpc.call_instance_shutdown(source_node, instance,
5304
                                             self.shutdown_timeout)
5305
    msg = result.fail_msg
5306
    if msg:
5307
      if self.op.ignore_consistency:
5308
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
5309
                             " Proceeding anyway. Please make sure node"
5310
                             " %s is down. Error details: %s",
5311
                             instance.name, source_node, source_node, msg)
5312
      else:
5313
        raise errors.OpExecError("Could not shutdown instance %s on"
5314
                                 " node %s: %s" %
5315
                                 (instance.name, source_node, msg))
5316

    
5317
    # create the target disks
5318
    try:
5319
      _CreateDisks(self, instance, target_node=target_node)
5320
    except errors.OpExecError:
5321
      self.LogWarning("Device creation failed, reverting...")
5322
      try:
5323
        _RemoveDisks(self, instance, target_node=target_node)
5324
      finally:
5325
        self.cfg.ReleaseDRBDMinors(instance.name)
5326
        raise
5327

    
5328
    cluster_name = self.cfg.GetClusterInfo().cluster_name
5329

    
5330
    errs = []
5331
    # activate, get path, copy the data over
5332
    for idx, disk in enumerate(instance.disks):
5333
      self.LogInfo("Copying data for disk %d", idx)
5334
      result = self.rpc.call_blockdev_assemble(target_node, disk,
5335
                                               instance.name, True)
5336
      if result.fail_msg:
5337
        self.LogWarning("Can't assemble newly created disk %d: %s",
5338
                        idx, result.fail_msg)
5339
        errs.append(result.fail_msg)
5340
        break
5341
      dev_path = result.payload
5342
      result = self.rpc.call_blockdev_export(source_node, disk,
5343
                                             target_node, dev_path,
5344
                                             cluster_name)
5345
      if result.fail_msg:
5346
        self.LogWarning("Can't copy data over for disk %d: %s",
5347
                        idx, result.fail_msg)
5348
        errs.append(result.fail_msg)
5349
        break
5350

    
5351
    if errs:
5352
      self.LogWarning("Some disks failed to copy, aborting")
5353
      try:
5354
        _RemoveDisks(self, instance, target_node=target_node)
5355
      finally:
5356
        self.cfg.ReleaseDRBDMinors(instance.name)
5357
        raise errors.OpExecError("Errors during disk copy: %s" %
5358
                                 (",".join(errs),))
5359

    
5360
    instance.primary_node = target_node
5361
    self.cfg.Update(instance, feedback_fn)
5362

    
5363
    self.LogInfo("Removing the disks on the original node")
5364
    _RemoveDisks(self, instance, target_node=source_node)
5365

    
5366
    # Only start the instance if it's marked as up
5367
    if instance.admin_up:
5368
      self.LogInfo("Starting instance %s on node %s",
5369
                   instance.name, target_node)
5370

    
5371
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
5372
                                           ignore_secondaries=True)
5373
      if not disks_ok:
5374
        _ShutdownInstanceDisks(self, instance)
5375
        raise errors.OpExecError("Can't activate the instance's disks")
5376

    
5377
      result = self.rpc.call_instance_start(target_node, instance, None, None)
5378
      msg = result.fail_msg
5379
      if msg:
5380
        _ShutdownInstanceDisks(self, instance)
5381
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5382
                                 (instance.name, target_node, msg))
5383

    
5384

    
5385
class LUMigrateNode(LogicalUnit):
5386
  """Migrate all instances from a node.
5387

5388
  """
5389
  HPATH = "node-migrate"
5390
  HTYPE = constants.HTYPE_NODE
5391
  _OP_REQP = ["node_name", "live"]
5392
  REQ_BGL = False
5393

    
5394
  def ExpandNames(self):
5395
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5396

    
5397
    self.needed_locks = {
5398
      locking.LEVEL_NODE: [self.op.node_name],
5399
      }
5400

    
5401
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5402

    
5403
    # Create tasklets for migrating instances for all instances on this node
5404
    names = []
5405
    tasklets = []
5406

    
5407
    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
5408
      logging.debug("Migrating instance %s", inst.name)
5409
      names.append(inst.name)
5410

    
5411
      tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))
5412

    
5413
    self.tasklets = tasklets
5414

    
5415
    # Declare instance locks
5416
    self.needed_locks[locking.LEVEL_INSTANCE] = names
5417

    
5418
  def DeclareLocks(self, level):
5419
    if level == locking.LEVEL_NODE:
5420
      self._LockInstancesNodes()
5421

    
5422
  def BuildHooksEnv(self):
5423
    """Build hooks env.
5424

5425
    This runs on the master, the primary and all the secondaries.
5426

5427
    """
5428
    env = {
5429
      "NODE_NAME": self.op.node_name,
5430
      }
5431

    
5432
    nl = [self.cfg.GetMasterNode()]
5433

    
5434
    return (env, nl, nl)
5435

    
5436

    
5437
class TLMigrateInstance(Tasklet):
5438
  def __init__(self, lu, instance_name, live, cleanup):
5439
    """Initializes this class.
5440

5441
    """
5442
    Tasklet.__init__(self, lu)
5443

    
5444
    # Parameters
5445
    self.instance_name = instance_name
5446
    self.live = live
5447
    self.cleanup = cleanup
5448

    
5449
  def CheckPrereq(self):
5450
    """Check prerequisites.
5451

5452
    This checks that the instance is in the cluster.
5453

5454
    """
5455
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5456
    instance = self.cfg.GetInstanceInfo(instance_name)
5457
    assert instance is not None
5458

    
5459
    if instance.disk_template != constants.DT_DRBD8:
5460
      raise errors.OpPrereqError("Instance's disk layout is not"
5461
                                 " drbd8, cannot migrate.", errors.ECODE_STATE)
5462

    
5463
    secondary_nodes = instance.secondary_nodes
5464
    if not secondary_nodes:
5465
      raise errors.ConfigurationError("No secondary node but using"
5466
                                      " drbd8 disk template")
5467

    
5468
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
5469

    
5470
    target_node = secondary_nodes[0]
5471
    # check memory requirements on the secondary node
5472
    _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
5473
                         instance.name, i_be[constants.BE_MEMORY],
5474
                         instance.hypervisor)
5475

    
5476
    # check bridge existance
5477
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
5478

    
5479
    if not self.cleanup:
5480
      _CheckNodeNotDrained(self.lu, target_node)
5481
      result = self.rpc.call_instance_migratable(instance.primary_node,
5482
                                                 instance)
5483
      result.Raise("Can't migrate, please use failover",
5484
                   prereq=True, ecode=errors.ECODE_STATE)
5485

    
5486
    self.instance = instance
5487

    
5488
  def _WaitUntilSync(self):
5489
    """Poll with custom rpc for disk sync.
5490

5491
    This uses our own step-based rpc call.
5492

5493
    """
5494
    self.feedback_fn("* wait until resync is done")
5495
    all_done = False
5496
    while not all_done:
5497
      all_done = True
5498
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5499
                                            self.nodes_ip,
5500
                                            self.instance.disks)
5501
      min_percent = 100
5502
      for node, nres in result.items():
5503
        nres.Raise("Cannot resync disks on node %s" % node)
5504
        node_done, node_percent = nres.payload
5505
        all_done = all_done and node_done
5506
        if node_percent is not None:
5507
          min_percent = min(min_percent, node_percent)
5508
      if not all_done:
5509
        if min_percent < 100:
5510
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
5511
        time.sleep(2)
5512

    
5513
  def _EnsureSecondary(self, node):
5514
    """Demote a node to secondary.
5515

5516
    """
5517
    self.feedback_fn("* switching node %s to secondary mode" % node)
5518

    
5519
    for dev in self.instance.disks:
5520
      self.cfg.SetDiskID(dev, node)
5521

    
5522
    result = self.rpc.call_blockdev_close(node, self.instance.name,
5523
                                          self.instance.disks)
5524
    result.Raise("Cannot change disk to secondary on node %s" % node)
5525

    
5526
  def _GoStandalone(self):
5527
    """Disconnect from the network.
5528

5529
    """
5530
    self.feedback_fn("* changing into standalone mode")
5531
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5532
                                               self.instance.disks)
5533
    for node, nres in result.items():
5534
      nres.Raise("Cannot disconnect disks node %s" % node)
5535

    
5536
  def _GoReconnect(self, multimaster):
5537
    """Reconnect to the network.
5538

5539
    """
5540
    if multimaster:
5541
      msg = "dual-master"
5542
    else:
5543
      msg = "single-master"
5544
    self.feedback_fn("* changing disks into %s mode" % msg)
5545
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5546
                                           self.instance.disks,
5547
                                           self.instance.name, multimaster)
5548
    for node, nres in result.items():
5549
      nres.Raise("Cannot change disks config on node %s" % node)
5550

    
5551
  def _ExecCleanup(self):
5552
    """Try to cleanup after a failed migration.
5553

5554
    The cleanup is done by:
5555
      - check that the instance is running only on one node
5556
        (and update the config if needed)
5557
      - change disks on its secondary node to secondary
5558
      - wait until disks are fully synchronized
5559
      - disconnect from the network
5560
      - change disks into single-master mode
5561
      - wait again until disks are fully synchronized
5562

5563
    """
5564
    instance = self.instance
5565
    target_node = self.target_node
5566
    source_node = self.source_node
5567

    
5568
    # check running on only one node
5569
    self.feedback_fn("* checking where the instance actually runs"
5570
                     " (if this hangs, the hypervisor might be in"
5571
                     " a bad state)")
5572
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
5573
    for node, result in ins_l.items():
5574
      result.Raise("Can't contact node %s" % node)
5575

    
5576
    runningon_source = instance.name in ins_l[source_node].payload
5577
    runningon_target = instance.name in ins_l[target_node].payload
5578

    
5579
    if runningon_source and runningon_target:
5580
      raise errors.OpExecError("Instance seems to be running on two nodes,"
5581
                               " or the hypervisor is confused. You will have"
5582
                               " to ensure manually that it runs only on one"
5583
                               " and restart this operation.")
5584

    
5585
    if not (runningon_source or runningon_target):
5586
      raise errors.OpExecError("Instance does not seem to be running at all."
5587
                               " In this case, it's safer to repair by"
5588
                               " running 'gnt-instance stop' to ensure disk"
5589
                               " shutdown, and then restarting it.")
5590

    
5591
    if runningon_target:
5592
      # the migration has actually succeeded, we need to update the config
5593
      self.feedback_fn("* instance running on secondary node (%s),"
5594
                       " updating config" % target_node)
5595
      instance.primary_node = target_node
5596
      self.cfg.Update(instance, self.feedback_fn)
5597
      demoted_node = source_node
5598
    else:
5599
      self.feedback_fn("* instance confirmed to be running on its"
5600
                       " primary node (%s)" % source_node)
5601
      demoted_node = target_node
5602

    
5603
    self._EnsureSecondary(demoted_node)
5604
    try:
5605
      self._WaitUntilSync()
5606
    except errors.OpExecError:
5607
      # we ignore here errors, since if the device is standalone, it
5608
      # won't be able to sync
5609
      pass
5610
    self._GoStandalone()
5611
    self._GoReconnect(False)
5612
    self._WaitUntilSync()
5613

    
5614
    self.feedback_fn("* done")
5615

    
5616
  def _RevertDiskStatus(self):
5617
    """Try to revert the disk status after a failed migration.
5618

5619
    """
5620
    target_node = self.target_node
5621
    try:
5622
      self._EnsureSecondary(target_node)
5623
      self._GoStandalone()
5624
      self._GoReconnect(False)
5625
      self._WaitUntilSync()
5626
    except errors.OpExecError, err:
5627
      self.lu.LogWarning("Migration failed and I can't reconnect the"
5628
                         " drives: error '%s'\n"
5629
                         "Please look and recover the instance status" %
5630
                         str(err))
5631

    
5632
  def _AbortMigration(self):
5633
    """Call the hypervisor code to abort a started migration.
5634

5635
    """
5636
    instance = self.instance
5637
    target_node = self.target_node
5638
    migration_info = self.migration_info
5639

    
5640
    abort_result = self.rpc.call_finalize_migration(target_node,
5641
                                                    instance,
5642
                                                    migration_info,
5643
                                                    False)
5644
    abort_msg = abort_result.fail_msg
5645
    if abort_msg:
5646
      logging.error("Aborting migration failed on target node %s: %s",
5647
                    target_node, abort_msg)
5648
      # Don't raise an exception here, as we stil have to try to revert the
5649
      # disk status, even if this step failed.
5650

    
5651
  def _ExecMigration(self):
5652
    """Migrate an instance.
5653

5654
    The migrate is done by:
5655
      - change the disks into dual-master mode
5656
      - wait until disks are fully synchronized again
5657
      - migrate the instance
5658
      - change disks on the new secondary node (the old primary) to secondary
5659
      - wait until disks are fully synchronized
5660
      - change disks into single-master mode
5661

5662
    """
5663
    instance = self.instance
5664
    target_node = self.target_node
5665
    source_node = self.source_node
5666

    
5667
    self.feedback_fn("* checking disk consistency between source and target")
5668
    for dev in instance.disks:
5669
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
5670
        raise errors.OpExecError("Disk %s is degraded or not fully"
5671
                                 " synchronized on target node,"
5672
                                 " aborting migrate." % dev.iv_name)
5673

    
5674
    # First get the migration information from the remote node
5675
    result = self.rpc.call_migration_info(source_node, instance)
5676
    msg = result.fail_msg
5677
    if msg:
5678
      log_err = ("Failed fetching source migration information from %s: %s" %
5679
                 (source_node, msg))
5680
      logging.error(log_err)
5681
      raise errors.OpExecError(log_err)
5682

    
5683
    self.migration_info = migration_info = result.payload
5684

    
5685
    # Then switch the disks to master/master mode
5686
    self._EnsureSecondary(target_node)
5687
    self._GoStandalone()
5688
    self._GoReconnect(True)
5689
    self._WaitUntilSync()
5690

    
5691
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
5692
    result = self.rpc.call_accept_instance(target_node,
5693
                                           instance,
5694
                                           migration_info,
5695
                                           self.nodes_ip[target_node])
5696

    
5697
    msg = result.fail_msg
5698
    if msg:
5699
      logging.error("Instance pre-migration failed, trying to revert"
5700
                    " disk status: %s", msg)
5701
      self.feedback_fn("Pre-migration failed, aborting")
5702
      self._AbortMigration()
5703
      self._RevertDiskStatus()
5704
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
5705
                               (instance.name, msg))
5706

    
5707
    self.feedback_fn("* migrating instance to %s" % target_node)
5708
    time.sleep(10)
5709
    result = self.rpc.call_instance_migrate(source_node, instance,
5710
                                            self.nodes_ip[target_node],
5711
                                            self.live)
5712
    msg = result.fail_msg
5713
    if msg:
5714
      logging.error("Instance migration failed, trying to revert"
5715
                    " disk status: %s", msg)
5716
      self.feedback_fn("Migration failed, aborting")
5717
      self._AbortMigration()
5718
      self._RevertDiskStatus()
5719
      raise errors.OpExecError("Could not migrate instance %s: %s" %
5720
                               (instance.name, msg))
5721
    time.sleep(10)
5722

    
5723
    instance.primary_node = target_node
5724
    # distribute new instance config to the other nodes
5725
    self.cfg.Update(instance, self.feedback_fn)
5726

    
5727
    result = self.rpc.call_finalize_migration(target_node,
5728
                                              instance,
5729
                                              migration_info,
5730
                                              True)
5731
    msg = result.fail_msg
5732
    if msg:
5733
      logging.error("Instance migration succeeded, but finalization failed:"
5734
                    " %s", msg)
5735
      raise errors.OpExecError("Could not finalize instance migration: %s" %
5736
                               msg)
5737

    
5738
    self._EnsureSecondary(source_node)
5739
    self._WaitUntilSync()
5740
    self._GoStandalone()
5741
    self._GoReconnect(False)
5742
    self._WaitUntilSync()
5743

    
5744
    self.feedback_fn("* done")
5745

    
5746
  def Exec(self, feedback_fn):
5747
    """Perform the migration.
5748

5749
    """
5750
    feedback_fn("Migrating instance %s" % self.instance.name)
5751

    
5752
    self.feedback_fn = feedback_fn
5753

    
5754
    self.source_node = self.instance.primary_node
5755
    self.target_node = self.instance.secondary_nodes[0]
5756
    self.all_nodes = [self.source_node, self.target_node]
5757
    self.nodes_ip = {
5758
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
5759
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
5760
      }
5761

    
5762
    if self.cleanup:
5763
      return self._ExecCleanup()
5764
    else:
5765
      return self._ExecMigration()
5766

    
5767

    
5768
def _CreateBlockDev(lu, node, instance, device, force_create,
5769
                    info, force_open):
5770
  """Create a tree of block devices on a given node.
5771

5772
  If this device type has to be created on secondaries, create it and
5773
  all its children.
5774

5775
  If not, just recurse to children keeping the same 'force' value.
5776

5777
  @param lu: the lu on whose behalf we execute
5778
  @param node: the node on which to create the device
5779
  @type instance: L{objects.Instance}
5780
  @param instance: the instance which owns the device
5781
  @type device: L{objects.Disk}
5782
  @param device: the device to create
5783
  @type force_create: boolean
5784
  @param force_create: whether to force creation of this device; this
5785
      will be change to True whenever we find a device which has
5786
      CreateOnSecondary() attribute
5787
  @param info: the extra 'metadata' we should attach to the device
5788
      (this will be represented as a LVM tag)
5789
  @type force_open: boolean
5790
  @param force_open: this parameter will be passes to the
5791
      L{backend.BlockdevCreate} function where it specifies
5792
      whether we run on primary or not, and it affects both
5793
      the child assembly and the device own Open() execution
5794

5795
  """
5796
  if device.CreateOnSecondary():
5797
    force_create = True
5798

    
5799
  if device.children:
5800
    for child in device.children:
5801
      _CreateBlockDev(lu, node, instance, child, force_create,
5802
                      info, force_open)
5803

    
5804
  if not force_create:
5805
    return
5806

    
5807
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
5808

    
5809

    
5810
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
5811
  """Create a single block device on a given node.
5812

5813
  This will not recurse over children of the device, so they must be
5814
  created in advance.
5815

5816
  @param lu: the lu on whose behalf we execute
5817
  @param node: the node on which to create the device
5818
  @type instance: L{objects.Instance}
5819
  @param instance: the instance which owns the device
5820
  @type device: L{objects.Disk}
5821
  @param device: the device to create
5822
  @param info: the extra 'metadata' we should attach to the device
5823
      (this will be represented as a LVM tag)
5824
  @type force_open: boolean
5825
  @param force_open: this parameter will be passes to the
5826
      L{backend.BlockdevCreate} function where it specifies
5827
      whether we run on primary or not, and it affects both
5828
      the child assembly and the device own Open() execution
5829

5830
  """
5831
  lu.cfg.SetDiskID(device, node)
5832
  result = lu.rpc.call_blockdev_create(node, device, device.size,
5833
                                       instance.name, force_open, info)
5834
  result.Raise("Can't create block device %s on"
5835
               " node %s for instance %s" % (device, node, instance.name))
5836
  if device.physical_id is None:
5837
    device.physical_id = result.payload
5838

    
5839

    
5840
def _GenerateUniqueNames(lu, exts):
5841
  """Generate a suitable LV name.
5842

5843
  This will generate a logical volume name for the given instance.
5844

5845
  """
5846
  results = []
5847
  for val in exts:
5848
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
5849
    results.append("%s%s" % (new_id, val))
5850
  return results
5851

    
5852

    
5853
def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
5854
                         p_minor, s_minor):
5855
  """Generate a drbd8 device complete with its children.
5856

5857
  """
5858
  port = lu.cfg.AllocatePort()
5859
  vgname = lu.cfg.GetVGName()
5860
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
5861
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
5862
                          logical_id=(vgname, names[0]))
5863
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
5864
                          logical_id=(vgname, names[1]))
5865
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
5866
                          logical_id=(primary, secondary, port,
5867
                                      p_minor, s_minor,
5868
                                      shared_secret),
5869
                          children=[dev_data, dev_meta],
5870
                          iv_name=iv_name)
5871
  return drbd_dev
5872

    
5873

    
5874
def _GenerateDiskTemplate(lu, template_name,
5875
                          instance_name, primary_node,
5876
                          secondary_nodes, disk_info,
5877
                          file_storage_dir, file_driver,
5878
                          base_index):
5879
  """Generate the entire disk layout for a given template type.
5880

5881
  """
5882
  #TODO: compute space requirements
5883

    
5884
  vgname = lu.cfg.GetVGName()
5885
  disk_count = len(disk_info)
5886
  disks = []
5887
  if template_name == constants.DT_DISKLESS:
5888
    pass
5889
  elif template_name == constants.DT_PLAIN:
5890
    if len(secondary_nodes) != 0:
5891
      raise errors.ProgrammerError("Wrong template configuration")
5892

    
5893
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
5894
                                      for i in range(disk_count)])
5895
    for idx, disk in enumerate(disk_info):
5896
      disk_index = idx + base_index
5897
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
5898
                              logical_id=(vgname, names[idx]),
5899
                              iv_name="disk/%d" % disk_index,
5900
                              mode=disk["mode"])
5901
      disks.append(disk_dev)
5902
  elif template_name == constants.DT_DRBD8:
5903
    if len(secondary_nodes) != 1:
5904
      raise errors.ProgrammerError("Wrong template configuration")
5905
    remote_node = secondary_nodes[0]
5906
    minors = lu.cfg.AllocateDRBDMinor(
5907
      [primary_node, remote_node] * len(disk_info), instance_name)
5908

    
5909
    names = []
5910
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
5911
                                               for i in range(disk_count)]):
5912
      names.append(lv_prefix + "_data")
5913
      names.append(lv_prefix + "_meta")
5914
    for idx, disk in enumerate(disk_info):
5915
      disk_index = idx + base_index
5916
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
5917
                                      disk["size"], names[idx*2:idx*2+2],
5918
                                      "disk/%d" % disk_index,
5919
                                      minors[idx*2], minors[idx*2+1])
5920
      disk_dev.mode = disk["mode"]
5921
      disks.append(disk_dev)
5922
  elif template_name == constants.DT_FILE:
5923
    if len(secondary_nodes) != 0:
5924
      raise errors.ProgrammerError("Wrong template configuration")
5925

    
5926
    _RequireFileStorage()
5927

    
5928
    for idx, disk in enumerate(disk_info):
5929
      disk_index = idx + base_index
5930
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
5931
                              iv_name="disk/%d" % disk_index,
5932
                              logical_id=(file_driver,
5933
                                          "%s/disk%d" % (file_storage_dir,
5934
                                                         disk_index)),
5935
                              mode=disk["mode"])
5936
      disks.append(disk_dev)
5937
  else:
5938
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
5939
  return disks
5940

    
5941

    
5942
def _GetInstanceInfoText(instance):
5943
  """Compute that text that should be added to the disk's metadata.
5944

5945
  """
5946
  return "originstname+%s" % instance.name
5947

    
5948

    
5949
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
5950
  """Create all disks for an instance.
5951

5952
  This abstracts away some work from AddInstance.
5953

5954
  @type lu: L{LogicalUnit}
5955
  @param lu: the logical unit on whose behalf we execute
5956
  @type instance: L{objects.Instance}
5957
  @param instance: the instance whose disks we should create
5958
  @type to_skip: list
5959
  @param to_skip: list of indices to skip
5960
  @type target_node: string
5961
  @param target_node: if passed, overrides the target node for creation
5962
  @rtype: boolean
5963
  @return: the success of the creation
5964

5965
  """
5966
  info = _GetInstanceInfoText(instance)
5967
  if target_node is None:
5968
    pnode = instance.primary_node
5969
    all_nodes = instance.all_nodes
5970
  else:
5971
    pnode = target_node
5972
    all_nodes = [pnode]
5973

    
5974
  if instance.disk_template == constants.DT_FILE:
5975
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
5976
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
5977

    
5978
    result.Raise("Failed to create directory '%s' on"
5979
                 " node %s" % (file_storage_dir, pnode))
5980

    
5981
  # Note: this needs to be kept in sync with adding of disks in
5982
  # LUSetInstanceParams
5983
  for idx, device in enumerate(instance.disks):
5984
    if to_skip and idx in to_skip:
5985
      continue
5986
    logging.info("Creating volume %s for instance %s",
5987
                 device.iv_name, instance.name)
5988
    #HARDCODE
5989
    for node in all_nodes:
5990
      f_create = node == pnode
5991
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
5992

    
5993

    
5994
def _RemoveDisks(lu, instance, target_node=None):
5995
  """Remove all disks for an instance.
5996

5997
  This abstracts away some work from `AddInstance()` and
5998
  `RemoveInstance()`. Note that in case some of the devices couldn't
5999
  be removed, the removal will continue with the other ones (compare
6000
  with `_CreateDisks()`).
6001

6002
  @type lu: L{LogicalUnit}
6003
  @param lu: the logical unit on whose behalf we execute
6004
  @type instance: L{objects.Instance}
6005
  @param instance: the instance whose disks we should remove
6006
  @type target_node: string
6007
  @param target_node: used to override the node on which to remove the disks
6008
  @rtype: boolean
6009
  @return: the success of the removal
6010

6011
  """
6012
  logging.info("Removing block devices for instance %s", instance.name)
6013

    
6014
  all_result = True
6015
  for device in instance.disks:
6016
    if target_node:
6017
      edata = [(target_node, device)]
6018
    else:
6019
      edata = device.ComputeNodeTree(instance.primary_node)
6020
    for node, disk in edata:
6021
      lu.cfg.SetDiskID(disk, node)
6022
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6023
      if msg:
6024
        lu.LogWarning("Could not remove block device %s on node %s,"
6025
                      " continuing anyway: %s", device.iv_name, node, msg)
6026
        all_result = False
6027

    
6028
  if instance.disk_template == constants.DT_FILE:
6029
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6030
    if target_node:
6031
      tgt = target_node
6032
    else:
6033
      tgt = instance.primary_node
6034
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6035
    if result.fail_msg:
6036
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6037
                    file_storage_dir, instance.primary_node, result.fail_msg)
6038
      all_result = False
6039

    
6040
  return all_result
6041

    
6042

    
6043
def _ComputeDiskSize(disk_template, disks):
6044
  """Compute disk size requirements in the volume group
6045

6046
  """
6047
  # Required free disk space as a function of disk and swap space
6048
  req_size_dict = {
6049
    constants.DT_DISKLESS: None,
6050
    constants.DT_PLAIN: sum(d["size"] for d in disks),
6051
    # 128 MB are added for drbd metadata for each disk
6052
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
6053
    constants.DT_FILE: None,
6054
  }
6055

    
6056
  if disk_template not in req_size_dict:
6057
    raise errors.ProgrammerError("Disk template '%s' size requirement"
6058
                                 " is unknown" %  disk_template)
6059

    
6060
  return req_size_dict[disk_template]
6061

    
6062

    
6063
def _CheckHVParams(lu, nodenames, hvname, hvparams):
6064
  """Hypervisor parameter validation.
6065

6066
  This function abstract the hypervisor parameter validation to be
6067
  used in both instance create and instance modify.
6068

6069
  @type lu: L{LogicalUnit}
6070
  @param lu: the logical unit for which we check
6071
  @type nodenames: list
6072
  @param nodenames: the list of nodes on which we should check
6073
  @type hvname: string
6074
  @param hvname: the name of the hypervisor we should use
6075
  @type hvparams: dict
6076
  @param hvparams: the parameters which we need to check
6077
  @raise errors.OpPrereqError: if the parameters are not valid
6078

6079
  """
6080
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6081
                                                  hvname,
6082
                                                  hvparams)
6083
  for node in nodenames:
6084
    info = hvinfo[node]
6085
    if info.offline:
6086
      continue
6087
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
6088

    
6089

    
6090
class LUCreateInstance(LogicalUnit):
6091
  """Create an instance.
6092

6093
  """
6094
  HPATH = "instance-add"
6095
  HTYPE = constants.HTYPE_INSTANCE
6096
  _OP_REQP = ["instance_name", "disks",
6097
              "mode", "start",
6098
              "wait_for_sync", "ip_check", "nics",
6099
              "hvparams", "beparams"]
6100
  REQ_BGL = False
6101

    
6102
  def CheckArguments(self):
6103
    """Check arguments.
6104

6105
    """
6106
    # set optional parameters to none if they don't exist
6107
    for attr in ["pnode", "snode", "iallocator", "hypervisor",
6108
                 "disk_template", "identify_defaults"]:
6109
      if not hasattr(self.op, attr):
6110
        setattr(self.op, attr, None)
6111

    
6112
    # do not require name_check to ease forward/backward compatibility
6113
    # for tools
6114
    if not hasattr(self.op, "name_check"):
6115
      self.op.name_check = True
6116
    if not hasattr(self.op, "no_install"):
6117
      self.op.no_install = False
6118
    if self.op.no_install and self.op.start:
6119
      self.LogInfo("No-installation mode selected, disabling startup")
6120
      self.op.start = False
6121
    # validate/normalize the instance name
6122
    self.op.instance_name = utils.HostInfo.NormalizeName(self.op.instance_name)
6123
    if self.op.ip_check and not self.op.name_check:
6124
      # TODO: make the ip check more flexible and not depend on the name check
6125
      raise errors.OpPrereqError("Cannot do ip checks without a name check",
6126
                                 errors.ECODE_INVAL)
6127

    
6128
    # check nics' parameter names
6129
    for nic in self.op.nics:
6130
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6131

    
6132
    # check disks. parameter names and consistent adopt/no-adopt strategy
6133
    has_adopt = has_no_adopt = False
6134
    for disk in self.op.disks:
6135
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
6136
      if "adopt" in disk:
6137
        has_adopt = True
6138
      else:
6139
        has_no_adopt = True
6140
    if has_adopt and has_no_adopt:
6141
      raise errors.OpPrereqError("Either all disks are adopted or none is",
6142
                                 errors.ECODE_INVAL)
6143
    if has_adopt:
6144
      if self.op.disk_template != constants.DT_PLAIN:
6145
        raise errors.OpPrereqError("Disk adoption is only supported for the"
6146
                                   " 'plain' disk template",
6147
                                   errors.ECODE_INVAL)
6148
      if self.op.iallocator is not None:
6149
        raise errors.OpPrereqError("Disk adoption not allowed with an"
6150
                                   " iallocator script", errors.ECODE_INVAL)
6151
      if self.op.mode == constants.INSTANCE_IMPORT:
6152
        raise errors.OpPrereqError("Disk adoption not allowed for"
6153
                                   " instance import", errors.ECODE_INVAL)
6154

    
6155
    self.adopt_disks = has_adopt
6156

    
6157
    # verify creation mode
6158
    if self.op.mode not in constants.INSTANCE_CREATE_MODES:
6159
      raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
6160
                                 self.op.mode, errors.ECODE_INVAL)
6161

    
6162
    # instance name verification
6163
    if self.op.name_check:
6164
      self.hostname1 = utils.GetHostInfo(self.op.instance_name)
6165
      self.op.instance_name = self.hostname1.name
6166
      # used in CheckPrereq for ip ping check
6167
      self.check_ip = self.hostname1.ip
6168
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6169
      raise errors.OpPrereqError("Remote imports require names to be checked" %
6170
                                 errors.ECODE_INVAL)
6171
    else:
6172
      self.check_ip = None
6173

    
6174
    # file storage checks
6175
    if (self.op.file_driver and
6176
        not self.op.file_driver in constants.FILE_DRIVER):
6177
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
6178
                                 self.op.file_driver, errors.ECODE_INVAL)
6179

    
6180
    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6181
      raise errors.OpPrereqError("File storage directory path not absolute",
6182
                                 errors.ECODE_INVAL)
6183

    
6184
    ### Node/iallocator related checks
6185
    if [self.op.iallocator, self.op.pnode].count(None) != 1:
6186
      raise errors.OpPrereqError("One and only one of iallocator and primary"
6187
                                 " node must be given",
6188
                                 errors.ECODE_INVAL)
6189

    
6190
    self._cds = _GetClusterDomainSecret()
6191

    
6192
    if self.op.mode == constants.INSTANCE_IMPORT:
6193
      # On import force_variant must be True, because if we forced it at
6194
      # initial install, our only chance when importing it back is that it
6195
      # works again!
6196
      self.op.force_variant = True
6197

    
6198
      if self.op.no_install:
6199
        self.LogInfo("No-installation mode has no effect during import")
6200

    
6201
    elif self.op.mode == constants.INSTANCE_CREATE:
6202
      if getattr(self.op, "os_type", None) is None:
6203
        raise errors.OpPrereqError("No guest OS specified",
6204
                                   errors.ECODE_INVAL)
6205
      self.op.force_variant = getattr(self.op, "force_variant", False)
6206
      if self.op.disk_template is None:
6207
        raise errors.OpPrereqError("No disk template specified",
6208
                                   errors.ECODE_INVAL)
6209

    
6210
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6211
      # Check handshake to ensure both clusters have the same domain secret
6212
      src_handshake = getattr(self.op, "source_handshake", None)
6213
      if not src_handshake:
6214
        raise errors.OpPrereqError("Missing source handshake",
6215
                                   errors.ECODE_INVAL)
6216

    
6217
      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
6218
                                                           src_handshake)
6219
      if errmsg:
6220
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
6221
                                   errors.ECODE_INVAL)
6222

    
6223
      # Load and check source CA
6224
      self.source_x509_ca_pem = getattr(self.op, "source_x509_ca", None)
6225
      if not self.source_x509_ca_pem:
6226
        raise errors.OpPrereqError("Missing source X509 CA",
6227
                                   errors.ECODE_INVAL)
6228

    
6229
      try:
6230
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
6231
                                                    self._cds)
6232
      except OpenSSL.crypto.Error, err:
6233
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
6234
                                   (err, ), errors.ECODE_INVAL)
6235

    
6236
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
6237
      if errcode is not None:
6238
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
6239
                                   errors.ECODE_INVAL)
6240

    
6241
      self.source_x509_ca = cert
6242

    
6243
      src_instance_name = getattr(self.op, "source_instance_name", None)
6244
      if not src_instance_name:
6245
        raise errors.OpPrereqError("Missing source instance name",
6246
                                   errors.ECODE_INVAL)
6247

    
6248
      self.source_instance_name = \
6249
        utils.GetHostInfo(utils.HostInfo.NormalizeName(src_instance_name)).name
6250

    
6251
    else:
6252
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
6253
                                 self.op.mode, errors.ECODE_INVAL)
6254

    
6255
  def ExpandNames(self):
6256
    """ExpandNames for CreateInstance.
6257

6258
    Figure out the right locks for instance creation.
6259

6260
    """
6261
    self.needed_locks = {}
6262

    
6263
    instance_name = self.op.instance_name
6264
    # this is just a preventive check, but someone might still add this
6265
    # instance in the meantime, and creation will fail at lock-add time
6266
    if instance_name in self.cfg.GetInstanceList():
6267
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6268
                                 instance_name, errors.ECODE_EXISTS)
6269

    
6270
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6271

    
6272
    if self.op.iallocator:
6273
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6274
    else:
6275
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6276
      nodelist = [self.op.pnode]
6277
      if self.op.snode is not None:
6278
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6279
        nodelist.append(self.op.snode)
6280
      self.needed_locks[locking.LEVEL_NODE] = nodelist
6281

    
6282
    # in case of import lock the source node too
6283
    if self.op.mode == constants.INSTANCE_IMPORT:
6284
      src_node = getattr(self.op, "src_node", None)
6285
      src_path = getattr(self.op, "src_path", None)
6286

    
6287
      if src_path is None:
6288
        self.op.src_path = src_path = self.op.instance_name
6289

    
6290
      if src_node is None:
6291
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6292
        self.op.src_node = None
6293
        if os.path.isabs(src_path):
6294
          raise errors.OpPrereqError("Importing an instance from an absolute"
6295
                                     " path requires a source node option.",
6296
                                     errors.ECODE_INVAL)
6297
      else:
6298
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6299
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6300
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
6301
        if not os.path.isabs(src_path):
6302
          self.op.src_path = src_path = \
6303
            utils.PathJoin(constants.EXPORT_DIR, src_path)
6304

    
6305
  def _RunAllocator(self):
6306
    """Run the allocator based on input opcode.
6307

6308
    """
6309
    nics = [n.ToDict() for n in self.nics]
6310
    ial = IAllocator(self.cfg, self.rpc,
6311
                     mode=constants.IALLOCATOR_MODE_ALLOC,
6312
                     name=self.op.instance_name,
6313
                     disk_template=self.op.disk_template,
6314
                     tags=[],
6315
                     os=self.op.os_type,
6316
                     vcpus=self.be_full[constants.BE_VCPUS],
6317
                     mem_size=self.be_full[constants.BE_MEMORY],
6318
                     disks=self.disks,
6319
                     nics=nics,
6320
                     hypervisor=self.op.hypervisor,
6321
                     )
6322

    
6323
    ial.Run(self.op.iallocator)
6324

    
6325
    if not ial.success:
6326
      raise errors.OpPrereqError("Can't compute nodes using"
6327
                                 " iallocator '%s': %s" %
6328
                                 (self.op.iallocator, ial.info),
6329
                                 errors.ECODE_NORES)
6330
    if len(ial.result) != ial.required_nodes:
6331
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6332
                                 " of nodes (%s), required %s" %
6333
                                 (self.op.iallocator, len(ial.result),
6334
                                  ial.required_nodes), errors.ECODE_FAULT)
6335
    self.op.pnode = ial.result[0]
6336
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6337
                 self.op.instance_name, self.op.iallocator,
6338
                 utils.CommaJoin(ial.result))
6339
    if ial.required_nodes == 2:
6340
      self.op.snode = ial.result[1]
6341

    
6342
  def BuildHooksEnv(self):
6343
    """Build hooks env.
6344

6345
    This runs on master, primary and secondary nodes of the instance.
6346

6347
    """
6348
    env = {
6349
      "ADD_MODE": self.op.mode,
6350
      }
6351
    if self.op.mode == constants.INSTANCE_IMPORT:
6352
      env["SRC_NODE"] = self.op.src_node
6353
      env["SRC_PATH"] = self.op.src_path
6354
      env["SRC_IMAGES"] = self.src_images
6355

    
6356
    env.update(_BuildInstanceHookEnv(
6357
      name=self.op.instance_name,
6358
      primary_node=self.op.pnode,
6359
      secondary_nodes=self.secondaries,
6360
      status=self.op.start,
6361
      os_type=self.op.os_type,
6362
      memory=self.be_full[constants.BE_MEMORY],
6363
      vcpus=self.be_full[constants.BE_VCPUS],
6364
      nics=_NICListToTuple(self, self.nics),
6365
      disk_template=self.op.disk_template,
6366
      disks=[(d["size"], d["mode"]) for d in self.disks],
6367
      bep=self.be_full,
6368
      hvp=self.hv_full,
6369
      hypervisor_name=self.op.hypervisor,
6370
    ))
6371

    
6372
    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6373
          self.secondaries)
6374
    return env, nl, nl
6375

    
6376
  def _ReadExportInfo(self):
6377
    """Reads the export information from disk.
6378

6379
    It will override the opcode source node and path with the actual
6380
    information, if these two were not specified before.
6381

6382
    @return: the export information
6383

6384
    """
6385
    assert self.op.mode == constants.INSTANCE_IMPORT
6386

    
6387
    src_node = self.op.src_node
6388
    src_path = self.op.src_path
6389

    
6390
    if src_node is None:
6391
      locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6392
      exp_list = self.rpc.call_export_list(locked_nodes)
6393
      found = False
6394
      for node in exp_list:
6395
        if exp_list[node].fail_msg:
6396
          continue
6397
        if src_path in exp_list[node].payload:
6398
          found = True
6399
          self.op.src_node = src_node = node
6400
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
6401
                                                       src_path)
6402
          break
6403
      if not found:
6404
        raise errors.OpPrereqError("No export found for relative path %s" %
6405
                                    src_path, errors.ECODE_INVAL)
6406

    
6407
    _CheckNodeOnline(self, src_node)
6408
    result = self.rpc.call_export_info(src_node, src_path)
6409
    result.Raise("No export or invalid export found in dir %s" % src_path)
6410

    
6411
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6412
    if not export_info.has_section(constants.INISECT_EXP):
6413
      raise errors.ProgrammerError("Corrupted export config",
6414
                                   errors.ECODE_ENVIRON)
6415

    
6416
    ei_version = export_info.get(constants.INISECT_EXP, "version")
6417
    if (int(ei_version) != constants.EXPORT_VERSION):
6418
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6419
                                 (ei_version, constants.EXPORT_VERSION),
6420
                                 errors.ECODE_ENVIRON)
6421
    return export_info
6422

    
6423
  def _ReadExportParams(self, einfo):
6424
    """Use export parameters as defaults.
6425

6426
    In case the opcode doesn't specify (as in override) some instance
6427
    parameters, then try to use them from the export information, if
6428
    that declares them.
6429

6430
    """
6431
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
6432

    
6433
    if self.op.disk_template is None:
6434
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
6435
        self.op.disk_template = einfo.get(constants.INISECT_INS,
6436
                                          "disk_template")
6437
      else:
6438
        raise errors.OpPrereqError("No disk template specified and the export"
6439
                                   " is missing the disk_template information",
6440
                                   errors.ECODE_INVAL)
6441

    
6442
    if not self.op.disks:
6443
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
6444
        disks = []
6445
        # TODO: import the disk iv_name too
6446
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
6447
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
6448
          disks.append({"size": disk_sz})
6449
        self.op.disks = disks
6450
      else:
6451
        raise errors.OpPrereqError("No disk info specified and the export"
6452
                                   " is missing the disk information",
6453
                                   errors.ECODE_INVAL)
6454

    
6455
    if (not self.op.nics and
6456
        einfo.has_option(constants.INISECT_INS, "nic_count")):
6457
      nics = []
6458
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
6459
        ndict = {}
6460
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
6461
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
6462
          ndict[name] = v
6463
        nics.append(ndict)
6464
      self.op.nics = nics
6465

    
6466
    if (self.op.hypervisor is None and
6467
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
6468
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
6469
    if einfo.has_section(constants.INISECT_HYP):
6470
      # use the export parameters but do not override the ones
6471
      # specified by the user
6472
      for name, value in einfo.items(constants.INISECT_HYP):
6473
        if name not in self.op.hvparams:
6474
          self.op.hvparams[name] = value
6475

    
6476
    if einfo.has_section(constants.INISECT_BEP):
6477
      # use the parameters, without overriding
6478
      for name, value in einfo.items(constants.INISECT_BEP):
6479
        if name not in self.op.beparams:
6480
          self.op.beparams[name] = value
6481
    else:
6482
      # try to read the parameters old style, from the main section
6483
      for name in constants.BES_PARAMETERS:
6484
        if (name not in self.op.beparams and
6485
            einfo.has_option(constants.INISECT_INS, name)):
6486
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
6487

    
6488
  def _RevertToDefaults(self, cluster):
6489
    """Revert the instance parameters to the default values.
6490

6491
    """
6492
    # hvparams
6493
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
6494
    for name in self.op.hvparams.keys():
6495
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
6496
        del self.op.hvparams[name]
6497
    # beparams
6498
    be_defs = cluster.SimpleFillBE({})
6499
    for name in self.op.beparams.keys():
6500
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
6501
        del self.op.beparams[name]
6502
    # nic params
6503
    nic_defs = cluster.SimpleFillNIC({})
6504
    for nic in self.op.nics:
6505
      for name in constants.NICS_PARAMETERS:
6506
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
6507
          del nic[name]
6508

    
6509
  def CheckPrereq(self):
6510
    """Check prerequisites.
6511

6512
    """
6513
    if self.op.mode == constants.INSTANCE_IMPORT:
6514
      export_info = self._ReadExportInfo()
6515
      self._ReadExportParams(export_info)
6516

    
6517
    _CheckDiskTemplate(self.op.disk_template)
6518

    
6519
    if (not self.cfg.GetVGName() and
6520
        self.op.disk_template not in constants.DTS_NOT_LVM):
6521
      raise errors.OpPrereqError("Cluster does not support lvm-based"
6522
                                 " instances", errors.ECODE_STATE)
6523

    
6524
    if self.op.hypervisor is None:
6525
      self.op.hypervisor = self.cfg.GetHypervisorType()
6526

    
6527
    cluster = self.cfg.GetClusterInfo()
6528
    enabled_hvs = cluster.enabled_hypervisors
6529
    if self.op.hypervisor not in enabled_hvs:
6530
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
6531
                                 " cluster (%s)" % (self.op.hypervisor,
6532
                                  ",".join(enabled_hvs)),
6533
                                 errors.ECODE_STATE)
6534

    
6535
    # check hypervisor parameter syntax (locally)
6536
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6537
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
6538
                                      self.op.hvparams)
6539
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
6540
    hv_type.CheckParameterSyntax(filled_hvp)
6541
    self.hv_full = filled_hvp
6542
    # check that we don't specify global parameters on an instance
6543
    _CheckGlobalHvParams(self.op.hvparams)
6544

    
6545
    # fill and remember the beparams dict
6546
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6547
    self.be_full = cluster.SimpleFillBE(self.op.beparams)
6548

    
6549
    # now that hvp/bep are in final format, let's reset to defaults,
6550
    # if told to do so
6551
    if self.op.identify_defaults:
6552
      self._RevertToDefaults(cluster)
6553

    
6554
    # NIC buildup
6555
    self.nics = []
6556
    for idx, nic in enumerate(self.op.nics):
6557
      nic_mode_req = nic.get("mode", None)
6558
      nic_mode = nic_mode_req
6559
      if nic_mode is None:
6560
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
6561

    
6562
      # in routed mode, for the first nic, the default ip is 'auto'
6563
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
6564
        default_ip_mode = constants.VALUE_AUTO
6565
      else:
6566
        default_ip_mode = constants.VALUE_NONE
6567

    
6568
      # ip validity checks
6569
      ip = nic.get("ip", default_ip_mode)
6570
      if ip is None or ip.lower() == constants.VALUE_NONE:
6571
        nic_ip = None
6572
      elif ip.lower() == constants.VALUE_AUTO:
6573
        if not self.op.name_check:
6574
          raise errors.OpPrereqError("IP address set to auto but name checks"
6575
                                     " have been skipped. Aborting.",
6576
                                     errors.ECODE_INVAL)
6577
        nic_ip = self.hostname1.ip
6578
      else:
6579
        if not utils.IsValidIP(ip):
6580
          raise errors.OpPrereqError("Given IP address '%s' doesn't look"
6581
                                     " like a valid IP" % ip,
6582
                                     errors.ECODE_INVAL)
6583
        nic_ip = ip
6584

    
6585
      # TODO: check the ip address for uniqueness
6586
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
6587
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
6588
                                   errors.ECODE_INVAL)
6589

    
6590
      # MAC address verification
6591
      mac = nic.get("mac", constants.VALUE_AUTO)
6592
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6593
        mac = utils.NormalizeAndValidateMac(mac)
6594

    
6595
        try:
6596
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
6597
        except errors.ReservationError:
6598
          raise errors.OpPrereqError("MAC address %s already in use"
6599
                                     " in cluster" % mac,
6600
                                     errors.ECODE_NOTUNIQUE)
6601

    
6602
      # bridge verification
6603
      bridge = nic.get("bridge", None)
6604
      link = nic.get("link", None)
6605
      if bridge and link:
6606
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
6607
                                   " at the same time", errors.ECODE_INVAL)
6608
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
6609
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
6610
                                   errors.ECODE_INVAL)
6611
      elif bridge:
6612
        link = bridge
6613

    
6614
      nicparams = {}
6615
      if nic_mode_req:
6616
        nicparams[constants.NIC_MODE] = nic_mode_req
6617
      if link:
6618
        nicparams[constants.NIC_LINK] = link
6619

    
6620
      check_params = cluster.SimpleFillNIC(nicparams)
6621
      objects.NIC.CheckParameterSyntax(check_params)
6622
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
6623

    
6624
    # disk checks/pre-build
6625
    self.disks = []
6626
    for disk in self.op.disks:
6627
      mode = disk.get("mode", constants.DISK_RDWR)
6628
      if mode not in constants.DISK_ACCESS_SET:
6629
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
6630
                                   mode, errors.ECODE_INVAL)
6631
      size = disk.get("size", None)
6632
      if size is None:
6633
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
6634
      try:
6635
        size = int(size)
6636
      except (TypeError, ValueError):
6637
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
6638
                                   errors.ECODE_INVAL)
6639
      new_disk = {"size": size, "mode": mode}
6640
      if "adopt" in disk:
6641
        new_disk["adopt"] = disk["adopt"]
6642
      self.disks.append(new_disk)
6643

    
6644
    if self.op.mode == constants.INSTANCE_IMPORT:
6645

    
6646
      # Check that the new instance doesn't have less disks than the export
6647
      instance_disks = len(self.disks)
6648
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
6649
      if instance_disks < export_disks:
6650
        raise errors.OpPrereqError("Not enough disks to import."
6651
                                   " (instance: %d, export: %d)" %
6652
                                   (instance_disks, export_disks),
6653
                                   errors.ECODE_INVAL)
6654

    
6655
      disk_images = []
6656
      for idx in range(export_disks):
6657
        option = 'disk%d_dump' % idx
6658
        if export_info.has_option(constants.INISECT_INS, option):
6659
          # FIXME: are the old os-es, disk sizes, etc. useful?
6660
          export_name = export_info.get(constants.INISECT_INS, option)
6661
          image = utils.PathJoin(self.op.src_path, export_name)
6662
          disk_images.append(image)
6663
        else:
6664
          disk_images.append(False)
6665

    
6666
      self.src_images = disk_images
6667

    
6668
      old_name = export_info.get(constants.INISECT_INS, 'name')
6669
      try:
6670
        exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
6671
      except (TypeError, ValueError), err:
6672
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
6673
                                   " an integer: %s" % str(err),
6674
                                   errors.ECODE_STATE)
6675
      if self.op.instance_name == old_name:
6676
        for idx, nic in enumerate(self.nics):
6677
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
6678
            nic_mac_ini = 'nic%d_mac' % idx
6679
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
6680

    
6681
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
6682

    
6683
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
6684
    if self.op.ip_check:
6685
      if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
6686
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
6687
                                   (self.check_ip, self.op.instance_name),
6688
                                   errors.ECODE_NOTUNIQUE)
6689

    
6690
    #### mac address generation
6691
    # By generating here the mac address both the allocator and the hooks get
6692
    # the real final mac address rather than the 'auto' or 'generate' value.
6693
    # There is a race condition between the generation and the instance object
6694
    # creation, which means that we know the mac is valid now, but we're not
6695
    # sure it will be when we actually add the instance. If things go bad
6696
    # adding the instance will abort because of a duplicate mac, and the
6697
    # creation job will fail.
6698
    for nic in self.nics:
6699
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6700
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
6701

    
6702
    #### allocator run
6703

    
6704
    if self.op.iallocator is not None:
6705
      self._RunAllocator()
6706

    
6707
    #### node related checks
6708

    
6709
    # check primary node
6710
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
6711
    assert self.pnode is not None, \
6712
      "Cannot retrieve locked node %s" % self.op.pnode
6713
    if pnode.offline:
6714
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
6715
                                 pnode.name, errors.ECODE_STATE)
6716
    if pnode.drained:
6717
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
6718
                                 pnode.name, errors.ECODE_STATE)
6719

    
6720
    self.secondaries = []
6721

    
6722
    # mirror node verification
6723
    if self.op.disk_template in constants.DTS_NET_MIRROR:
6724
      if self.op.snode is None:
6725
        raise errors.OpPrereqError("The networked disk templates need"
6726
                                   " a mirror node", errors.ECODE_INVAL)
6727
      if self.op.snode == pnode.name:
6728
        raise errors.OpPrereqError("The secondary node cannot be the"
6729
                                   " primary node.", errors.ECODE_INVAL)
6730
      _CheckNodeOnline(self, self.op.snode)
6731
      _CheckNodeNotDrained(self, self.op.snode)
6732
      self.secondaries.append(self.op.snode)
6733

    
6734
    nodenames = [pnode.name] + self.secondaries
6735

    
6736
    req_size = _ComputeDiskSize(self.op.disk_template,
6737
                                self.disks)
6738

    
6739
    # Check lv size requirements, if not adopting
6740
    if req_size is not None and not self.adopt_disks:
6741
      _CheckNodesFreeDisk(self, nodenames, req_size)
6742

    
6743
    if self.adopt_disks: # instead, we must check the adoption data
6744
      all_lvs = set([i["adopt"] for i in self.disks])
6745
      if len(all_lvs) != len(self.disks):
6746
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
6747
                                   errors.ECODE_INVAL)
6748
      for lv_name in all_lvs:
6749
        try:
6750
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
6751
        except errors.ReservationError:
6752
          raise errors.OpPrereqError("LV named %s used by another instance" %
6753
                                     lv_name, errors.ECODE_NOTUNIQUE)
6754

    
6755
      node_lvs = self.rpc.call_lv_list([pnode.name],
6756
                                       self.cfg.GetVGName())[pnode.name]
6757
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
6758
      node_lvs = node_lvs.payload
6759
      delta = all_lvs.difference(node_lvs.keys())
6760
      if delta:
6761
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
6762
                                   utils.CommaJoin(delta),
6763
                                   errors.ECODE_INVAL)
6764
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
6765
      if online_lvs:
6766
        raise errors.OpPrereqError("Online logical volumes found, cannot"
6767
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
6768
                                   errors.ECODE_STATE)
6769
      # update the size of disk based on what is found
6770
      for dsk in self.disks:
6771
        dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
6772

    
6773
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
6774

    
6775
    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
6776

    
6777
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
6778

    
6779
    # memory check on primary node
6780
    if self.op.start:
6781
      _CheckNodeFreeMemory(self, self.pnode.name,
6782
                           "creating instance %s" % self.op.instance_name,
6783
                           self.be_full[constants.BE_MEMORY],
6784
                           self.op.hypervisor)
6785

    
6786
    self.dry_run_result = list(nodenames)
6787

    
6788
  def Exec(self, feedback_fn):
6789
    """Create and add the instance to the cluster.
6790

6791
    """
6792
    instance = self.op.instance_name
6793
    pnode_name = self.pnode.name
6794

    
6795
    ht_kind = self.op.hypervisor
6796
    if ht_kind in constants.HTS_REQ_PORT:
6797
      network_port = self.cfg.AllocatePort()
6798
    else:
6799
      network_port = None
6800

    
6801
    if constants.ENABLE_FILE_STORAGE:
6802
      # this is needed because os.path.join does not accept None arguments
6803
      if self.op.file_storage_dir is None:
6804
        string_file_storage_dir = ""
6805
      else:
6806
        string_file_storage_dir = self.op.file_storage_dir
6807

    
6808
      # build the full file storage dir path
6809
      file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
6810
                                        string_file_storage_dir, instance)
6811
    else:
6812
      file_storage_dir = ""
6813

    
6814
    disks = _GenerateDiskTemplate(self,
6815
                                  self.op.disk_template,
6816
                                  instance, pnode_name,
6817
                                  self.secondaries,
6818
                                  self.disks,
6819
                                  file_storage_dir,
6820
                                  self.op.file_driver,
6821
                                  0)
6822

    
6823
    iobj = objects.Instance(name=instance, os=self.op.os_type,
6824
                            primary_node=pnode_name,
6825
                            nics=self.nics, disks=disks,
6826
                            disk_template=self.op.disk_template,
6827
                            admin_up=False,
6828
                            network_port=network_port,
6829
                            beparams=self.op.beparams,
6830
                            hvparams=self.op.hvparams,
6831
                            hypervisor=self.op.hypervisor,
6832
                            )
6833

    
6834
    if self.adopt_disks:
6835
      # rename LVs to the newly-generated names; we need to construct
6836
      # 'fake' LV disks with the old data, plus the new unique_id
6837
      tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
6838
      rename_to = []
6839
      for t_dsk, a_dsk in zip (tmp_disks, self.disks):
6840
        rename_to.append(t_dsk.logical_id)
6841
        t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
6842
        self.cfg.SetDiskID(t_dsk, pnode_name)
6843
      result = self.rpc.call_blockdev_rename(pnode_name,
6844
                                             zip(tmp_disks, rename_to))
6845
      result.Raise("Failed to rename adoped LVs")
6846
    else:
6847
      feedback_fn("* creating instance disks...")
6848
      try:
6849
        _CreateDisks(self, iobj)
6850
      except errors.OpExecError:
6851
        self.LogWarning("Device creation failed, reverting...")
6852
        try:
6853
          _RemoveDisks(self, iobj)
6854
        finally:
6855
          self.cfg.ReleaseDRBDMinors(instance)
6856
          raise
6857

    
6858
    feedback_fn("adding instance %s to cluster config" % instance)
6859

    
6860
    self.cfg.AddInstance(iobj, self.proc.GetECId())
6861

    
6862
    # Declare that we don't want to remove the instance lock anymore, as we've
6863
    # added the instance to the config
6864
    del self.remove_locks[locking.LEVEL_INSTANCE]
6865
    # Unlock all the nodes
6866
    if self.op.mode == constants.INSTANCE_IMPORT:
6867
      nodes_keep = [self.op.src_node]
6868
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
6869
                       if node != self.op.src_node]
6870
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
6871
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
6872
    else:
6873
      self.context.glm.release(locking.LEVEL_NODE)
6874
      del self.acquired_locks[locking.LEVEL_NODE]
6875

    
6876
    if self.op.wait_for_sync:
6877
      disk_abort = not _WaitForSync(self, iobj)
6878
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
6879
      # make sure the disks are not degraded (still sync-ing is ok)
6880
      time.sleep(15)
6881
      feedback_fn("* checking mirrors status")
6882
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
6883
    else:
6884
      disk_abort = False
6885

    
6886
    if disk_abort:
6887
      _RemoveDisks(self, iobj)
6888
      self.cfg.RemoveInstance(iobj.name)
6889
      # Make sure the instance lock gets removed
6890
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
6891
      raise errors.OpExecError("There are some degraded disks for"
6892
                               " this instance")
6893

    
6894
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
6895
      if self.op.mode == constants.INSTANCE_CREATE:
6896
        if not self.op.no_install:
6897
          feedback_fn("* running the instance OS create scripts...")
6898
          # FIXME: pass debug option from opcode to backend
6899
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
6900
                                                 self.op.debug_level)
6901
          result.Raise("Could not add os for instance %s"
6902
                       " on node %s" % (instance, pnode_name))
6903

    
6904
      elif self.op.mode == constants.INSTANCE_IMPORT:
6905
        feedback_fn("* running the instance OS import scripts...")
6906

    
6907
        transfers = []
6908

    
6909
        for idx, image in enumerate(self.src_images):
6910
          if not image:
6911
            continue
6912

    
6913
          # FIXME: pass debug option from opcode to backend
6914
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
6915
                                             constants.IEIO_FILE, (image, ),
6916
                                             constants.IEIO_SCRIPT,
6917
                                             (iobj.disks[idx], idx),
6918
                                             None)
6919
          transfers.append(dt)
6920

    
6921
        import_result = \
6922
          masterd.instance.TransferInstanceData(self, feedback_fn,
6923
                                                self.op.src_node, pnode_name,
6924
                                                self.pnode.secondary_ip,
6925
                                                iobj, transfers)
6926
        if not compat.all(import_result):
6927
          self.LogWarning("Some disks for instance %s on node %s were not"
6928
                          " imported successfully" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
        feedback_fn("* preparing remote import...")
        connect_timeout = constants.RIE_CONNECT_TIMEOUT
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

        disk_results = masterd.instance.RemoteImport(self, feedback_fn, iobj,
                                                     self.source_x509_ca,
                                                     self._cds, timeouts)
        if not compat.all(disk_results):
          # TODO: Should the instance still be started, even if some disks
          # failed to import (valid for local imports, too)?
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

        # Run rename script on newly imported instance
        assert iobj.name == instance
        feedback_fn("Running rename script for %s" % instance)
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
                                                   self.source_instance_name,
                                                   self.op.debug_level)
        if result.fail_msg:
          self.LogWarning("Failed to run rename script for %s on node"
                          " %s: %s" % (instance, pnode_name, result.fail_msg))

      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    if self.op.start:
      iobj.admin_up = True
      self.cfg.Update(iobj, feedback_fn)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)


class LUConnectConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      raise errors.OpExecError("Instance %s is not running." % instance.name)

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    hyper = hypervisor.GetHypervisor(instance.hypervisor)
    cluster = self.cfg.GetClusterInfo()
    # beparams and hvparams are passed separately, to avoid editing the
    # instance and then saving the defaults in the instance itself.
    hvparams = cluster.FillHV(instance)
    beparams = cluster.FillBE(instance)
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)

    # build ssh cmdline
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
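    # The result is a complete ssh command line; roughly (illustrative only,
    # the exact options come from ssh.BuildCmd):
    #   ssh -t root@<primary node> '<hypervisor console command>'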


class LUReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "mode", "disks"]
  REQ_BGL = False

  def CheckArguments(self):
    if not hasattr(self.op, "remote_node"):
      self.op.remote_node = None
    if not hasattr(self.op, "iallocator"):
      self.op.iallocator = None
    if not hasattr(self.op, "early_release"):
      self.op.early_release = False

    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.iallocator is not None:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    elif self.op.remote_node is not None:
      remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      self.op.remote_node = remote_node

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks, False, self.op.early_release)

    self.tasklets = [self.replacer]
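    # All the actual work is delegated to the TLReplaceDisks tasklet created
    # above; this LU only handles argument checking, locking and hooks.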

  def DeclareLocks(self, level):
    # If we're not already locking all nodes in the set we have to declare the
    # instance's primary/secondary nodes.
    if (level == locking.LEVEL_NODE and
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    nl = [
      self.cfg.GetMasterNode(),
      instance.primary_node,
      ]
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)
    return env, nl, nl


class LUEvacuateNode(LogicalUnit):
  """Relocate the secondary instances from a node.

  """
  HPATH = "node-evacuate"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    if not hasattr(self.op, "remote_node"):
      self.op.remote_node = None
    if not hasattr(self.op, "iallocator"):
      self.op.iallocator = None
    if not hasattr(self.op, "early_release"):
      self.op.early_release = False

    TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
                                  self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    self.needed_locks = {}

    # Declare node locks
    if self.op.iallocator is not None:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    elif self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    else:
      raise errors.OpPrereqError("Invalid parameters", errors.ECODE_INVAL)

    # Create tasklets for replacing disks for all secondary instances on this
    # node
    names = []
    tasklets = []

    for inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
      logging.debug("Replacing disks for instance %s", inst.name)
      names.append(inst.name)

      replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
                                self.op.iallocator, self.op.remote_node, [],
                                True, self.op.early_release)
      tasklets.append(replacer)

    self.tasklets = tasklets
    self.instance_names = names

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names
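    # Node evacuation is thus a series of "replace secondary" operations, one
    # TLReplaceDisks tasklet per secondary instance, with the iallocator
    # decision delayed until execution time (delay_iallocator=True).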

  def DeclareLocks(self, level):
    # If we're not already locking all nodes in the set we have to declare the
    # instance's primary/secondary nodes.
    if (level == locking.LEVEL_NODE and
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "NODE_NAME": self.op.node_name,
      }

    nl = [self.cfg.GetMasterNode()]

    if self.op.remote_node is not None:
      env["NEW_SECONDARY"] = self.op.remote_node
      nl.append(self.op.remote_node)

    return (env, nl, nl)


class TLReplaceDisks(Tasklet):
  """Replaces disks for an instance.

  Note: Locking is not within the scope of this class.

  """
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
               disks, delay_iallocator, early_release):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.mode = mode
    self.iallocator_name = iallocator_name
    self.remote_node = remote_node
    self.disks = disks
    self.delay_iallocator = delay_iallocator
    self.early_release = early_release

    # Runtime data
    self.instance = None
    self.new_node = None
    self.target_node = None
    self.other_node = None
    self.remote_node_info = None
    self.node_secondary_ip = None

  @staticmethod
  def CheckArguments(mode, remote_node, iallocator):
    """Helper function for users of this class.

    """
    # check for valid parameter combination
    if mode == constants.REPLACE_DISK_CHG:
      if remote_node is None and iallocator is None:
        raise errors.OpPrereqError("When changing the secondary either an"
                                   " iallocator script must be used or the"
                                   " new node given", errors.ECODE_INVAL)

      if remote_node is not None and iallocator is not None:
        raise errors.OpPrereqError("Give either the iallocator or the new"
                                   " secondary, not both", errors.ECODE_INVAL)

    elif remote_node is not None or iallocator is not None:
      # Not replacing the secondary
      raise errors.OpPrereqError("The iallocator and new node options can"
                                 " only be used when changing the"
                                 " secondary node", errors.ECODE_INVAL)
  @staticmethod
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
    """Compute a new secondary node using an IAllocator.

    """
    ial = IAllocator(lu.cfg, lu.rpc,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     name=instance_name,
                     relocate_from=relocate_from)

    ial.Run(iallocator_name)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
                                 " %s" % (iallocator_name, ial.info),
                                 errors.ECODE_NORES)

    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (iallocator_name,
                                  len(ial.result), ial.required_nodes),
                                 errors.ECODE_FAULT)

    remote_node_name = ial.result[0]

    lu.LogInfo("Selected new secondary for instance '%s': %s",
               instance_name, remote_node_name)

    return remote_node_name
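    # relocate_from holds the instance's current secondary node(s); for the
    # single-secondary DRBD8 layout the allocator is expected to return
    # exactly one node, which is used above as ial.result[0].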
  def _FindFaultyDisks(self, node_name):
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
                                    node_name, True)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.instance_name

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
                                 " instances", errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("The instance has a strange layout,"
                                 " expected one secondary but found %d" %
                                 len(instance.secondary_nodes),
                                 errors.ECODE_FAULT)

    if not self.delay_iallocator:
      self._CheckPrereq2()
  def _CheckPrereq2(self):
    """Check prerequisites, second part.

    This function should always be part of CheckPrereq. It was separated and
    is now called from Exec because, during node evacuation, the iallocator
    would otherwise only be called with an unmodified cluster model, not
    taking planned changes into account.

    """
    instance = self.instance
    secondary_node = instance.secondary_nodes[0]

    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is not None:
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node
    else:
      self.remote_node_info = None

    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance.", errors.ECODE_INVAL)

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance.",
                                 errors.ECODE_INVAL)

    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 errors.ECODE_INVAL)

    if self.mode == constants.REPLACE_DISK_AUTO:
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

      if faulty_primary:
        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)

        old_node_info = self.cfg.GetNodeInfo(secondary_node)
        assert old_node_info is not None
        if old_node_info.offline and not self.early_release:
          # doesn't make sense to delay the release
          self.early_release = True
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
                          " early-release mode", secondary_node)

      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)

      # If not specified all disks should be replaced
      if not self.disks:
        self.disks = range(len(self.instance.disks))

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    # Check whether disks are valid
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    node_2nd_ip = {}

    for node_name in [self.target_node, self.other_node, self.new_node]:
      if node_name is not None:
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip

    self.node_secondary_ip = node_2nd_ip
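    # Role summary: target_node is the node whose LVs get replaced, other_node
    # is the peer that must stay healthy, and new_node is only set when the
    # secondary itself is being changed.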

  def Exec(self, feedback_fn):
    """Execute disk replacement.

    This dispatches the disk replacement to the appropriate handler.

    """
    if self.delay_iallocator:
      self._CheckPrereq2()

    if not self.disks:
      feedback_fn("No disks need replacement")
      return

    feedback_fn("Replacing disk(s) %s for %s" %
                (utils.CommaJoin(self.disks), self.instance.name))

    activate_disks = (not self.instance.admin_up)

    # Activate the instance disks if we're replacing them on a down instance
    if activate_disks:
      _StartInstanceDisks(self.lu, self.instance, True)

    try:
      # Should we replace the secondary node?
      if self.new_node is not None:
        fn = self._ExecDrbd8Secondary
      else:
        fn = self._ExecDrbd8DiskOnly

      return fn(feedback_fn)

    finally:
      # Deactivate the instance disks if we're replacing them on a
      # down instance
      if activate_disks:
        _SafeShutdownInstanceDisks(self.lu, self.instance)

  def _CheckVolumeGroup(self, nodes):
    self.lu.LogInfo("Checking volume groups")

    vgname = self.cfg.GetVGName()

    # Make sure volume group exists on all involved nodes
    results = self.rpc.call_vg_list(nodes)
    if not results:
      raise errors.OpExecError("Can't list volume groups on the nodes")

    for node in nodes:
      res = results[node]
      res.Raise("Error checking node %s" % node)
      if vgname not in res.payload:
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
                                 (vgname, node))

  def _CheckDisksExistence(self, nodes):
    # Check disk existence
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        msg = result.fail_msg
        if msg or not result.payload:
          if not msg:
            msg = "disk not found"
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
                                   (idx, node, msg))

  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
                      (idx, node_name))

      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
                                   ldisk=ldisk):
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
                                 " replace disks for instance %s" %
                                 (node_name, self.instance.name))

  def _CreateNewStorage(self, node_name):
    vgname = self.cfg.GetVGName()
    iv_names = {}

    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))

      self.cfg.SetDiskID(dev, node_name)

      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
      names = _GenerateUniqueNames(self.lu, lv_names)

      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
                             logical_id=(vgname, names[0]))
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                             logical_id=(vgname, names[1]))

      new_lvs = [lv_data, lv_meta]
      old_lvs = dev.children
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)

      # we pass force_create=True to force the LVM creation
      for new_lv in new_lvs:
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    return iv_names

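  # The iv_names mapping built by _CreateNewStorage maps each DRBD disk's
  # iv_name (e.g. "disk/0") to (disk object, old LV children, new LVs); the
  # helpers below consume it to verify, swap and clean up the LVs.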
  def _CheckDevices(self, node_name, iv_names):
    for name, (dev, _, _) in iv_names.iteritems():
      self.cfg.SetDiskID(dev, node_name)

      result = self.rpc.call_blockdev_find(node_name, dev)

      msg = result.fail_msg
      if msg or not result.payload:
        if not msg:
          msg = "disk not found"
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
                                 (name, msg))

      if result.payload.is_degraded:
        raise errors.OpExecError("DRBD device %s is degraded!" % name)

  def _RemoveOldStorage(self, node_name, iv_names):
    for name, (_, old_lvs, _) in iv_names.iteritems():
      self.lu.LogInfo("Remove logical volumes for %s" % name)

      for lv in old_lvs:
        self.cfg.SetDiskID(lv, node_name)

        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
        if msg:
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
                             hint="remove unused LVs manually")

  def _ReleaseNodeLock(self, node_name):
    """Releases the lock for a given node."""
    self.lu.context.glm.release(locking.LEVEL_NODE, node_name)

  def _ExecDrbd8DiskOnly(self, feedback_fn):
    """Replace a disk on the primary or secondary for DRBD 8.

    The algorithm for replace is quite complicated:

      1. for each disk to be replaced:

        1. create new LVs on the target node with unique names
        1. detach old LVs from the drbd device
        1. rename old LVs to name_replaced.<time_t>
        1. rename new LVs to old LVs
        1. attach the new LVs (with the old names now) to the drbd device

      1. wait for sync across all devices

      1. for each modified disk:

        1. remove old LVs (which have the name name_replaced.<time_t>)

    Failures are not very well handled.

    """
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.other_node, self.target_node])
    self._CheckVolumeGroup([self.target_node, self.other_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.other_node,
                                self.other_node == self.instance.primary_node,
                                False)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    iv_names = self._CreateNewStorage(self.target_node)

    # Step: for each lv, detach+rename*2+attach
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    for dev, old_lvs, new_lvs in iv_names.itervalues():
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)

      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
                                                     old_lvs)
      result.Raise("Can't detach drbd from local storage on node"
                   " %s for device %s" % (self.target_node, dev.iv_name))
      #dev.children = []
      #cfg.Update(instance)

      # ok, we created the new LVs, so now we know we have the needed
      # storage; as such, we proceed on the target node to rename
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
      # using the assumption that logical_id == physical_id (which in
      # turn is the unique_id on that node)

      # FIXME(iustin): use a better name for the replaced LVs
      temp_suffix = int(time.time())
      ren_fn = lambda d, suff: (d.physical_id[0],
                                d.physical_id[1] + "_replaced-%s" % suff)

      # Build the rename list based on what LVs exist on the node
      rename_old_to_new = []
      for to_ren in old_lvs:
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
        if not result.fail_msg and result.payload:
          # device exists
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))

      self.lu.LogInfo("Renaming the old LVs on the target node")
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_old_to_new)
      result.Raise("Can't rename old LVs on node %s" % self.target_node)

      # Now we rename the new LVs to the old LVs
      self.lu.LogInfo("Renaming the new LVs on the target node")
      rename_new_to_old = [(new, old.physical_id)
                           for old, new in zip(old_lvs, new_lvs)]
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_new_to_old)
      result.Raise("Can't rename new LVs on node %s" % self.target_node)

      for old, new in zip(old_lvs, new_lvs):
        new.logical_id = old.logical_id
        self.cfg.SetDiskID(new, self.target_node)

      for disk in old_lvs:
        disk.logical_id = ren_fn(disk, temp_suffix)
        self.cfg.SetDiskID(disk, self.target_node)

      # Now that the new lvs have the old name, we can add them to the device
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
                                                  new_lvs)
      msg = result.fail_msg
      if msg:
        for new_lv in new_lvs:
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
                                               new_lv).fail_msg
          if msg2:
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
                                     " volumes"))
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)

      dev.children = new_lvs

      self.cfg.Update(self.instance, feedback_fn)

    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release both node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      self._ReleaseNodeLock([self.target_node, self.other_node])

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    cstep += 1
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
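    # With early_release the old LVs were already removed and the node locks
    # dropped before the resync; otherwise the cleanup only happens here,
    # after the sync has finished.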

  def _ExecDrbd8Secondary(self, feedback_fn):
    """Replace the secondary node for DRBD 8.

    The algorithm for replace is quite complicated:
      - for all disks of the instance:
        - create new LVs on the new node with same names
        - shutdown the drbd device on the old secondary
        - disconnect the drbd network on the primary
        - create the drbd device on the new secondary
        - network attach the drbd on the primary, using an artifice:
          the drbd code for Attach() will connect to the network if it
          finds a device which is connected to the good local disks but
          not network enabled
      - wait for sync across all devices
      - remove all disks from the old secondary

    Failures are not very well handled.

    """
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.instance.primary_node])
    self._CheckVolumeGroup([self.instance.primary_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.instance.primary_node, True, True)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
                      (self.new_node, idx))
      # we pass force_create=True to force LVM creation
      for new_lv in dev.children:
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    # Step 4: drbd minors and drbd setup changes
    # after this, we must manually remove the drbd minors on both the
    # error and the success paths
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    minors = self.cfg.AllocateDRBDMinor([self.new_node
                                         for dev in self.instance.disks],
                                        self.instance.name)
    logging.debug("Allocated minors %r", minors)

    iv_names = {}
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
                      (self.new_node, idx))
      # create new devices on new_node; note that we create two IDs:
      # one without port, so the drbd will be activated without
      # networking information on the new node at this stage, and one
      # with network, for the latter activation in step 4
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
      if self.instance.primary_node == o_node1:
        p_minor = o_minor1
      else:
        assert self.instance.primary_node == o_node2, "Three-node instance?"
        p_minor = o_minor2

      new_alone_id = (self.instance.primary_node, self.new_node, None,
                      p_minor, new_minor, o_secret)
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
                    p_minor, new_minor, o_secret)

      iv_names[idx] = (dev, dev.children, new_net_id)
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
                    new_net_id)
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                              logical_id=new_alone_id,
                              children=dev.children,
                              size=dev.size)
      try:
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
                              _GetInstanceInfoText(self.instance), False)
      except errors.GenericError:
        self.cfg.ReleaseDRBDMinors(self.instance.name)
        raise

    # We have new devices, shutdown the drbd on the old secondary
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
      self.cfg.SetDiskID(dev, self.target_node)
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
      if msg:
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           " node: %s" % (idx, msg),
                           hint=("Please cleanup this device manually as"
                                 " soon as possible"))

    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
                                               self.node_secondary_ip,
                                               self.instance.disks)\
                                              [self.instance.primary_node]

    msg = result.fail_msg
    if msg:
      # detaches didn't succeed (unlikely)
      self.cfg.ReleaseDRBDMinors(self.instance.name)
      raise errors.OpExecError("Can't detach the disks from the network on"
                               " old node: %s" % (msg,))

    # if we managed to detach at least one, we update all the disks of
    # the instance to point to the new secondary
    self.lu.LogInfo("Updating instance configuration")
    for dev, _, new_logical_id in iv_names.itervalues():
      dev.logical_id = new_logical_id
      self.cfg.SetDiskID(dev, self.instance.primary_node)

    self.cfg.Update(self.instance, feedback_fn)

    # and now perform the drbd attach
    self.lu.LogInfo("Attaching primary drbds to new secondary"
                    " (standalone => connected)")
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
                                            self.new_node],
                                           self.node_secondary_ip,
                                           self.instance.disks,
                                           self.instance.name,
                                           False)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
                           to_node, msg,
                           hint=("please do a gnt-instance info to see the"
                                 " status of disks"))
    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release all node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      self._ReleaseNodeLock([self.instance.primary_node,
                             self.target_node,
                             self.new_node])

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    cstep += 1
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)


class LURepairNodeStorage(NoHooksLU):
  """Repairs the volume group on a node.

  """
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    _CheckStorageType(self.op.storage_type)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def _CheckFaultyDisks(self, instance, node_name):
    """Ensure faulty disks abort the opcode or at least warn."""
    try:
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
                                  node_name, True):
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
                                   " node '%s'" % (instance.name, node_name),
                                   errors.ECODE_STATE)
    except errors.OpPrereqError, err:
      if self.op.ignore_consistency:
        self.proc.LogWarning(str(err.args[0]))
      else:
        raise

  def CheckPrereq(self):
    """Check prerequisites.

    """
    storage_type = self.op.storage_type

    if (constants.SO_FIX_CONSISTENCY not in
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " repaired" % storage_type,
                                 errors.ECODE_INVAL)

    # Check whether any instance on this node has faulty disks
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
      if not inst.admin_up:
        continue
      check_nodes = set(inst.all_nodes)
      check_nodes.discard(self.op.node_name)
      for inst_node_name in check_nodes:
        self._CheckFaultyDisks(inst, inst_node_name)

  def Exec(self, feedback_fn):
    feedback_fn("Repairing storage unit '%s' on %s ..." %
                (self.op.name, self.op.node_name))

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_execute(self.op.node_name,
                                           self.op.storage_type, st_args,
                                           self.op.name,
                                           constants.SO_FIX_CONSISTENCY)
    result.Raise("Failed to repair storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))
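    # Only storage types that list SO_FIX_CONSISTENCY in
    # constants.VALID_STORAGE_OPERATIONS get this far (enforced in
    # CheckPrereq), so the node-side call is always a supported operation.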


class LUNodeEvacuationStrategy(NoHooksLU):
  """Computes the node evacuation strategy.

  """
  _OP_REQP = ["nodes"]
  REQ_BGL = False

  def CheckArguments(self):
    if not hasattr(self.op, "remote_node"):
      self.op.remote_node = None
    if not hasattr(self.op, "iallocator"):
      self.op.iallocator = None
    if self.op.remote_node is not None and self.op.iallocator is not None:
      raise errors.OpPrereqError("Give either the iallocator or the new"
                                 " secondary, not both", errors.ECODE_INVAL)

  def ExpandNames(self):
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
    self.needed_locks = locks = {}
    if self.op.remote_node is None:
      locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]

  def CheckPrereq(self):
    pass

  def Exec(self, feedback_fn):
    if self.op.remote_node is not None:
      instances = []
      for node in self.op.nodes:
        instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
      result = []
      for i in instances:
        if i.primary_node == self.op.remote_node:
          raise errors.OpPrereqError("Node %s is the primary node of"
                                     " instance %s, cannot use it as"
                                     " secondary" %
                                     (self.op.remote_node, i.name),
                                     errors.ECODE_INVAL)
        result.append([i.name, self.op.remote_node])
    else:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=constants.IALLOCATOR_MODE_MEVAC,
                       evac_nodes=self.op.nodes)
      ial.Run(self.op.iallocator, validate=True)
      if not ial.success:
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
                                 errors.ECODE_NORES)
      result = ial.result
    return result
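    # The result is a list of [instance_name, new_secondary_node] pairs when a
    # remote node was given; with an iallocator, the MEVAC result returned by
    # the allocator script is assumed to have the same shape.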


class LUGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    nodenames = list(instance.all_nodes)
    for node in nodenames:
      _CheckNodeOnline(self, node)


    self.instance = instance

    if instance.disk_template not in constants.DTS_GROWABLE:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing.", errors.ECODE_INVAL)

    self.disk = instance.FindDisk(self.op.disk)

    if instance.disk_template != constants.DT_FILE:
      # TODO: check the free disk space for file, when that feature will be
      # supported
      _CheckNodesFreeDisk(self, nodenames, self.op.amount)

  def Exec(self, feedback_fn):
    """Execute disk grow.

    """
    instance = self.instance
    disk = self.disk

    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block device to grow")

    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
      result.Raise("Grow request failed to node %s" % node)

      # TODO: Rewrite code to work properly
      # DRBD goes into sync mode for a short amount of time after executing the
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
      # calling "resize" in sync mode fails. Sleeping for a short amount of
      # time is a work-around.
      time.sleep(5)

    disk.RecordGrow(self.op.amount)
    self.cfg.Update(instance, feedback_fn)
    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
      if disk_abort:
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
                             " status.\nPlease check the instance.")
      if not instance.admin_up:
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
    elif not instance.admin_up:
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
                           " not supposed to be running because no wait for"
                           " sync mode was requested.")


class LUQueryInstanceData(NoHooksLU):
  """Query runtime instance data.

  """
  _OP_REQP = ["instances", "static"]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

    if not isinstance(self.op.instances, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'",
                                 errors.ECODE_INVAL)

    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = _ExpandInstanceName(self.cfg, name)
        self.wanted_names.append(full_name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
    else:
      self.wanted_names = None
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]
    return

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_find(node, dev)
    if result.offline:
      return None

    result.Raise("Can't compute disk status for %s" % instance_name)

    status = result.payload
    if status is None:
      return None

    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]

    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                              instance.name, dev)
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)

    if dev.children:
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
                      for child in dev.children]
    else:
      dev_children = []

    data = {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
      "mode": dev.mode,
      "size": dev.size,
      }

    return data
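    # pstatus/sstatus are the 7-field tuples from _ComputeBlockdevStatus
    # (dev_path, major, minor, sync_percent, estimated_time, is_degraded,
    # ldisk_status); children holds the same structure computed recursively.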

  def Exec(self, feedback_fn):
    """Gather and return data"""
    result = {}

    cluster = self.cfg.GetClusterInfo()

    for instance in self.wanted_instances:
      if not self.op.static:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        if remote_info and "state" in remote_info:
          remote_state = "up"
        else:
          remote_state = "down"
      else:
        remote_state = None
      if instance.admin_up:
        config_state = "up"
      else:
        config_state = "down"

      disks = [self._ComputeDiskStatus(instance, None, device)
               for device in instance.disks]

      idict = {
        "name": instance.name,
        "config_state": config_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

      result[instance.name] = idict

    return result


class LUSetInstanceParams(LogicalUnit):
  """Modifies an instance's parameters.

  """
  HPATH = "instance-modify"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def CheckArguments(self):
    if not hasattr(self.op, 'nics'):
      self.op.nics = []
    if not hasattr(self.op, 'disks'):
      self.op.disks = []
    if not hasattr(self.op, 'beparams'):
      self.op.beparams = {}
    if not hasattr(self.op, 'hvparams'):
      self.op.hvparams = {}
    if not hasattr(self.op, "disk_template"):
      self.op.disk_template = None
    if not hasattr(self.op, "remote_node"):
      self.op.remote_node = None
    if not hasattr(self.op, "os_name"):
      self.op.os_name = None
    if not hasattr(self.op, "force_variant"):
      self.op.force_variant = False
    self.op.force = getattr(self.op, "force", False)
    if not (self.op.nics or self.op.disks or self.op.disk_template or
            self.op.hvparams or self.op.beparams or self.op.os_name):
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)

    if self.op.hvparams:
      _CheckGlobalHvParams(self.op.hvparams)

    # Disk validation
    disk_addremove = 0
    for disk_op, disk_dict in self.op.disks:
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
      if disk_op == constants.DDM_REMOVE:
        disk_addremove += 1
        continue
      elif disk_op == constants.DDM_ADD:
        disk_addremove += 1
      else:
        if not isinstance(disk_op, int):
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
        if not isinstance(disk_dict, dict):
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

      if disk_op == constants.DDM_ADD:
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
        if mode not in constants.DISK_ACCESS_SET:
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
                                     errors.ECODE_INVAL)
        size = disk_dict.get('size', None)
        if size is None:
          raise errors.OpPrereqError("Required disk parameter size missing",
                                     errors.ECODE_INVAL)
        try:
          size = int(size)
        except (TypeError, ValueError), err:
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
                                     str(err), errors.ECODE_INVAL)
        disk_dict['size'] = size
      else:
        # modification of disk
        if 'size' in disk_dict:
          raise errors.OpPrereqError("Disk size change not possible, use"
                                     " grow-disk", errors.ECODE_INVAL)

    if disk_addremove > 1:
      raise errors.OpPrereqError("Only one disk add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)
8305

    
8306
    if self.op.disks and self.op.disk_template is not None:
8307
      raise errors.OpPrereqError("Disk template conversion and other disk"
8308
                                 " changes not supported at the same time",
8309
                                 errors.ECODE_INVAL)
8310

    
8311
    if self.op.disk_template:
8312
      _CheckDiskTemplate(self.op.disk_template)
8313
      if (self.op.disk_template in constants.DTS_NET_MIRROR and
8314
          self.op.remote_node is None):
8315
        raise errors.OpPrereqError("Changing the disk template to a mirrored"
8316
                                   " one requires specifying a secondary node",
8317
                                   errors.ECODE_INVAL)
8318

    
8319
    # NIC validation
8320
    nic_addremove = 0
8321
    for nic_op, nic_dict in self.op.nics:
8322
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
8323
      if nic_op == constants.DDM_REMOVE:
8324
        nic_addremove += 1
8325
        continue
8326
      elif nic_op == constants.DDM_ADD:
8327
        nic_addremove += 1
8328
      else:
8329
        if not isinstance(nic_op, int):
8330
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8331
        if not isinstance(nic_dict, dict):
8332
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8333
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8334

    
8335
      # nic_dict should be a dict
8336
      nic_ip = nic_dict.get('ip', None)
8337
      if nic_ip is not None:
8338
        if nic_ip.lower() == constants.VALUE_NONE:
8339
          nic_dict['ip'] = None
8340
        else:
8341
          if not utils.IsValidIP(nic_ip):
8342
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8343
                                       errors.ECODE_INVAL)
8344

    
8345
      nic_bridge = nic_dict.get('bridge', None)
8346
      nic_link = nic_dict.get('link', None)
8347
      if nic_bridge and nic_link:
8348
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8349
                                   " at the same time", errors.ECODE_INVAL)
8350
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8351
        nic_dict['bridge'] = None
8352
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8353
        nic_dict['link'] = None
8354

    
8355
      if nic_op == constants.DDM_ADD:
8356
        nic_mac = nic_dict.get('mac', None)
8357
        if nic_mac is None:
8358
          nic_dict['mac'] = constants.VALUE_AUTO
8359

    
8360
      if 'mac' in nic_dict:
8361
        nic_mac = nic_dict['mac']
8362
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8363
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8364

    
8365
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8366
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8367
                                     " modifying an existing nic",
8368
                                     errors.ECODE_INVAL)
8369

    
8370
    if nic_addremove > 1:
8371
      raise errors.OpPrereqError("Only one NIC add or remove operation"
8372
                                 " supported at a time", errors.ECODE_INVAL)
8373

    
8374
  def ExpandNames(self):
8375
    self._ExpandAndLockInstance()
8376
    self.needed_locks[locking.LEVEL_NODE] = []
8377
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8378

    
8379
  def DeclareLocks(self, level):
8380
    if level == locking.LEVEL_NODE:
8381
      self._LockInstancesNodes()
8382
      if self.op.disk_template and self.op.remote_node:
8383
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8384
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
8385

    
8386
  def BuildHooksEnv(self):
8387
    """Build hooks env.
8388

8389
    This runs on the master, primary and secondaries.
8390

8391
    """
8392
    args = dict()
8393
    if constants.BE_MEMORY in self.be_new:
8394
      args['memory'] = self.be_new[constants.BE_MEMORY]
8395
    if constants.BE_VCPUS in self.be_new:
8396
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
8397
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
8398
    # information at all.
8399
    if self.op.nics:
8400
      args['nics'] = []
8401
      nic_override = dict(self.op.nics)
8402
      for idx, nic in enumerate(self.instance.nics):
8403
        if idx in nic_override:
8404
          this_nic_override = nic_override[idx]
8405
        else:
8406
          this_nic_override = {}
8407
        if 'ip' in this_nic_override:
8408
          ip = this_nic_override['ip']
8409
        else:
8410
          ip = nic.ip
8411
        if 'mac' in this_nic_override:
8412
          mac = this_nic_override['mac']
8413
        else:
8414
          mac = nic.mac
8415
        if idx in self.nic_pnew:
8416
          nicparams = self.nic_pnew[idx]
8417
        else:
8418
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
8419
        mode = nicparams[constants.NIC_MODE]
8420
        link = nicparams[constants.NIC_LINK]
8421
        args['nics'].append((ip, mac, mode, link))
8422
      if constants.DDM_ADD in nic_override:
8423
        ip = nic_override[constants.DDM_ADD].get('ip', None)
8424
        mac = nic_override[constants.DDM_ADD]['mac']
8425
        nicparams = self.nic_pnew[constants.DDM_ADD]
8426
        mode = nicparams[constants.NIC_MODE]
8427
        link = nicparams[constants.NIC_LINK]
8428
        args['nics'].append((ip, mac, mode, link))
8429
      elif constants.DDM_REMOVE in nic_override:
8430
        del args['nics'][-1]
8431

    
8432
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
8433
    if self.op.disk_template:
8434
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
8435
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8436
    return env, nl, nl
8437

    
8438
  def CheckPrereq(self):
8439
    """Check prerequisites.
8440

8441
    This only checks the instance list against the existing names.
8442

8443
    """
8444
    self.force = self.op.force
8445

    
8446
    # checking the new params on the primary/secondary nodes
8447

    
8448
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8449
    cluster = self.cluster = self.cfg.GetClusterInfo()
8450
    assert self.instance is not None, \
8451
      "Cannot retrieve locked instance %s" % self.op.instance_name
8452
    pnode = instance.primary_node
8453
    nodelist = list(instance.all_nodes)
8454

    
8455
    if self.op.disk_template:
8456
      if instance.disk_template == self.op.disk_template:
8457
        raise errors.OpPrereqError("Instance already has disk template %s" %
8458
                                   instance.disk_template, errors.ECODE_INVAL)
8459

    
8460
      if (instance.disk_template,
8461
          self.op.disk_template) not in self._DISK_CONVERSIONS:
8462
        raise errors.OpPrereqError("Unsupported disk template conversion from"
8463
                                   " %s to %s" % (instance.disk_template,
8464
                                                  self.op.disk_template),
8465
                                   errors.ECODE_INVAL)
8466
      if self.op.disk_template in constants.DTS_NET_MIRROR:
8467
        _CheckNodeOnline(self, self.op.remote_node)
8468
        _CheckNodeNotDrained(self, self.op.remote_node)
8469
        disks = [{"size": d.size} for d in instance.disks]
8470
        required = _ComputeDiskSize(self.op.disk_template, disks)
8471
        _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8472
        _CheckInstanceDown(self, instance, "cannot change disk template")
8473

    
8474
    # hvparams processing
8475
    if self.op.hvparams:
8476
      hv_type = instance.hypervisor
8477
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
8478
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
8479
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
8480

    
8481
      # local check
8482
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
8483
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8484
      self.hv_new = hv_new # the new actual values
8485
      self.hv_inst = i_hvdict # the new dict (without defaults)
8486
    else:
8487
      self.hv_new = self.hv_inst = {}
8488

    
8489
    # beparams processing
8490
    if self.op.beparams:
8491
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams)
8492
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
8493
      be_new = cluster.SimpleFillBE(i_bedict)
8494
      self.be_new = be_new # the new actual values
8495
      self.be_inst = i_bedict # the new dict (without defaults)
8496
    else:
8497
      self.be_new = self.be_inst = {}
8498

    
8499
    self.warn = []
8500

    
8501
    if constants.BE_MEMORY in self.op.beparams and not self.force:
8502
      mem_check_list = [pnode]
8503
      if be_new[constants.BE_AUTO_BALANCE]:
8504
        # either we changed auto_balance to yes or it was from before
8505
        mem_check_list.extend(instance.secondary_nodes)
8506
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
8507
                                                  instance.hypervisor)
8508
      nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8509
                                         instance.hypervisor)
8510
      pninfo = nodeinfo[pnode]
8511
      msg = pninfo.fail_msg
8512
      if msg:
8513
        # Assume the primary node is unreachable and go ahead
8514
        self.warn.append("Can't get info from primary node %s: %s" %
8515
                         (pnode,  msg))
8516
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
8517
        self.warn.append("Node data from primary node %s doesn't contain"
8518
                         " free memory information" % pnode)
8519
      elif instance_info.fail_msg:
8520
        self.warn.append("Can't get instance runtime information: %s" %
8521
                        instance_info.fail_msg)
8522
      else:
8523
        if instance_info.payload:
8524
          current_mem = int(instance_info.payload['memory'])
8525
        else:
8526
          # Assume instance not running
8527
          # (there is a slight race condition here, but it's not very probable,
8528
          # and we have no other way to check)
8529
          current_mem = 0
8530
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8531
                    pninfo.payload['memory_free'])
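        # Note: miss_mem is the memory the instance would additionally need
        # beyond what it currently uses plus what the primary node has free;
        # a positive value means the new setting cannot be satisfied there.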
        if miss_mem > 0:
8533
          raise errors.OpPrereqError("This change will prevent the instance"
8534
                                     " from starting, due to %d MB of memory"
8535
                                     " missing on its primary node" % miss_mem,
8536
                                     errors.ECODE_NORES)
8537

    
8538
      if be_new[constants.BE_AUTO_BALANCE]:
8539
        for node, nres in nodeinfo.items():
8540
          if node not in instance.secondary_nodes:
8541
            continue
8542
          msg = nres.fail_msg
8543
          if msg:
8544
            self.warn.append("Can't get info from secondary node %s: %s" %
8545
                             (node, msg))
8546
          elif not isinstance(nres.payload.get('memory_free', None), int):
8547
            self.warn.append("Secondary node %s didn't return free"
8548
                             " memory information" % node)
8549
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8550
            self.warn.append("Not enough memory to failover instance to"
8551
                             " secondary node %s" % node)
8552

    
8553
    # NIC processing
8554
    self.nic_pnew = {}
8555
    self.nic_pinst = {}
8556
    for nic_op, nic_dict in self.op.nics:
8557
      if nic_op == constants.DDM_REMOVE:
8558
        if not instance.nics:
8559
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
8560
                                     errors.ECODE_INVAL)
8561
        continue
8562
      if nic_op != constants.DDM_ADD:
8563
        # an existing nic
8564
        if not instance.nics:
8565
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
8566
                                     " no NICs" % nic_op,
8567
                                     errors.ECODE_INVAL)
8568
        if nic_op < 0 or nic_op >= len(instance.nics):
8569
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
8570
                                     " are 0 to %d" %
8571
                                     (nic_op, len(instance.nics) - 1),
8572
                                     errors.ECODE_INVAL)
8573
        old_nic_params = instance.nics[nic_op].nicparams
8574
        old_nic_ip = instance.nics[nic_op].ip
8575
      else:
8576
        old_nic_params = {}
8577
        old_nic_ip = None
8578

    
8579
      update_params_dict = dict([(key, nic_dict[key])
8580
                                 for key in constants.NICS_PARAMETERS
8581
                                 if key in nic_dict])
8582

    
8583
      if 'bridge' in nic_dict:
8584
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
8585

    
8586
      new_nic_params = _GetUpdatedParams(old_nic_params,
8587
                                         update_params_dict)
8588
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
8589
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
8590
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
8591
      self.nic_pinst[nic_op] = new_nic_params
8592
      self.nic_pnew[nic_op] = new_filled_nic_params
8593
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
8594

    
8595
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
8596
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
8597
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
8598
        if msg:
8599
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
8600
          if self.force:
8601
            self.warn.append(msg)
8602
          else:
8603
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
8604
      if new_nic_mode == constants.NIC_MODE_ROUTED:
8605
        if 'ip' in nic_dict:
8606
          nic_ip = nic_dict['ip']
8607
        else:
8608
          nic_ip = old_nic_ip
8609
        if nic_ip is None:
8610
          raise errors.OpPrereqError('Cannot set the nic ip to None'
8611
                                     ' on a routed nic', errors.ECODE_INVAL)
8612
      if 'mac' in nic_dict:
8613
        nic_mac = nic_dict['mac']
8614
        if nic_mac is None:
8615
          raise errors.OpPrereqError('Cannot set the nic mac to None',
8616
                                     errors.ECODE_INVAL)
8617
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8618
          # otherwise generate the mac
8619
          nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
8620
        else:
8621
          # or validate/reserve the current one
8622
          try:
8623
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
8624
          except errors.ReservationError:
8625
            raise errors.OpPrereqError("MAC address %s already in use"
8626
                                       " in cluster" % nic_mac,
8627
                                       errors.ECODE_NOTUNIQUE)
8628

    
8629
    # DISK processing
8630
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
8631
      raise errors.OpPrereqError("Disk operations not supported for"
8632
                                 " diskless instances",
8633
                                 errors.ECODE_INVAL)
8634
    for disk_op, _ in self.op.disks:
8635
      if disk_op == constants.DDM_REMOVE:
8636
        if len(instance.disks) == 1:
8637
          raise errors.OpPrereqError("Cannot remove the last disk of"
8638
                                     " an instance", errors.ECODE_INVAL)
8639
        _CheckInstanceDown(self, instance, "cannot remove disks")
8640

    
8641
      if (disk_op == constants.DDM_ADD and
          len(instance.disks) >= constants.MAX_DISKS):
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
                                   " add more" % constants.MAX_DISKS,
                                   errors.ECODE_STATE)
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
        # an existing disk
        if disk_op < 0 or disk_op >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
                                     " are 0 to %d" %
                                     (disk_op, len(instance.disks) - 1),
                                     errors.ECODE_INVAL)
8653

    
8654
    # OS change
8655
    if self.op.os_name and not self.op.force:
8656
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8657
                      self.op.force_variant)
8658

    
8659
    return
8660

    
8661
  def _ConvertPlainToDrbd(self, feedback_fn):
8662
    """Converts an instance from plain to drbd.
8663

8664
    """
8665
    feedback_fn("Converting template to drbd")
8666
    instance = self.instance
8667
    pnode = instance.primary_node
8668
    snode = self.op.remote_node
8669

    
8670
    # create a fake disk info for _GenerateDiskTemplate
8671
    disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
8672
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
8673
                                      instance.name, pnode, [snode],
8674
                                      disk_info, None, None, 0)
8675
    info = _GetInstanceInfoText(instance)
8676
    feedback_fn("Creating aditional volumes...")
8677
    # first, create the missing data and meta devices
8678
    for disk in new_disks:
8679
      # unfortunately this is... not too nice
8680
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
8681
                            info, True)
8682
      for child in disk.children:
8683
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
8684
    # at this stage, all new LVs have been created, we can rename the
8685
    # old ones
8686
    feedback_fn("Renaming original volumes...")
8687
    rename_list = [(o, n.children[0].logical_id)
8688
                   for (o, n) in zip(instance.disks, new_disks)]
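    # Note: each rename pair maps an existing plain LV to the logical_id of
    # the data child of its new DRBD disk, so the LVs are renamed in place
    # before the DRBD devices are layered on top of them.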
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
8690
    result.Raise("Failed to rename original LVs")
8691

    
8692
    feedback_fn("Initializing DRBD devices...")
8693
    # all child devices are in place, we can now create the DRBD devices
8694
    for disk in new_disks:
8695
      for node in [pnode, snode]:
8696
        f_create = node == pnode
8697
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
8698

    
8699
    # at this point, the instance has been modified
8700
    instance.disk_template = constants.DT_DRBD8
8701
    instance.disks = new_disks
8702
    self.cfg.Update(instance, feedback_fn)
8703

    
8704
    # disks are created, waiting for sync
8705
    disk_abort = not _WaitForSync(self, instance)
8706
    if disk_abort:
8707
      raise errors.OpExecError("There are some degraded disks for"
8708
                               " this instance, please cleanup manually")
8709

    
8710
  def _ConvertDrbdToPlain(self, feedback_fn):
8711
    """Converts an instance from drbd to plain.
8712

8713
    """
8714
    instance = self.instance
8715
    assert len(instance.secondary_nodes) == 1
8716
    pnode = instance.primary_node
8717
    snode = instance.secondary_nodes[0]
8718
    feedback_fn("Converting template to plain")
8719

    
8720
    old_disks = instance.disks
8721
    new_disks = [d.children[0] for d in old_disks]
8722

    
8723
    # copy over size and mode
8724
    for parent, child in zip(old_disks, new_disks):
8725
      child.size = parent.size
8726
      child.mode = parent.mode
8727

    
8728
    # update instance structure
8729
    instance.disks = new_disks
8730
    instance.disk_template = constants.DT_PLAIN
8731
    self.cfg.Update(instance, feedback_fn)
8732

    
8733
    feedback_fn("Removing volumes on the secondary node...")
8734
    for disk in old_disks:
8735
      self.cfg.SetDiskID(disk, snode)
8736
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
8737
      if msg:
8738
        self.LogWarning("Could not remove block device %s on node %s,"
8739
                        " continuing anyway: %s", disk.iv_name, snode, msg)
8740

    
8741
    feedback_fn("Removing unneeded volumes on the primary node...")
8742
    for idx, disk in enumerate(old_disks):
8743
      meta = disk.children[1]
8744
      self.cfg.SetDiskID(meta, pnode)
8745
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
8746
      if msg:
8747
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
8748
                        " continuing anyway: %s", idx, pnode, msg)
8749

    
8750

    
8751
  def Exec(self, feedback_fn):
8752
    """Modifies an instance.
8753

8754
    All parameters take effect only at the next restart of the instance.
8755

8756
    """
8757
    # Process here the warnings from CheckPrereq, as we don't have a
8758
    # feedback_fn there.
8759
    for warn in self.warn:
8760
      feedback_fn("WARNING: %s" % warn)
8761

    
8762
    result = []
8763
    instance = self.instance
8764
    # disk changes
8765
    for disk_op, disk_dict in self.op.disks:
8766
      if disk_op == constants.DDM_REMOVE:
8767
        # remove the last disk
8768
        device = instance.disks.pop()
8769
        device_idx = len(instance.disks)
8770
        for node, disk in device.ComputeNodeTree(instance.primary_node):
8771
          self.cfg.SetDiskID(disk, node)
8772
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
8773
          if msg:
8774
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
8775
                            " continuing anyway", device_idx, node, msg)
8776
        result.append(("disk/%d" % device_idx, "remove"))
8777
      elif disk_op == constants.DDM_ADD:
8778
        # add a new disk
8779
        if instance.disk_template == constants.DT_FILE:
8780
          file_driver, file_path = instance.disks[0].logical_id
8781
          file_path = os.path.dirname(file_path)
8782
        else:
8783
          file_driver = file_path = None
8784
        disk_idx_base = len(instance.disks)
8785
        new_disk = _GenerateDiskTemplate(self,
8786
                                         instance.disk_template,
8787
                                         instance.name, instance.primary_node,
8788
                                         instance.secondary_nodes,
8789
                                         [disk_dict],
8790
                                         file_path,
8791
                                         file_driver,
8792
                                         disk_idx_base)[0]
8793
        instance.disks.append(new_disk)
8794
        info = _GetInstanceInfoText(instance)
8795

    
8796
        logging.info("Creating volume %s for instance %s",
8797
                     new_disk.iv_name, instance.name)
8798
        # Note: this needs to be kept in sync with _CreateDisks
8799
        #HARDCODE
8800
        for node in instance.all_nodes:
8801
          f_create = node == instance.primary_node
8802
          try:
8803
            _CreateBlockDev(self, node, instance, new_disk,
8804
                            f_create, info, f_create)
8805
          except errors.OpExecError, err:
8806
            self.LogWarning("Failed to create volume %s (%s) on"
8807
                            " node %s: %s",
8808
                            new_disk.iv_name, new_disk, node, err)
8809
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
8810
                       (new_disk.size, new_disk.mode)))
8811
      else:
8812
        # change a given disk
8813
        instance.disks[disk_op].mode = disk_dict['mode']
8814
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
8815

    
8816
    if self.op.disk_template:
8817
      r_shut = _ShutdownInstanceDisks(self, instance)
8818
      if not r_shut:
8819
        raise errors.OpExecError("Cannot shutdow instance disks, unable to"
8820
                                 " proceed with disk template conversion")
8821
      mode = (instance.disk_template, self.op.disk_template)
8822
      try:
8823
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
8824
      except:
8825
        self.cfg.ReleaseDRBDMinors(instance.name)
8826
        raise
8827
      result.append(("disk_template", self.op.disk_template))
8828

    
8829
    # NIC changes
8830
    for nic_op, nic_dict in self.op.nics:
8831
      if nic_op == constants.DDM_REMOVE:
8832
        # remove the last nic
8833
        del instance.nics[-1]
8834
        result.append(("nic.%d" % len(instance.nics), "remove"))
8835
      elif nic_op == constants.DDM_ADD:
8836
        # mac and bridge should be set, by now
8837
        mac = nic_dict['mac']
8838
        ip = nic_dict.get('ip', None)
8839
        nicparams = self.nic_pinst[constants.DDM_ADD]
8840
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
8841
        instance.nics.append(new_nic)
8842
        result.append(("nic.%d" % (len(instance.nics) - 1),
8843
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
8844
                       (new_nic.mac, new_nic.ip,
8845
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
8846
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
8847
                       )))
8848
      else:
8849
        for key in 'mac', 'ip':
8850
          if key in nic_dict:
8851
            setattr(instance.nics[nic_op], key, nic_dict[key])
8852
        if nic_op in self.nic_pinst:
8853
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
8854
        for key, val in nic_dict.iteritems():
8855
          result.append(("nic.%s/%d" % (key, nic_op), val))
8856

    
8857
    # hvparams changes
8858
    if self.op.hvparams:
8859
      instance.hvparams = self.hv_inst
8860
      for key, val in self.op.hvparams.iteritems():
8861
        result.append(("hv/%s" % key, val))
8862

    
8863
    # beparams changes
8864
    if self.op.beparams:
8865
      instance.beparams = self.be_inst
8866
      for key, val in self.op.beparams.iteritems():
8867
        result.append(("be/%s" % key, val))
8868

    
8869
    # OS change
8870
    if self.op.os_name:
8871
      instance.os = self.op.os_name
8872

    
8873
    self.cfg.Update(instance, feedback_fn)
8874

    
8875
    return result
8876

    
8877
  _DISK_CONVERSIONS = {
8878
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
8879
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
8880
    }
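  # Note: this dispatch table maps a (current, requested) disk template pair
  # to the conversion routine that Exec calls via
  # self._DISK_CONVERSIONS[mode](self, feedback_fn); only the plain<->drbd
  # conversions listed here are supported (see CheckPrereq).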

    
8882

    
8883
class LUQueryExports(NoHooksLU):
8884
  """Query the exports list
8885

8886
  """
8887
  _OP_REQP = ['nodes']
8888
  REQ_BGL = False
8889

    
8890
  def ExpandNames(self):
8891
    self.needed_locks = {}
8892
    self.share_locks[locking.LEVEL_NODE] = 1
8893
    if not self.op.nodes:
8894
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8895
    else:
8896
      self.needed_locks[locking.LEVEL_NODE] = \
8897
        _GetWantedNodes(self, self.op.nodes)
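    # Note: an empty "nodes" field means querying every node in the cluster
    # (locking.ALL_SET above); otherwise only the named nodes are locked and
    # queried.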

    
8899
  def CheckPrereq(self):
8900
    """Check prerequisites.
8901

8902
    """
8903
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
8904

    
8905
  def Exec(self, feedback_fn):
8906
    """Compute the list of all the exported system images.
8907

8908
    @rtype: dict
8909
    @return: a dictionary with the structure node->(export-list)
8910
        where export-list is a list of the instances exported on
8911
        that node.
8912

8913
    """
8914
    rpcresult = self.rpc.call_export_list(self.nodes)
8915
    result = {}
8916
    for node in rpcresult:
8917
      if rpcresult[node].fail_msg:
8918
        result[node] = False
8919
      else:
8920
        result[node] = rpcresult[node].payload
8921

    
8922
    return result
8923

    
8924

    
8925
class LUPrepareExport(NoHooksLU):
8926
  """Prepares an instance for an export and returns useful information.
8927

8928
  """
8929
  _OP_REQP = ["instance_name", "mode"]
8930
  REQ_BGL = False
8931

    
8932
  def CheckArguments(self):
8933
    """Check the arguments.
8934

8935
    """
8936
    if self.op.mode not in constants.EXPORT_MODES:
8937
      raise errors.OpPrereqError("Invalid export mode %r" % self.op.mode,
8938
                                 errors.ECODE_INVAL)
8939

    
8940
  def ExpandNames(self):
8941
    self._ExpandAndLockInstance()
8942

    
8943
  def CheckPrereq(self):
8944
    """Check prerequisites.
8945

8946
    """
8947
    instance_name = self.op.instance_name
8948

    
8949
    self.instance = self.cfg.GetInstanceInfo(instance_name)
8950
    assert self.instance is not None, \
8951
          "Cannot retrieve locked instance %s" % self.op.instance_name
8952
    _CheckNodeOnline(self, self.instance.primary_node)
8953

    
8954
    self._cds = _GetClusterDomainSecret()
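    # Note: the cluster domain secret is used in Exec below to compute the
    # remote-export handshake and to sign both the generated X509 certificate
    # and its key name (HMAC with a freshly generated salt).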

    
8956
  def Exec(self, feedback_fn):
8957
    """Prepares an instance for an export.
8958

8959
    """
8960
    instance = self.instance
8961

    
8962
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
8963
      salt = utils.GenerateSecret(8)
8964

    
8965
      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
8966
      result = self.rpc.call_x509_cert_create(instance.primary_node,
8967
                                              constants.RIE_CERT_VALIDITY)
8968
      result.Raise("Can't create X509 key and certificate on %s" % result.node)
8969

    
8970
      (name, cert_pem) = result.payload
8971

    
8972
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
8973
                                             cert_pem)
8974

    
8975
      return {
8976
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
8977
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
8978
                          salt),
8979
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
8980
        }
8981

    
8982
    return None
8983

    
8984

    
8985
class LUExportInstance(LogicalUnit):
8986
  """Export an instance to an image in the cluster.
8987

8988
  """
8989
  HPATH = "instance-export"
8990
  HTYPE = constants.HTYPE_INSTANCE
8991
  _OP_REQP = ["instance_name", "target_node", "shutdown"]
8992
  REQ_BGL = False
8993

    
8994
  def CheckArguments(self):
8995
    """Check the arguments.
8996

8997
    """
8998
    _CheckBooleanOpField(self.op, "remove_instance")
8999
    _CheckBooleanOpField(self.op, "ignore_remove_failures")
9000

    
9001
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
9002
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
9003
    self.remove_instance = getattr(self.op, "remove_instance", False)
9004
    self.ignore_remove_failures = getattr(self.op, "ignore_remove_failures",
9005
                                          False)
9006
    self.export_mode = getattr(self.op, "mode", constants.EXPORT_MODE_LOCAL)
9007
    self.x509_key_name = getattr(self.op, "x509_key_name", None)
9008
    self.dest_x509_ca_pem = getattr(self.op, "destination_x509_ca", None)
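    # Illustrative note: for remote exports, x509_key_name is expected to be
    # a (name, hmac, salt) tuple and destination_x509_ca a signed CA in PEM
    # form; both are verified against the cluster domain secret in
    # CheckPrereq below.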

    
9010
    if self.remove_instance and not self.op.shutdown:
9011
      raise errors.OpPrereqError("Can not remove instance without shutting it"
9012
                                 " down before")
9013

    
9014
    if self.export_mode not in constants.EXPORT_MODES:
9015
      raise errors.OpPrereqError("Invalid export mode %r" % self.export_mode,
9016
                                 errors.ECODE_INVAL)
9017

    
9018
    if self.export_mode == constants.EXPORT_MODE_REMOTE:
9019
      if not self.x509_key_name:
9020
        raise errors.OpPrereqError("Missing X509 key name for encryption",
9021
                                   errors.ECODE_INVAL)
9022

    
9023
      if not self.dest_x509_ca_pem:
9024
        raise errors.OpPrereqError("Missing destination X509 CA",
9025
                                   errors.ECODE_INVAL)
9026

    
9027
  def ExpandNames(self):
9028
    self._ExpandAndLockInstance()
9029

    
9030
    # Lock all nodes for local exports
9031
    if self.export_mode == constants.EXPORT_MODE_LOCAL:
9032
      # FIXME: lock only instance primary and destination node
9033
      #
9034
      # Sad but true, for now we have to lock all nodes, as we don't know where
9035
      # the previous export might be, and in this LU we search for it and
9036
      # remove it from its current node. In the future we could fix this by:
9037
      #  - making a tasklet to search (share-lock all), then create the new one,
9038
      #    then one to remove, after
9039
      #  - removing the removal operation altogether
9040
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9041

    
9042
  def DeclareLocks(self, level):
9043
    """Last minute lock declaration."""
9044
    # All nodes are locked anyway, so nothing to do here.
9045

    
9046
  def BuildHooksEnv(self):
9047
    """Build hooks env.
9048

9049
    This will run on the master, primary node and target node.
9050

9051
    """
9052
    env = {
9053
      "EXPORT_MODE": self.export_mode,
9054
      "EXPORT_NODE": self.op.target_node,
9055
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9056
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
9057
      # TODO: Generic function for boolean env variables
9058
      "REMOVE_INSTANCE": str(bool(self.remove_instance)),
9059
      }
9060

    
9061
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9062

    
9063
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9064

    
9065
    if self.export_mode == constants.EXPORT_MODE_LOCAL:
9066
      nl.append(self.op.target_node)
9067

    
9068
    return env, nl, nl
9069

    
9070
  def CheckPrereq(self):
9071
    """Check prerequisites.
9072

9073
    This checks that the instance and node names are valid.
9074

9075
    """
9076
    instance_name = self.op.instance_name
9077

    
9078
    self.instance = self.cfg.GetInstanceInfo(instance_name)
9079
    assert self.instance is not None, \
9080
          "Cannot retrieve locked instance %s" % self.op.instance_name
9081
    _CheckNodeOnline(self, self.instance.primary_node)
9082

    
9083
    if self.export_mode == constants.EXPORT_MODE_LOCAL:
9084
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9085
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9086
      assert self.dst_node is not None
9087

    
9088
      _CheckNodeOnline(self, self.dst_node.name)
9089
      _CheckNodeNotDrained(self, self.dst_node.name)
9090

    
9091
      self._cds = None
9092
      self.dest_disk_info = None
9093
      self.dest_x509_ca = None
9094

    
9095
    elif self.export_mode == constants.EXPORT_MODE_REMOTE:
9096
      self.dst_node = None
9097

    
9098
      if len(self.op.target_node) != len(self.instance.disks):
9099
        raise errors.OpPrereqError(("Received destination information for %s"
9100
                                    " disks, but instance %s has %s disks") %
9101
                                   (len(self.op.target_node), instance_name,
9102
                                    len(self.instance.disks)),
9103
                                   errors.ECODE_INVAL)
9104

    
9105
      cds = _GetClusterDomainSecret()
9106

    
9107
      # Check X509 key name
9108
      try:
9109
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9110
      except (TypeError, ValueError), err:
9111
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9112

    
9113
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9114
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9115
                                   errors.ECODE_INVAL)
9116

    
9117
      # Load and verify CA
9118
      try:
9119
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9120
      except OpenSSL.crypto.Error, err:
9121
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9122
                                   (err, ), errors.ECODE_INVAL)
9123

    
9124
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9125
      if errcode is not None:
9126
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" % (msg, ),
9127
                                   errors.ECODE_INVAL)
9128

    
9129
      self.dest_x509_ca = cert
9130

    
9131
      # Verify target information
9132
      disk_info = []
9133
      for idx, disk_data in enumerate(self.op.target_node):
9134
        try:
9135
          (host, port, magic) = \
9136
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9137
        except errors.GenericError, err:
9138
          raise errors.OpPrereqError("Target info for disk %s: %s" % (idx, err),
9139
                                     errors.ECODE_INVAL)
9140

    
9141
        disk_info.append((host, port, magic))
9142

    
9143
      assert len(disk_info) == len(self.op.target_node)
9144
      self.dest_disk_info = disk_info
9145

    
9146
    else:
9147
      raise errors.ProgrammerError("Unhandled export mode %r" %
9148
                                   self.export_mode)
9149

    
9150
    # instance disk type verification
9151
    # TODO: Implement export support for file-based disks
9152
    for disk in self.instance.disks:
9153
      if disk.dev_type == constants.LD_FILE:
9154
        raise errors.OpPrereqError("Export not supported for instances with"
9155
                                   " file-based disks", errors.ECODE_INVAL)
9156

    
9157
  def _CleanupExports(self, feedback_fn):
9158
    """Removes exports of current instance from all other nodes.
9159

9160
    If an instance in a cluster with nodes A..D was exported to node C, its
9161
    exports will be removed from the nodes A, B and D.
9162

9163
    """
9164
    assert self.export_mode != constants.EXPORT_MODE_REMOTE
9165

    
9166
    nodelist = self.cfg.GetNodeList()
9167
    nodelist.remove(self.dst_node.name)
9168

    
9169
    # on one-node clusters nodelist will be empty after the removal
9170
    # if we proceed the backup would be removed because OpQueryExports
9171
    # substitutes an empty list with the full cluster node list.
9172
    iname = self.instance.name
9173
    if nodelist:
9174
      feedback_fn("Removing old exports for instance %s" % iname)
9175
      exportlist = self.rpc.call_export_list(nodelist)
9176
      for node in exportlist:
9177
        if exportlist[node].fail_msg:
9178
          continue
9179
        if iname in exportlist[node].payload:
9180
          msg = self.rpc.call_export_remove(node, iname).fail_msg
9181
          if msg:
9182
            self.LogWarning("Could not remove older export for instance %s"
9183
                            " on node %s: %s", iname, node, msg)
9184

    
9185
  def Exec(self, feedback_fn):
9186
    """Export an instance to an image in the cluster.
9187

9188
    """
9189
    assert self.export_mode in constants.EXPORT_MODES
9190

    
9191
    instance = self.instance
9192
    src_node = instance.primary_node
9193

    
9194
    if self.op.shutdown:
9195
      # shutdown the instance, but not the disks
9196
      feedback_fn("Shutting down instance %s" % instance.name)
9197
      result = self.rpc.call_instance_shutdown(src_node, instance,
9198
                                               self.shutdown_timeout)
9199
      # TODO: Maybe ignore failures if ignore_remove_failures is set
9200
      result.Raise("Could not shutdown instance %s on"
9201
                   " node %s" % (instance.name, src_node))
9202

    
9203
    # set the disks ID correctly since call_instance_start needs the
9204
    # correct drbd minor to create the symlinks
9205
    for disk in instance.disks:
9206
      self.cfg.SetDiskID(disk, src_node)
9207

    
9208
    activate_disks = (not instance.admin_up)
9209

    
9210
    if activate_disks:
9211
      # Activate the instance disks if we're exporting a stopped instance
9212
      feedback_fn("Activating disks for %s" % instance.name)
9213
      _StartInstanceDisks(self, instance, None)
9214

    
9215
    try:
9216
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
9217
                                                     instance)
9218

    
9219
      helper.CreateSnapshots()
9220
      try:
9221
        if (self.op.shutdown and instance.admin_up and
9222
            not self.remove_instance):
9223
          assert not activate_disks
9224
          feedback_fn("Starting instance %s" % instance.name)
9225
          result = self.rpc.call_instance_start(src_node, instance, None, None)
9226
          msg = result.fail_msg
9227
          if msg:
9228
            feedback_fn("Failed to start instance: %s" % msg)
9229
            _ShutdownInstanceDisks(self, instance)
9230
            raise errors.OpExecError("Could not start instance: %s" % msg)
9231

    
9232
        if self.export_mode == constants.EXPORT_MODE_LOCAL:
9233
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
9234
        elif self.export_mode == constants.EXPORT_MODE_REMOTE:
9235
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
9236
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9237

    
9238
          (key_name, _, _) = self.x509_key_name
9239

    
9240
          dest_ca_pem = \
9241
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
9242
                                            self.dest_x509_ca)
9243

    
9244
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
9245
                                                     key_name, dest_ca_pem,
9246
                                                     timeouts)
9247
      finally:
9248
        helper.Cleanup()
9249

    
9250
      # Check for backwards compatibility
9251
      assert len(dresults) == len(instance.disks)
9252
      assert compat.all(isinstance(i, bool) for i in dresults), \
9253
             "Not all results are boolean: %r" % dresults
9254

    
9255
    finally:
9256
      if activate_disks:
9257
        feedback_fn("Deactivating disks for %s" % instance.name)
9258
        _ShutdownInstanceDisks(self, instance)
9259

    
9260
    # Remove instance if requested
9261
    if self.remove_instance:
9262
      if not (compat.all(dresults) and fin_resu):
9263
        feedback_fn("Not removing instance %s as parts of the export failed" %
9264
                    instance.name)
9265
      else:
9266
        feedback_fn("Removing instance %s" % instance.name)
9267
        _RemoveInstance(self, feedback_fn, instance,
9268
                        self.ignore_remove_failures)
9269

    
9270
    if self.export_mode == constants.EXPORT_MODE_LOCAL:
9271
      self._CleanupExports(feedback_fn)
9272

    
9273
    return fin_resu, dresults
9274

    
9275

    
9276
class LURemoveExport(NoHooksLU):
9277
  """Remove exports related to the named instance.
9278

9279
  """
9280
  _OP_REQP = ["instance_name"]
9281
  REQ_BGL = False
9282

    
9283
  def ExpandNames(self):
9284
    self.needed_locks = {}
9285
    # We need all nodes to be locked in order for RemoveExport to work, but we
9286
    # don't need to lock the instance itself, as nothing will happen to it (and
9287
    # we can remove exports also for a removed instance)
9288
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9289

    
9290
  def CheckPrereq(self):
9291
    """Check prerequisites.
9292
    """
9293
    pass
9294

    
9295
  def Exec(self, feedback_fn):
9296
    """Remove any export.
9297

9298
    """
9299
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
9300
    # If the instance was not found we'll try with the name that was passed in.
9301
    # This will only work if it was an FQDN, though.
9302
    fqdn_warn = False
9303
    if not instance_name:
9304
      fqdn_warn = True
9305
      instance_name = self.op.instance_name
9306

    
9307
    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
9308
    exportlist = self.rpc.call_export_list(locked_nodes)
9309
    found = False
9310
    for node in exportlist:
9311
      msg = exportlist[node].fail_msg
9312
      if msg:
9313
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
9314
        continue
9315
      if instance_name in exportlist[node].payload:
9316
        found = True
9317
        result = self.rpc.call_export_remove(node, instance_name)
9318
        msg = result.fail_msg
9319
        if msg:
9320
          logging.error("Could not remove export for instance %s"
9321
                        " on node %s: %s", instance_name, node, msg)
9322

    
9323
    if fqdn_warn and not found:
9324
      feedback_fn("Export not found. If trying to remove an export belonging"
9325
                  " to a deleted instance please use its Fully Qualified"
9326
                  " Domain Name.")
9327

    
9328

    
9329
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """

  def ExpandNames(self):
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)


class LUGetTags(TagsLU):
  """Returns the tags of a given object.

  """
  _OP_REQP = ["kind", "name"]
  REQ_BGL = False

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())

    
9373

    
9374
class LUSearchTags(NoHooksLU):
9375
  """Searches the tags for a given pattern.
9376

9377
  """
9378
  _OP_REQP = ["pattern"]
9379
  REQ_BGL = False
9380

    
9381
  def ExpandNames(self):
9382
    self.needed_locks = {}
9383

    
9384
  def CheckPrereq(self):
9385
    """Check prerequisites.
9386

9387
    This checks the pattern passed for validity by compiling it.
9388

9389
    """
9390
    try:
9391
      self.re = re.compile(self.op.pattern)
9392
    except re.error, err:
9393
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
9394
                                 (self.op.pattern, err), errors.ECODE_INVAL)
9395

    
9396
  def Exec(self, feedback_fn):
9397
    """Returns the tag list.
9398

9399
    """
9400
    cfg = self.cfg
9401
    tgts = [("/cluster", cfg.GetClusterInfo())]
9402
    ilist = cfg.GetAllInstancesInfo().values()
9403
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
9404
    nlist = cfg.GetAllNodesInfo().values()
9405
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
9406
    results = []
9407
    for path, target in tgts:
9408
      for tag in target.GetTags():
9409
        if self.re.search(tag):
9410
          results.append((path, tag))
9411
    return results
9412

    
9413

    
9414
class LUAddTags(TagsLU):
9415
  """Sets a tag on a given object.
9416

9417
  """
9418
  _OP_REQP = ["kind", "name", "tags"]
9419
  REQ_BGL = False
9420

    
9421
  def CheckPrereq(self):
9422
    """Check prerequisites.
9423

9424
    This checks the type and length of the tag name and value.
9425

9426
    """
9427
    TagsLU.CheckPrereq(self)
9428
    for tag in self.op.tags:
9429
      objects.TaggableObject.ValidateTag(tag)
9430

    
9431
  def Exec(self, feedback_fn):
9432
    """Sets the tag.
9433

9434
    """
9435
    try:
9436
      for tag in self.op.tags:
9437
        self.target.AddTag(tag)
9438
    except errors.TagError, err:
9439
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
9440
    self.cfg.Update(self.target, feedback_fn)
9441

    
9442

    
9443
class LUDelTags(TagsLU):
9444
  """Delete a list of tags from a given object.
9445

9446
  """
9447
  _OP_REQP = ["kind", "name", "tags"]
9448
  REQ_BGL = False
9449

    
9450
  def CheckPrereq(self):
9451
    """Check prerequisites.
9452

9453
    This checks that we have the given tag.
9454

9455
    """
9456
    TagsLU.CheckPrereq(self)
9457
    for tag in self.op.tags:
9458
      objects.TaggableObject.ValidateTag(tag)
9459
    del_tags = frozenset(self.op.tags)
9460
    cur_tags = self.target.GetTags()
9461
    if not del_tags <= cur_tags:
9462
      diff_tags = del_tags - cur_tags
9463
      diff_names = ["'%s'" % tag for tag in diff_tags]
9464
      diff_names.sort()
9465
      raise errors.OpPrereqError("Tag(s) %s not found" %
9466
                                 (",".join(diff_names)), errors.ECODE_NOENT)
9467

    
9468
  def Exec(self, feedback_fn):
9469
    """Remove the tag from the object.
9470

9471
    """
9472
    for tag in self.op.tags:
9473
      self.target.RemoveTag(tag)
9474
    self.cfg.Update(self.target, feedback_fn)
9475

    
9476

    
9477
class LUTestDelay(NoHooksLU):
9478
  """Sleep for a specified amount of time.
9479

9480
  This LU sleeps on the master and/or nodes for a specified amount of
9481
  time.
9482

9483
  """
9484
  _OP_REQP = ["duration", "on_master", "on_nodes"]
9485
  REQ_BGL = False
9486

    
9487
  def CheckArguments(self):
9488
    # TODO: convert to the type system
9489
    self.op.repeat = getattr(self.op, "repeat", 0)
9490
    if self.op.repeat < 0:
9491
      raise errors.OpPrereqError("Repetition count cannot be negative")

    
9493
  def ExpandNames(self):
9494
    """Expand names and set required locks.
9495

9496
    This expands the node list, if any.
9497

9498
    """
9499
    self.needed_locks = {}
9500
    if self.op.on_nodes:
9501
      # _GetWantedNodes can be used here, but is not always appropriate to use
9502
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
9503
      # more information.
9504
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
9505
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
9506

    
9507
  def CheckPrereq(self):
9508
    """Check prerequisites.
9509

9510
    """
9511

    
9512
  def _TestDelay(self):
9513
    """Do the actual sleep.
9514

9515
    """
9516
    if self.op.on_master:
9517
      if not utils.TestDelay(self.op.duration):
9518
        raise errors.OpExecError("Error during master delay test")
9519
    if self.op.on_nodes:
9520
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
9521
      for node, node_result in result.items():
9522
        node_result.Raise("Failure during rpc call to node %s" % node)
9523

    
9524
  def Exec(self, feedback_fn):
9525
    """Execute the test delay opcode, with the wanted repetitions.
9526

9527
    """
9528
    if self.op.repeat == 0:
9529
      self._TestDelay()
9530
    else:
9531
      top_value = self.op.repeat - 1
9532
      for i in range(self.op.repeat):
9533
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
9534
        self._TestDelay()
9535

    
9536

    
9537
class IAllocator(object):
9538
  """IAllocator framework.
9539

9540
  An IAllocator instance has four sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in_text, out_text, in_data, out_data), that
      represent the input (to the external script) in text and data
      structure format, and the output from it, again in two formats
    - the result variables from the script (success, info, nodes) for
      easy usage
9548

9549
  """
9550
  # pylint: disable-msg=R0902
9551
  # lots of instance attributes
9552
  _ALLO_KEYS = [
9553
    "name", "mem_size", "disks", "disk_template",
9554
    "os", "tags", "nics", "vcpus", "hypervisor",
9555
    ]
9556
  _RELO_KEYS = [
9557
    "name", "relocate_from",
9558
    ]
9559
  _EVAC_KEYS = [
9560
    "evac_nodes",
9561
    ]
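  # Illustrative sketch (values are made up, not taken from a real cluster):
  # an allocation request has to pass every key in _ALLO_KEYS as a keyword
  # argument, for example:
  #   ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_ALLOC,
  #                    name="inst1.example.com", mem_size=512,
  #                    disks=[{"size": 1024, "mode": "rw"}],
  #                    disk_template=constants.DT_DRBD8, os="debian-image",
  #                    tags=[], nics=[{"mac": constants.VALUE_AUTO}],
  #                    vcpus=1, hypervisor=constants.HT_XEN_PVM)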

    
9563
  def __init__(self, cfg, rpc, mode, **kwargs):
9564
    self.cfg = cfg
9565
    self.rpc = rpc
9566
    # init buffer variables
9567
    self.in_text = self.out_text = self.in_data = self.out_data = None
9568
    # init all input fields so that pylint is happy
9569
    self.mode = mode
9570
    self.mem_size = self.disks = self.disk_template = None
9571
    self.os = self.tags = self.nics = self.vcpus = None
9572
    self.hypervisor = None
9573
    self.relocate_from = None
9574
    self.name = None
9575
    self.evac_nodes = None
9576
    # computed fields
9577
    self.required_nodes = None
9578
    # init result fields
9579
    self.success = self.info = self.result = None
9580
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
9581
      keyset = self._ALLO_KEYS
9582
      fn = self._AddNewInstance
9583
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
9584
      keyset = self._RELO_KEYS
9585
      fn = self._AddRelocateInstance
9586
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
9587
      keyset = self._EVAC_KEYS
9588
      fn = self._AddEvacuateNodes
9589
    else:
9590
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
9591
                                   " IAllocator" % self.mode)
9592
    for key in kwargs:
9593
      if key not in keyset:
9594
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
9595
                                     " IAllocator" % key)
9596
      setattr(self, key, kwargs[key])
9597

    
9598
    for key in keyset:
9599
      if key not in kwargs:
9600
        raise errors.ProgrammerError("Missing input parameter '%s' to"
9601
                                     " IAllocator" % key)
9602
    self._BuildInputData(fn)
9603

    
9604
  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_results = {}
    node_list = cfg.GetNodeList()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)
    for nname, nresult in node_data.items():
      # first fill in static (config-based) values
      ninfo = cfg.GetNodeInfo(nname)
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        }

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # dynamic node data, including memory used by primary instances
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr.update(pnr_dyn)

      node_results[nname] = pnr
    data["nodes"] = node_results

    # instance data
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                   }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    data["instances"] = instance_data

    self.in_data = data

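  # Rough shape of the structure built above (all concrete values are
  # illustrative, not taken from a real cluster):
  #
  #   {
  #     "version": constants.IALLOCATOR_VERSION,
  #     "cluster_name": "cluster.example.com",
  #     "cluster_tags": [],
  #     "enabled_hypervisors": [...],
  #     "nodes": {
  #       "node1.example.com": {"tags": [], "primary_ip": "...",
  #                             "offline": False, "total_memory": 4096,
  #                             "free_memory": 2048, ...},
  #       },
  #     "instances": {
  #       "inst1.example.com": {"memory": 512, "vcpus": 1,
  #                             "disks": [{"size": 1024, "mode": "w"}], ...},
  #       },
  #   }
  #
  # The "request" key is added later by _BuildInputData.
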
  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This, in combination with _ComputeClusterData, will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    return request

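  # Example of the allocation request returned above, for a hypothetical
  # instance (values are illustrative; "disk_space_total" is computed by
  # _ComputeDiskSize):
  #
  #   {"name": "inst1.example.com", "disk_template": "drbd",
  #    "tags": [], "os": "my-os", "vcpus": 1, "memory": 512,
  #    "disks": [{"size": 1024, "mode": "w"}], "disk_space_total": ...,
  #    "nics": [{"mac": "auto", "ip": None, "bridge": None}],
  #    "required_nodes": 2}
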
  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This, in combination with _ComputeClusterData, will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

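  # Example relocation request (illustrative values; "disk_space_total"
  # again comes from _ComputeDiskSize):
  #
  #   {"name": "inst1.example.com",
  #    "disk_space_total": ...,
  #    "required_nodes": 1,
  #    "relocate_from": ["node3.example.com"]}
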
  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {
      "evac_nodes": self.evac_nodes
      }
    return request

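  # Example evacuation request (illustrative node names):
  #
  #   {"evac_nodes": ["node2.example.com", "node3.example.com"]}
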
  def _BuildInputData(self, fn):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)

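  # After _BuildInputData has run, self.in_data["request"] additionally
  # carries "type": <mode>, and self.in_text holds the serialized document
  # that is handed to the external script.
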
  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and store the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

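  # Sketch of driving Run() without a live RPC layer, e.g. from a test
  # (hypothetical helper; the injected call_fn only needs to return an
  # object exposing Raise() and .payload, like the regular RPC results,
  # with the payload being a serialized reply):
  #
  #   class _FakeRpcResult:
  #     def __init__(self, payload):
  #       self.payload = payload
  #     def Raise(self, msg):
  #       pass
  #
  #   reply = serializer.Dump({"success": True, "info": "", "result": []})
  #   ial.Run("my-allocator",
  #           call_fn=lambda node, name, text: _FakeRpcResult(reply))
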
  def _ValidateResult(self):
    """Process the allocator results.

    This will process and, if successful, save the result in
    self.out_data and the other result attributes.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["result"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
                               " is not a list")
    self.out_data = rdict

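# A well-formed iallocator reply, as checked by IAllocator._ValidateResult
# above, looks roughly like the following after deserialization (illustrative
# content; the exact shape of "result" depends on the allocation mode):
#
#   {"success": True,
#    "info": "allocation successful",
#    "result": ["node1.example.com", "node2.example.com"]}
#
# Older scripts that return "nodes" instead of "result" are still accepted.
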
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  _OP_REQP = ["direction", "mode", "name"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    of the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["name", "mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      for row in self.op.nics:
        if (not isinstance(row, dict) or
            "mac" not in row or
            "ip" not in row or
            "bridge" not in row):
          raise errors.OpPrereqError("Invalid contents of the 'nics'"
                                     " parameter", errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      if not hasattr(self.op, "name"):
        raise errors.OpPrereqError("Missing attribute 'name' on opcode input",
                                   errors.ECODE_INVAL)
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if not hasattr(self.op, "allocator") or self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

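  # For the allocation test mode the opcode is expected to provide, among
  # the attributes checked above, e.g. (illustrative values):
  #
  #   nics  = [{"mac": "auto", "ip": None, "bridge": "xen-br0"}]
  #   disks = [{"size": 1024, "mode": "w"}]
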
  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result
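
  # Note: with direction IALLOCATOR_DIR_IN the LU only returns the generated
  # input document (ial.in_text) without invoking any script, while with
  # IALLOCATOR_DIR_OUT it runs the named allocator (validate=False) and
  # returns the raw output text.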