Statistics
| Branch: | Tag: | Revision:

root / lib / cmdlib.py @ a8c931c0

History | View | Annotate | Download (356.4 kB)

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable-msg=W0201,C0302
25

    
26
# W0201 since most LU attributes are defined in CheckPrereq or similar
27
# functions
28

    
29
# C0302: since we have waaaay to many lines in this module
30

    
31
import os
32
import os.path
33
import time
34
import re
35
import platform
36
import logging
37
import copy
38
import OpenSSL
39

    
40
from ganeti import ssh
41
from ganeti import utils
42
from ganeti import errors
43
from ganeti import hypervisor
44
from ganeti import locking
45
from ganeti import constants
46
from ganeti import objects
47
from ganeti import serializer
48
from ganeti import ssconf
49
from ganeti import uidpool
50
from ganeti import compat
51
from ganeti import masterd
52

    
53
import ganeti.masterd.instance # pylint: disable-msg=W0611
54

    
55

    
56
# need to define these here before the actual LUs
57

    
58
def _EmptyList():
59
  """Returns an empty list.
60

61
  """
62
  return []
63

    
64

    
65
def _EmptyDict():
66
  """Returns an empty dict.
67

68
  """
69
  return {}
70

    
71

    
72
class LogicalUnit(object):
73
  """Logical Unit base class.
74

75
  Subclasses must follow these rules:
76
    - implement ExpandNames
77
    - implement CheckPrereq (except when tasklets are used)
78
    - implement Exec (except when tasklets are used)
79
    - implement BuildHooksEnv
80
    - redefine HPATH and HTYPE
81
    - optionally redefine their run requirements:
82
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
83

84
  Note that all commands require root permissions.
85

86
  @ivar dry_run_result: the value (if any) that will be returned to the caller
87
      in dry-run mode (signalled by opcode dry_run parameter)
88
  @cvar _OP_DEFS: a list of opcode attributes and the defaults values
89
      they should get if not already existing
90

91
  """
92
  HPATH = None
93
  HTYPE = None
94
  _OP_REQP = []
95
  _OP_DEFS = []
96
  REQ_BGL = True
97

    
98
  def __init__(self, processor, op, context, rpc):
99
    """Constructor for LogicalUnit.
100

101
    This needs to be overridden in derived classes in order to check op
102
    validity.
103

104
    """
105
    self.proc = processor
106
    self.op = op
107
    self.cfg = context.cfg
108
    self.context = context
109
    self.rpc = rpc
110
    # Dicts used to declare locking needs to mcpu
111
    self.needed_locks = None
112
    self.acquired_locks = {}
113
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
114
    self.add_locks = {}
115
    self.remove_locks = {}
116
    # Used to force good behavior when calling helper functions
117
    self.recalculate_locks = {}
118
    self.__ssh = None
119
    # logging
120
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
121
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
122
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
123
    # support for dry-run
124
    self.dry_run_result = None
125
    # support for generic debug attribute
126
    if (not hasattr(self.op, "debug_level") or
127
        not isinstance(self.op.debug_level, int)):
128
      self.op.debug_level = 0
129

    
130
    # Tasklets
131
    self.tasklets = None
132

    
133
    for aname, aval in self._OP_DEFS:
134
      if not hasattr(self.op, aname):
135
        if callable(aval):
136
          dval = aval()
137
        else:
138
          dval = aval
139
        setattr(self.op, aname, dval)
140

    
141
    for attr_name in self._OP_REQP:
142
      attr_val = getattr(op, attr_name, None)
143
      if attr_val is None:
144
        raise errors.OpPrereqError("Required parameter '%s' missing" %
145
                                   attr_name, errors.ECODE_INVAL)
146

    
147
    self.CheckArguments()
148

    
149
  def __GetSSH(self):
150
    """Returns the SshRunner object
151

152
    """
153
    if not self.__ssh:
154
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
155
    return self.__ssh
156

    
157
  ssh = property(fget=__GetSSH)
158

    
159
  def CheckArguments(self):
160
    """Check syntactic validity for the opcode arguments.
161

162
    This method is for doing a simple syntactic check and ensure
163
    validity of opcode parameters, without any cluster-related
164
    checks. While the same can be accomplished in ExpandNames and/or
165
    CheckPrereq, doing these separate is better because:
166

167
      - ExpandNames is left as as purely a lock-related function
168
      - CheckPrereq is run after we have acquired locks (and possible
169
        waited for them)
170

171
    The function is allowed to change the self.op attribute so that
172
    later methods can no longer worry about missing parameters.
173

174
    """
175
    pass
176

    
177
  def ExpandNames(self):
178
    """Expand names for this LU.
179

180
    This method is called before starting to execute the opcode, and it should
181
    update all the parameters of the opcode to their canonical form (e.g. a
182
    short node name must be fully expanded after this method has successfully
183
    completed). This way locking, hooks, logging, ecc. can work correctly.
184

185
    LUs which implement this method must also populate the self.needed_locks
186
    member, as a dict with lock levels as keys, and a list of needed lock names
187
    as values. Rules:
188

189
      - use an empty dict if you don't need any lock
190
      - if you don't need any lock at a particular level omit that level
191
      - don't put anything for the BGL level
192
      - if you want all locks at a level use locking.ALL_SET as a value
193

194
    If you need to share locks (rather than acquire them exclusively) at one
195
    level you can modify self.share_locks, setting a true value (usually 1) for
196
    that level. By default locks are not shared.
197

198
    This function can also define a list of tasklets, which then will be
199
    executed in order instead of the usual LU-level CheckPrereq and Exec
200
    functions, if those are not defined by the LU.
201

202
    Examples::
203

204
      # Acquire all nodes and one instance
205
      self.needed_locks = {
206
        locking.LEVEL_NODE: locking.ALL_SET,
207
        locking.LEVEL_INSTANCE: ['instance1.example.tld'],
208
      }
209
      # Acquire just two nodes
210
      self.needed_locks = {
211
        locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
212
      }
213
      # Acquire no locks
214
      self.needed_locks = {} # No, you can't leave it to the default value None
215

216
    """
217
    # The implementation of this method is mandatory only if the new LU is
218
    # concurrent, so that old LUs don't need to be changed all at the same
219
    # time.
220
    if self.REQ_BGL:
221
      self.needed_locks = {} # Exclusive LUs don't need locks.
222
    else:
223
      raise NotImplementedError
224

    
225
  def DeclareLocks(self, level):
226
    """Declare LU locking needs for a level
227

228
    While most LUs can just declare their locking needs at ExpandNames time,
229
    sometimes there's the need to calculate some locks after having acquired
230
    the ones before. This function is called just before acquiring locks at a
231
    particular level, but after acquiring the ones at lower levels, and permits
232
    such calculations. It can be used to modify self.needed_locks, and by
233
    default it does nothing.
234

235
    This function is only called if you have something already set in
236
    self.needed_locks for the level.
237

238
    @param level: Locking level which is going to be locked
239
    @type level: member of ganeti.locking.LEVELS
240

241
    """
242

    
243
  def CheckPrereq(self):
244
    """Check prerequisites for this LU.
245

246
    This method should check that the prerequisites for the execution
247
    of this LU are fulfilled. It can do internode communication, but
248
    it should be idempotent - no cluster or system changes are
249
    allowed.
250

251
    The method should raise errors.OpPrereqError in case something is
252
    not fulfilled. Its return value is ignored.
253

254
    This method should also update all the parameters of the opcode to
255
    their canonical form if it hasn't been done by ExpandNames before.
256

257
    """
258
    if self.tasklets is not None:
259
      for (idx, tl) in enumerate(self.tasklets):
260
        logging.debug("Checking prerequisites for tasklet %s/%s",
261
                      idx + 1, len(self.tasklets))
262
        tl.CheckPrereq()
263
    else:
264
      raise NotImplementedError
265

    
266
  def Exec(self, feedback_fn):
267
    """Execute the LU.
268

269
    This method should implement the actual work. It should raise
270
    errors.OpExecError for failures that are somewhat dealt with in
271
    code, or expected.
272

273
    """
274
    if self.tasklets is not None:
275
      for (idx, tl) in enumerate(self.tasklets):
276
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
277
        tl.Exec(feedback_fn)
278
    else:
279
      raise NotImplementedError
280

    
281
  def BuildHooksEnv(self):
282
    """Build hooks environment for this LU.
283

284
    This method should return a three-node tuple consisting of: a dict
285
    containing the environment that will be used for running the
286
    specific hook for this LU, a list of node names on which the hook
287
    should run before the execution, and a list of node names on which
288
    the hook should run after the execution.
289

290
    The keys of the dict must not have 'GANETI_' prefixed as this will
291
    be handled in the hooks runner. Also note additional keys will be
292
    added by the hooks runner. If the LU doesn't define any
293
    environment, an empty dict (and not None) should be returned.
294

295
    No nodes should be returned as an empty list (and not None).
296

297
    Note that if the HPATH for a LU class is None, this function will
298
    not be called.
299

300
    """
301
    raise NotImplementedError
302

    
303
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
304
    """Notify the LU about the results of its hooks.
305

306
    This method is called every time a hooks phase is executed, and notifies
307
    the Logical Unit about the hooks' result. The LU can then use it to alter
308
    its result based on the hooks.  By default the method does nothing and the
309
    previous result is passed back unchanged but any LU can define it if it
310
    wants to use the local cluster hook-scripts somehow.
311

312
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
313
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
314
    @param hook_results: the results of the multi-node hooks rpc call
315
    @param feedback_fn: function used send feedback back to the caller
316
    @param lu_result: the previous Exec result this LU had, or None
317
        in the PRE phase
318
    @return: the new Exec result, based on the previous result
319
        and hook results
320

321
    """
322
    # API must be kept, thus we ignore the unused argument and could
323
    # be a function warnings
324
    # pylint: disable-msg=W0613,R0201
325
    return lu_result
326

    
327
  def _ExpandAndLockInstance(self):
328
    """Helper function to expand and lock an instance.
329

330
    Many LUs that work on an instance take its name in self.op.instance_name
331
    and need to expand it and then declare the expanded name for locking. This
332
    function does it, and then updates self.op.instance_name to the expanded
333
    name. It also initializes needed_locks as a dict, if this hasn't been done
334
    before.
335

336
    """
337
    if self.needed_locks is None:
338
      self.needed_locks = {}
339
    else:
340
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
341
        "_ExpandAndLockInstance called with instance-level locks set"
342
    self.op.instance_name = _ExpandInstanceName(self.cfg,
343
                                                self.op.instance_name)
344
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
345

    
346
  def _LockInstancesNodes(self, primary_only=False):
347
    """Helper function to declare instances' nodes for locking.
348

349
    This function should be called after locking one or more instances to lock
350
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
351
    with all primary or secondary nodes for instances already locked and
352
    present in self.needed_locks[locking.LEVEL_INSTANCE].
353

354
    It should be called from DeclareLocks, and for safety only works if
355
    self.recalculate_locks[locking.LEVEL_NODE] is set.
356

357
    In the future it may grow parameters to just lock some instance's nodes, or
358
    to just lock primaries or secondary nodes, if needed.
359

360
    If should be called in DeclareLocks in a way similar to::
361

362
      if level == locking.LEVEL_NODE:
363
        self._LockInstancesNodes()
364

365
    @type primary_only: boolean
366
    @param primary_only: only lock primary nodes of locked instances
367

368
    """
369
    assert locking.LEVEL_NODE in self.recalculate_locks, \
370
      "_LockInstancesNodes helper function called with no nodes to recalculate"
371

    
372
    # TODO: check if we're really been called with the instance locks held
373

    
374
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
375
    # future we might want to have different behaviors depending on the value
376
    # of self.recalculate_locks[locking.LEVEL_NODE]
377
    wanted_nodes = []
378
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
379
      instance = self.context.cfg.GetInstanceInfo(instance_name)
380
      wanted_nodes.append(instance.primary_node)
381
      if not primary_only:
382
        wanted_nodes.extend(instance.secondary_nodes)
383

    
384
    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
385
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
386
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
387
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
388

    
389
    del self.recalculate_locks[locking.LEVEL_NODE]
390

    
391

    
392
class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
393
  """Simple LU which runs no hooks.
394

395
  This LU is intended as a parent for other LogicalUnits which will
396
  run no hooks, in order to reduce duplicate code.
397

398
  """
399
  HPATH = None
400
  HTYPE = None
401

    
402
  def BuildHooksEnv(self):
403
    """Empty BuildHooksEnv for NoHooksLu.
404

405
    This just raises an error.
406

407
    """
408
    assert False, "BuildHooksEnv called for NoHooksLUs"
409

    
410

    
411
class Tasklet:
412
  """Tasklet base class.
413

414
  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
415
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
416
  tasklets know nothing about locks.
417

418
  Subclasses must follow these rules:
419
    - Implement CheckPrereq
420
    - Implement Exec
421

422
  """
423
  def __init__(self, lu):
424
    self.lu = lu
425

    
426
    # Shortcuts
427
    self.cfg = lu.cfg
428
    self.rpc = lu.rpc
429

    
430
  def CheckPrereq(self):
431
    """Check prerequisites for this tasklets.
432

433
    This method should check whether the prerequisites for the execution of
434
    this tasklet are fulfilled. It can do internode communication, but it
435
    should be idempotent - no cluster or system changes are allowed.
436

437
    The method should raise errors.OpPrereqError in case something is not
438
    fulfilled. Its return value is ignored.
439

440
    This method should also update all parameters to their canonical form if it
441
    hasn't been done before.
442

443
    """
444
    raise NotImplementedError
445

    
446
  def Exec(self, feedback_fn):
447
    """Execute the tasklet.
448

449
    This method should implement the actual work. It should raise
450
    errors.OpExecError for failures that are somewhat dealt with in code, or
451
    expected.
452

453
    """
454
    raise NotImplementedError
455

    
456

    
457
def _GetWantedNodes(lu, nodes):
458
  """Returns list of checked and expanded node names.
459

460
  @type lu: L{LogicalUnit}
461
  @param lu: the logical unit on whose behalf we execute
462
  @type nodes: list
463
  @param nodes: list of node names or None for all nodes
464
  @rtype: list
465
  @return: the list of nodes, sorted
466
  @raise errors.ProgrammerError: if the nodes parameter is wrong type
467

468
  """
469
  if not isinstance(nodes, list):
470
    raise errors.OpPrereqError("Invalid argument type 'nodes'",
471
                               errors.ECODE_INVAL)
472

    
473
  if not nodes:
474
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
475
      " non-empty list of nodes whose name is to be expanded.")
476

    
477
  wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
478
  return utils.NiceSort(wanted)
479

    
480

    
481
def _GetWantedInstances(lu, instances):
482
  """Returns list of checked and expanded instance names.
483

484
  @type lu: L{LogicalUnit}
485
  @param lu: the logical unit on whose behalf we execute
486
  @type instances: list
487
  @param instances: list of instance names or None for all instances
488
  @rtype: list
489
  @return: the list of instances, sorted
490
  @raise errors.OpPrereqError: if the instances parameter is wrong type
491
  @raise errors.OpPrereqError: if any of the passed instances is not found
492

493
  """
494
  if not isinstance(instances, list):
495
    raise errors.OpPrereqError("Invalid argument type 'instances'",
496
                               errors.ECODE_INVAL)
497

    
498
  if instances:
499
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
500
  else:
501
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
502
  return wanted
503

    
504

    
505
def _GetUpdatedParams(old_params, update_dict,
506
                      use_default=True, use_none=False):
507
  """Return the new version of a parameter dictionary.
508

509
  @type old_params: dict
510
  @param old_params: old parameters
511
  @type update_dict: dict
512
  @param update_dict: dict containing new parameter values, or
513
      constants.VALUE_DEFAULT to reset the parameter to its default
514
      value
515
  @param use_default: boolean
516
  @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
517
      values as 'to be deleted' values
518
  @param use_none: boolean
519
  @type use_none: whether to recognise C{None} values as 'to be
520
      deleted' values
521
  @rtype: dict
522
  @return: the new parameter dictionary
523

524
  """
525
  params_copy = copy.deepcopy(old_params)
526
  for key, val in update_dict.iteritems():
527
    if ((use_default and val == constants.VALUE_DEFAULT) or
528
        (use_none and val is None)):
529
      try:
530
        del params_copy[key]
531
      except KeyError:
532
        pass
533
    else:
534
      params_copy[key] = val
535
  return params_copy
536

    
537

    
538
def _CheckOutputFields(static, dynamic, selected):
539
  """Checks whether all selected fields are valid.
540

541
  @type static: L{utils.FieldSet}
542
  @param static: static fields set
543
  @type dynamic: L{utils.FieldSet}
544
  @param dynamic: dynamic fields set
545

546
  """
547
  f = utils.FieldSet()
548
  f.Extend(static)
549
  f.Extend(dynamic)
550

    
551
  delta = f.NonMatching(selected)
552
  if delta:
553
    raise errors.OpPrereqError("Unknown output fields selected: %s"
554
                               % ",".join(delta), errors.ECODE_INVAL)
555

    
556

    
557
def _CheckBooleanOpField(op, name):
558
  """Validates boolean opcode parameters.
559

560
  This will ensure that an opcode parameter is either a boolean value,
561
  or None (but that it always exists).
562

563
  """
564
  val = getattr(op, name, None)
565
  if not (val is None or isinstance(val, bool)):
566
    raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
567
                               (name, str(val)), errors.ECODE_INVAL)
568
  setattr(op, name, val)
569

    
570

    
571
def _CheckGlobalHvParams(params):
572
  """Validates that given hypervisor params are not global ones.
573

574
  This will ensure that instances don't get customised versions of
575
  global params.
576

577
  """
578
  used_globals = constants.HVC_GLOBALS.intersection(params)
579
  if used_globals:
580
    msg = ("The following hypervisor parameters are global and cannot"
581
           " be customized at instance level, please modify them at"
582
           " cluster level: %s" % utils.CommaJoin(used_globals))
583
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
584

    
585

    
586
def _CheckNodeOnline(lu, node):
587
  """Ensure that a given node is online.
588

589
  @param lu: the LU on behalf of which we make the check
590
  @param node: the node to check
591
  @raise errors.OpPrereqError: if the node is offline
592

593
  """
594
  if lu.cfg.GetNodeInfo(node).offline:
595
    raise errors.OpPrereqError("Can't use offline node %s" % node,
596
                               errors.ECODE_INVAL)
597

    
598

    
599
def _CheckNodeNotDrained(lu, node):
600
  """Ensure that a given node is not drained.
601

602
  @param lu: the LU on behalf of which we make the check
603
  @param node: the node to check
604
  @raise errors.OpPrereqError: if the node is drained
605

606
  """
607
  if lu.cfg.GetNodeInfo(node).drained:
608
    raise errors.OpPrereqError("Can't use drained node %s" % node,
609
                               errors.ECODE_INVAL)
610

    
611

    
612
def _CheckNodeHasOS(lu, node, os_name, force_variant):
613
  """Ensure that a node supports a given OS.
614

615
  @param lu: the LU on behalf of which we make the check
616
  @param node: the node to check
617
  @param os_name: the OS to query about
618
  @param force_variant: whether to ignore variant errors
619
  @raise errors.OpPrereqError: if the node is not supporting the OS
620

621
  """
622
  result = lu.rpc.call_os_get(node, os_name)
623
  result.Raise("OS '%s' not in supported OS list for node %s" %
624
               (os_name, node),
625
               prereq=True, ecode=errors.ECODE_INVAL)
626
  if not force_variant:
627
    _CheckOSVariant(result.payload, os_name)
628

    
629

    
630
def _RequireFileStorage():
631
  """Checks that file storage is enabled.
632

633
  @raise errors.OpPrereqError: when file storage is disabled
634

635
  """
636
  if not constants.ENABLE_FILE_STORAGE:
637
    raise errors.OpPrereqError("File storage disabled at configure time",
638
                               errors.ECODE_INVAL)
639

    
640

    
641
def _CheckDiskTemplate(template):
642
  """Ensure a given disk template is valid.
643

644
  """
645
  if template not in constants.DISK_TEMPLATES:
646
    msg = ("Invalid disk template name '%s', valid templates are: %s" %
647
           (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
648
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
649
  if template == constants.DT_FILE:
650
    _RequireFileStorage()
651

    
652

    
653
def _CheckStorageType(storage_type):
654
  """Ensure a given storage type is valid.
655

656
  """
657
  if storage_type not in constants.VALID_STORAGE_TYPES:
658
    raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
659
                               errors.ECODE_INVAL)
660
  if storage_type == constants.ST_FILE:
661
    _RequireFileStorage()
662

    
663

    
664
def _GetClusterDomainSecret():
665
  """Reads the cluster domain secret.
666

667
  """
668
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
669
                               strict=True)
670

    
671

    
672
def _CheckInstanceDown(lu, instance, reason):
673
  """Ensure that an instance is not running."""
674
  if instance.admin_up:
675
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
676
                               (instance.name, reason), errors.ECODE_STATE)
677

    
678
  pnode = instance.primary_node
679
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
680
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
681
              prereq=True, ecode=errors.ECODE_ENVIRON)
682

    
683
  if instance.name in ins_l.payload:
684
    raise errors.OpPrereqError("Instance %s is running, %s" %
685
                               (instance.name, reason), errors.ECODE_STATE)
686

    
687

    
688
def _ExpandItemName(fn, name, kind):
689
  """Expand an item name.
690

691
  @param fn: the function to use for expansion
692
  @param name: requested item name
693
  @param kind: text description ('Node' or 'Instance')
694
  @return: the resolved (full) name
695
  @raise errors.OpPrereqError: if the item is not found
696

697
  """
698
  full_name = fn(name)
699
  if full_name is None:
700
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
701
                               errors.ECODE_NOENT)
702
  return full_name
703

    
704

    
705
def _ExpandNodeName(cfg, name):
706
  """Wrapper over L{_ExpandItemName} for nodes."""
707
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
708

    
709

    
710
def _ExpandInstanceName(cfg, name):
711
  """Wrapper over L{_ExpandItemName} for instance."""
712
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
713

    
714

    
715
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
716
                          memory, vcpus, nics, disk_template, disks,
717
                          bep, hvp, hypervisor_name):
718
  """Builds instance related env variables for hooks
719

720
  This builds the hook environment from individual variables.
721

722
  @type name: string
723
  @param name: the name of the instance
724
  @type primary_node: string
725
  @param primary_node: the name of the instance's primary node
726
  @type secondary_nodes: list
727
  @param secondary_nodes: list of secondary nodes as strings
728
  @type os_type: string
729
  @param os_type: the name of the instance's OS
730
  @type status: boolean
731
  @param status: the should_run status of the instance
732
  @type memory: string
733
  @param memory: the memory size of the instance
734
  @type vcpus: string
735
  @param vcpus: the count of VCPUs the instance has
736
  @type nics: list
737
  @param nics: list of tuples (ip, mac, mode, link) representing
738
      the NICs the instance has
739
  @type disk_template: string
740
  @param disk_template: the disk template of the instance
741
  @type disks: list
742
  @param disks: the list of (size, mode) pairs
743
  @type bep: dict
744
  @param bep: the backend parameters for the instance
745
  @type hvp: dict
746
  @param hvp: the hypervisor parameters for the instance
747
  @type hypervisor_name: string
748
  @param hypervisor_name: the hypervisor for the instance
749
  @rtype: dict
750
  @return: the hook environment for this instance
751

752
  """
753
  if status:
754
    str_status = "up"
755
  else:
756
    str_status = "down"
757
  env = {
758
    "OP_TARGET": name,
759
    "INSTANCE_NAME": name,
760
    "INSTANCE_PRIMARY": primary_node,
761
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
762
    "INSTANCE_OS_TYPE": os_type,
763
    "INSTANCE_STATUS": str_status,
764
    "INSTANCE_MEMORY": memory,
765
    "INSTANCE_VCPUS": vcpus,
766
    "INSTANCE_DISK_TEMPLATE": disk_template,
767
    "INSTANCE_HYPERVISOR": hypervisor_name,
768
  }
769

    
770
  if nics:
771
    nic_count = len(nics)
772
    for idx, (ip, mac, mode, link) in enumerate(nics):
773
      if ip is None:
774
        ip = ""
775
      env["INSTANCE_NIC%d_IP" % idx] = ip
776
      env["INSTANCE_NIC%d_MAC" % idx] = mac
777
      env["INSTANCE_NIC%d_MODE" % idx] = mode
778
      env["INSTANCE_NIC%d_LINK" % idx] = link
779
      if mode == constants.NIC_MODE_BRIDGED:
780
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
781
  else:
782
    nic_count = 0
783

    
784
  env["INSTANCE_NIC_COUNT"] = nic_count
785

    
786
  if disks:
787
    disk_count = len(disks)
788
    for idx, (size, mode) in enumerate(disks):
789
      env["INSTANCE_DISK%d_SIZE" % idx] = size
790
      env["INSTANCE_DISK%d_MODE" % idx] = mode
791
  else:
792
    disk_count = 0
793

    
794
  env["INSTANCE_DISK_COUNT"] = disk_count
795

    
796
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
797
    for key, value in source.items():
798
      env["INSTANCE_%s_%s" % (kind, key)] = value
799

    
800
  return env
801

    
802

    
803
def _NICListToTuple(lu, nics):
804
  """Build a list of nic information tuples.
805

806
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
807
  value in LUQueryInstanceData.
808

809
  @type lu:  L{LogicalUnit}
810
  @param lu: the logical unit on whose behalf we execute
811
  @type nics: list of L{objects.NIC}
812
  @param nics: list of nics to convert to hooks tuples
813

814
  """
815
  hooks_nics = []
816
  cluster = lu.cfg.GetClusterInfo()
817
  for nic in nics:
818
    ip = nic.ip
819
    mac = nic.mac
820
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
821
    mode = filled_params[constants.NIC_MODE]
822
    link = filled_params[constants.NIC_LINK]
823
    hooks_nics.append((ip, mac, mode, link))
824
  return hooks_nics
825

    
826

    
827
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
828
  """Builds instance related env variables for hooks from an object.
829

830
  @type lu: L{LogicalUnit}
831
  @param lu: the logical unit on whose behalf we execute
832
  @type instance: L{objects.Instance}
833
  @param instance: the instance for which we should build the
834
      environment
835
  @type override: dict
836
  @param override: dictionary with key/values that will override
837
      our values
838
  @rtype: dict
839
  @return: the hook environment dictionary
840

841
  """
842
  cluster = lu.cfg.GetClusterInfo()
843
  bep = cluster.FillBE(instance)
844
  hvp = cluster.FillHV(instance)
845
  args = {
846
    'name': instance.name,
847
    'primary_node': instance.primary_node,
848
    'secondary_nodes': instance.secondary_nodes,
849
    'os_type': instance.os,
850
    'status': instance.admin_up,
851
    'memory': bep[constants.BE_MEMORY],
852
    'vcpus': bep[constants.BE_VCPUS],
853
    'nics': _NICListToTuple(lu, instance.nics),
854
    'disk_template': instance.disk_template,
855
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
856
    'bep': bep,
857
    'hvp': hvp,
858
    'hypervisor_name': instance.hypervisor,
859
  }
860
  if override:
861
    args.update(override)
862
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
863

    
864

    
865
def _AdjustCandidatePool(lu, exceptions):
866
  """Adjust the candidate pool after node operations.
867

868
  """
869
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
870
  if mod_list:
871
    lu.LogInfo("Promoted nodes to master candidate role: %s",
872
               utils.CommaJoin(node.name for node in mod_list))
873
    for name in mod_list:
874
      lu.context.ReaddNode(name)
875
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
876
  if mc_now > mc_max:
877
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
878
               (mc_now, mc_max))
879

    
880

    
881
def _DecideSelfPromotion(lu, exceptions=None):
882
  """Decide whether I should promote myself as a master candidate.
883

884
  """
885
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
886
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
887
  # the new node will increase mc_max with one, so:
888
  mc_should = min(mc_should + 1, cp_size)
889
  return mc_now < mc_should
890

    
891

    
892
def _CheckNicsBridgesExist(lu, target_nics, target_node):
893
  """Check that the brigdes needed by a list of nics exist.
894

895
  """
896
  cluster = lu.cfg.GetClusterInfo()
897
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
898
  brlist = [params[constants.NIC_LINK] for params in paramslist
899
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
900
  if brlist:
901
    result = lu.rpc.call_bridges_exist(target_node, brlist)
902
    result.Raise("Error checking bridges on destination node '%s'" %
903
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
904

    
905

    
906
def _CheckInstanceBridgesExist(lu, instance, node=None):
907
  """Check that the brigdes needed by an instance exist.
908

909
  """
910
  if node is None:
911
    node = instance.primary_node
912
  _CheckNicsBridgesExist(lu, instance.nics, node)
913

    
914

    
915
def _CheckOSVariant(os_obj, name):
916
  """Check whether an OS name conforms to the os variants specification.
917

918
  @type os_obj: L{objects.OS}
919
  @param os_obj: OS object to check
920
  @type name: string
921
  @param name: OS name passed by the user, to check for validity
922

923
  """
924
  if not os_obj.supported_variants:
925
    return
926
  try:
927
    variant = name.split("+", 1)[1]
928
  except IndexError:
929
    raise errors.OpPrereqError("OS name must include a variant",
930
                               errors.ECODE_INVAL)
931

    
932
  if variant not in os_obj.supported_variants:
933
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
934

    
935

    
936
def _GetNodeInstancesInner(cfg, fn):
937
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
938

    
939

    
940
def _GetNodeInstances(cfg, node_name):
941
  """Returns a list of all primary and secondary instances on a node.
942

943
  """
944

    
945
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
946

    
947

    
948
def _GetNodePrimaryInstances(cfg, node_name):
949
  """Returns primary instances on a node.
950

951
  """
952
  return _GetNodeInstancesInner(cfg,
953
                                lambda inst: node_name == inst.primary_node)
954

    
955

    
956
def _GetNodeSecondaryInstances(cfg, node_name):
957
  """Returns secondary instances on a node.
958

959
  """
960
  return _GetNodeInstancesInner(cfg,
961
                                lambda inst: node_name in inst.secondary_nodes)
962

    
963

    
964
def _GetStorageTypeArgs(cfg, storage_type):
965
  """Returns the arguments for a storage type.
966

967
  """
968
  # Special case for file storage
969
  if storage_type == constants.ST_FILE:
970
    # storage.FileStorage wants a list of storage directories
971
    return [[cfg.GetFileStorageDir()]]
972

    
973
  return []
974

    
975

    
976
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
977
  faulty = []
978

    
979
  for dev in instance.disks:
980
    cfg.SetDiskID(dev, node_name)
981

    
982
  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
983
  result.Raise("Failed to get disk status from node %s" % node_name,
984
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
985

    
986
  for idx, bdev_status in enumerate(result.payload):
987
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
988
      faulty.append(idx)
989

    
990
  return faulty
991

    
992

    
993
class LUPostInitCluster(LogicalUnit):
994
  """Logical unit for running hooks after cluster initialization.
995

996
  """
997
  HPATH = "cluster-init"
998
  HTYPE = constants.HTYPE_CLUSTER
999
  _OP_REQP = []
1000

    
1001
  def BuildHooksEnv(self):
1002
    """Build hooks env.
1003

1004
    """
1005
    env = {"OP_TARGET": self.cfg.GetClusterName()}
1006
    mn = self.cfg.GetMasterNode()
1007
    return env, [], [mn]
1008

    
1009
  def CheckPrereq(self):
1010
    """No prerequisites to check.
1011

1012
    """
1013
    return True
1014

    
1015
  def Exec(self, feedback_fn):
1016
    """Nothing to do.
1017

1018
    """
1019
    return True
1020

    
1021

    
1022
class LUDestroyCluster(LogicalUnit):
1023
  """Logical unit for destroying the cluster.
1024

1025
  """
1026
  HPATH = "cluster-destroy"
1027
  HTYPE = constants.HTYPE_CLUSTER
1028
  _OP_REQP = []
1029

    
1030
  def BuildHooksEnv(self):
1031
    """Build hooks env.
1032

1033
    """
1034
    env = {"OP_TARGET": self.cfg.GetClusterName()}
1035
    return env, [], []
1036

    
1037
  def CheckPrereq(self):
1038
    """Check prerequisites.
1039

1040
    This checks whether the cluster is empty.
1041

1042
    Any errors are signaled by raising errors.OpPrereqError.
1043

1044
    """
1045
    master = self.cfg.GetMasterNode()
1046

    
1047
    nodelist = self.cfg.GetNodeList()
1048
    if len(nodelist) != 1 or nodelist[0] != master:
1049
      raise errors.OpPrereqError("There are still %d node(s) in"
1050
                                 " this cluster." % (len(nodelist) - 1),
1051
                                 errors.ECODE_INVAL)
1052
    instancelist = self.cfg.GetInstanceList()
1053
    if instancelist:
1054
      raise errors.OpPrereqError("There are still %d instance(s) in"
1055
                                 " this cluster." % len(instancelist),
1056
                                 errors.ECODE_INVAL)
1057

    
1058
  def Exec(self, feedback_fn):
1059
    """Destroys the cluster.
1060

1061
    """
1062
    master = self.cfg.GetMasterNode()
1063
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
1064

    
1065
    # Run post hooks on master node before it's removed
1066
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
1067
    try:
1068
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
1069
    except:
1070
      # pylint: disable-msg=W0702
1071
      self.LogWarning("Errors occurred running hooks on %s" % master)
1072

    
1073
    result = self.rpc.call_node_stop_master(master, False)
1074
    result.Raise("Could not disable the master role")
1075

    
1076
    if modify_ssh_setup:
1077
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
1078
      utils.CreateBackup(priv_key)
1079
      utils.CreateBackup(pub_key)
1080

    
1081
    return master
1082

    
1083

    
1084
def _VerifyCertificate(filename):
1085
  """Verifies a certificate for LUVerifyCluster.
1086

1087
  @type filename: string
1088
  @param filename: Path to PEM file
1089

1090
  """
1091
  try:
1092
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1093
                                           utils.ReadFile(filename))
1094
  except Exception, err: # pylint: disable-msg=W0703
1095
    return (LUVerifyCluster.ETYPE_ERROR,
1096
            "Failed to load X509 certificate %s: %s" % (filename, err))
1097

    
1098
  (errcode, msg) = \
1099
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1100
                                constants.SSL_CERT_EXPIRATION_ERROR)
1101

    
1102
  if msg:
1103
    fnamemsg = "While verifying %s: %s" % (filename, msg)
1104
  else:
1105
    fnamemsg = None
1106

    
1107
  if errcode is None:
1108
    return (None, fnamemsg)
1109
  elif errcode == utils.CERT_WARNING:
1110
    return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
1111
  elif errcode == utils.CERT_ERROR:
1112
    return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)
1113

    
1114
  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1115

    
1116

    
1117
class LUVerifyCluster(LogicalUnit):
1118
  """Verifies the cluster status.
1119

1120
  """
1121
  HPATH = "cluster-verify"
1122
  HTYPE = constants.HTYPE_CLUSTER
1123
  _OP_REQP = ["skip_checks", "verbose", "error_codes", "debug_simulate_errors"]
1124
  REQ_BGL = False
1125

    
1126
  TCLUSTER = "cluster"
1127
  TNODE = "node"
1128
  TINSTANCE = "instance"
1129

    
1130
  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1131
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1132
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1133
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1134
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1135
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1136
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1137
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1138
  ENODEDRBD = (TNODE, "ENODEDRBD")
1139
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1140
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
1141
  ENODEHV = (TNODE, "ENODEHV")
1142
  ENODELVM = (TNODE, "ENODELVM")
1143
  ENODEN1 = (TNODE, "ENODEN1")
1144
  ENODENET = (TNODE, "ENODENET")
1145
  ENODEOS = (TNODE, "ENODEOS")
1146
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1147
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1148
  ENODERPC = (TNODE, "ENODERPC")
1149
  ENODESSH = (TNODE, "ENODESSH")
1150
  ENODEVERSION = (TNODE, "ENODEVERSION")
1151
  ENODESETUP = (TNODE, "ENODESETUP")
1152
  ENODETIME = (TNODE, "ENODETIME")
1153

    
1154
  ETYPE_FIELD = "code"
1155
  ETYPE_ERROR = "ERROR"
1156
  ETYPE_WARNING = "WARNING"
1157

    
1158
  class NodeImage(object):
1159
    """A class representing the logical and physical status of a node.
1160

1161
    @type name: string
1162
    @ivar name: the node name to which this object refers
1163
    @ivar volumes: a structure as returned from
1164
        L{ganeti.backend.GetVolumeList} (runtime)
1165
    @ivar instances: a list of running instances (runtime)
1166
    @ivar pinst: list of configured primary instances (config)
1167
    @ivar sinst: list of configured secondary instances (config)
1168
    @ivar sbp: diction of {secondary-node: list of instances} of all peers
1169
        of this node (config)
1170
    @ivar mfree: free memory, as reported by hypervisor (runtime)
1171
    @ivar dfree: free disk, as reported by the node (runtime)
1172
    @ivar offline: the offline status (config)
1173
    @type rpc_fail: boolean
1174
    @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1175
        not whether the individual keys were correct) (runtime)
1176
    @type lvm_fail: boolean
1177
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1178
    @type hyp_fail: boolean
1179
    @ivar hyp_fail: whether the RPC call didn't return the instance list
1180
    @type ghost: boolean
1181
    @ivar ghost: whether this is a known node or not (config)
1182
    @type os_fail: boolean
1183
    @ivar os_fail: whether the RPC call didn't return valid OS data
1184
    @type oslist: list
1185
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1186

1187
    """
1188
    def __init__(self, offline=False, name=None):
1189
      self.name = name
1190
      self.volumes = {}
1191
      self.instances = []
1192
      self.pinst = []
1193
      self.sinst = []
1194
      self.sbp = {}
1195
      self.mfree = 0
1196
      self.dfree = 0
1197
      self.offline = offline
1198
      self.rpc_fail = False
1199
      self.lvm_fail = False
1200
      self.hyp_fail = False
1201
      self.ghost = False
1202
      self.os_fail = False
1203
      self.oslist = {}
1204

    
1205
  def ExpandNames(self):
1206
    self.needed_locks = {
1207
      locking.LEVEL_NODE: locking.ALL_SET,
1208
      locking.LEVEL_INSTANCE: locking.ALL_SET,
1209
    }
1210
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1211

    
1212
  def _Error(self, ecode, item, msg, *args, **kwargs):
1213
    """Format an error message.
1214

1215
    Based on the opcode's error_codes parameter, either format a
1216
    parseable error code, or a simpler error string.
1217

1218
    This must be called only from Exec and functions called from Exec.
1219

1220
    """
1221
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1222
    itype, etxt = ecode
1223
    # first complete the msg
1224
    if args:
1225
      msg = msg % args
1226
    # then format the whole message
1227
    if self.op.error_codes:
1228
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1229
    else:
1230
      if item:
1231
        item = " " + item
1232
      else:
1233
        item = ""
1234
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1235
    # and finally report it via the feedback_fn
1236
    self._feedback_fn("  - %s" % msg)
1237

    
1238
  def _ErrorIf(self, cond, *args, **kwargs):
1239
    """Log an error message if the passed condition is True.
1240

1241
    """
1242
    cond = bool(cond) or self.op.debug_simulate_errors
1243
    if cond:
1244
      self._Error(*args, **kwargs)
1245
    # do not mark the operation as failed for WARN cases only
1246
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1247
      self.bad = self.bad or cond
1248

    
1249
  def _VerifyNode(self, ninfo, nresult):
1250
    """Run multiple tests against a node.
1251

1252
    Test list:
1253

1254
      - compares ganeti version
1255
      - checks vg existence and size > 20G
1256
      - checks config file checksum
1257
      - checks ssh to other nodes
1258

1259
    @type ninfo: L{objects.Node}
1260
    @param ninfo: the node to check
1261
    @param nresult: the results from the node
1262
    @rtype: boolean
1263
    @return: whether overall this call was successful (and we can expect
1264
         reasonable values in the respose)
1265

1266
    """
1267
    node = ninfo.name
1268
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1269

    
1270
    # main result, nresult should be a non-empty dict
1271
    test = not nresult or not isinstance(nresult, dict)
1272
    _ErrorIf(test, self.ENODERPC, node,
1273
                  "unable to verify node: no data returned")
1274
    if test:
1275
      return False
1276

    
1277
    # compares ganeti version
1278
    local_version = constants.PROTOCOL_VERSION
1279
    remote_version = nresult.get("version", None)
1280
    test = not (remote_version and
1281
                isinstance(remote_version, (list, tuple)) and
1282
                len(remote_version) == 2)
1283
    _ErrorIf(test, self.ENODERPC, node,
1284
             "connection to node returned invalid data")
1285
    if test:
1286
      return False
1287

    
1288
    test = local_version != remote_version[0]
1289
    _ErrorIf(test, self.ENODEVERSION, node,
1290
             "incompatible protocol versions: master %s,"
1291
             " node %s", local_version, remote_version[0])
1292
    if test:
1293
      return False
1294

    
1295
    # node seems compatible, we can actually try to look into its results
1296

    
1297
    # full package version
1298
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1299
                  self.ENODEVERSION, node,
1300
                  "software version mismatch: master %s, node %s",
1301
                  constants.RELEASE_VERSION, remote_version[1],
1302
                  code=self.ETYPE_WARNING)
1303

    
1304
    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1305
    if isinstance(hyp_result, dict):
1306
      for hv_name, hv_result in hyp_result.iteritems():
1307
        test = hv_result is not None
1308
        _ErrorIf(test, self.ENODEHV, node,
1309
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1310

    
1311

    
1312
    test = nresult.get(constants.NV_NODESETUP,
1313
                           ["Missing NODESETUP results"])
1314
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1315
             "; ".join(test))
1316

    
1317
    return True
1318

    
1319
  def _VerifyNodeTime(self, ninfo, nresult,
1320
                      nvinfo_starttime, nvinfo_endtime):
1321
    """Check the node time.
1322

1323
    @type ninfo: L{objects.Node}
1324
    @param ninfo: the node to check
1325
    @param nresult: the remote results for the node
1326
    @param nvinfo_starttime: the start time of the RPC call
1327
    @param nvinfo_endtime: the end time of the RPC call
1328

1329
    """
1330
    node = ninfo.name
1331
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1332

    
1333
    ntime = nresult.get(constants.NV_TIME, None)
1334
    try:
1335
      ntime_merged = utils.MergeTime(ntime)
1336
    except (ValueError, TypeError):
1337
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1338
      return
1339

    
1340
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1341
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1342
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1343
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1344
    else:
1345
      ntime_diff = None
1346

    
1347
    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1348
             "Node time diverges by at least %s from master node time",
1349
             ntime_diff)
1350

    
1351
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1352
    """Check the node time.
1353

1354
    @type ninfo: L{objects.Node}
1355
    @param ninfo: the node to check
1356
    @param nresult: the remote results for the node
1357
    @param vg_name: the configured VG name
1358

1359
    """
1360
    if vg_name is None:
1361
      return
1362

    
1363
    node = ninfo.name
1364
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1365

    
1366
    # checks vg existence and size > 20G
1367
    vglist = nresult.get(constants.NV_VGLIST, None)
1368
    test = not vglist
1369
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1370
    if not test:
1371
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1372
                                            constants.MIN_VG_SIZE)
1373
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1374

    
1375
    # check pv names
1376
    pvlist = nresult.get(constants.NV_PVLIST, None)
1377
    test = pvlist is None
1378
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1379
    if not test:
1380
      # check that ':' is not present in PV names, since it's a
1381
      # special character for lvcreate (denotes the range of PEs to
1382
      # use on the PV)
1383
      for _, pvname, owner_vg in pvlist:
1384
        test = ":" in pvname
1385
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1386
                 " '%s' of VG '%s'", pvname, owner_vg)
1387

    
1388
  def _VerifyNodeNetwork(self, ninfo, nresult):
1389
    """Check the node time.
1390

1391
    @type ninfo: L{objects.Node}
1392
    @param ninfo: the node to check
1393
    @param nresult: the remote results for the node
1394

1395
    """
1396
    node = ninfo.name
1397
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1398

    
1399
    test = constants.NV_NODELIST not in nresult
1400
    _ErrorIf(test, self.ENODESSH, node,
1401
             "node hasn't returned node ssh connectivity data")
1402
    if not test:
1403
      if nresult[constants.NV_NODELIST]:
1404
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1405
          _ErrorIf(True, self.ENODESSH, node,
1406
                   "ssh communication with node '%s': %s", a_node, a_msg)
1407

    
1408
    test = constants.NV_NODENETTEST not in nresult
1409
    _ErrorIf(test, self.ENODENET, node,
1410
             "node hasn't returned node tcp connectivity data")
1411
    if not test:
1412
      if nresult[constants.NV_NODENETTEST]:
1413
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1414
        for anode in nlist:
1415
          _ErrorIf(True, self.ENODENET, node,
1416
                   "tcp communication with node '%s': %s",
1417
                   anode, nresult[constants.NV_NODENETTEST][anode])
1418

    
1419
    test = constants.NV_MASTERIP not in nresult
1420
    _ErrorIf(test, self.ENODENET, node,
1421
             "node hasn't returned node master IP reachability data")
1422
    if not test:
1423
      if not nresult[constants.NV_MASTERIP]:
1424
        if node == self.master_node:
1425
          msg = "the master node cannot reach the master IP (not configured?)"
1426
        else:
1427
          msg = "cannot reach the master IP"
1428
        _ErrorIf(True, self.ENODENET, node, msg)
1429

    
1430

    
1431
  def _VerifyInstance(self, instance, instanceconfig, node_image):
1432
    """Verify an instance.
1433

1434
    This function checks to see if the required block devices are
1435
    available on the instance's node.
1436

1437
    """
1438
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1439
    node_current = instanceconfig.primary_node
1440

    
1441
    node_vol_should = {}
1442
    instanceconfig.MapLVsByNode(node_vol_should)
1443

    
1444
    for node in node_vol_should:
1445
      n_img = node_image[node]
1446
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1447
        # ignore missing volumes on offline or broken nodes
1448
        continue
1449
      for volume in node_vol_should[node]:
1450
        test = volume not in n_img.volumes
1451
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1452
                 "volume %s missing on node %s", volume, node)
1453

    
1454
    if instanceconfig.admin_up:
1455
      pri_img = node_image[node_current]
1456
      test = instance not in pri_img.instances and not pri_img.offline
1457
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
1458
               "instance not running on its primary node %s",
1459
               node_current)
1460

    
1461
    for node, n_img in node_image.items():
1462
      if (not node == node_current):
1463
        test = instance in n_img.instances
1464
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1465
                 "instance should not run on node %s", node)
1466

    
1467
  def _VerifyOrphanVolumes(self, node_vol_should, node_image):
1468
    """Verify if there are any unknown volumes in the cluster.
1469

1470
    The .os, .swap and backup volumes are ignored. All other volumes are
1471
    reported as unknown.
1472

1473
    """
1474
    for node, n_img in node_image.items():
1475
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1476
        # skip non-healthy nodes
1477
        continue
1478
      for volume in n_img.volumes:
1479
        test = (node not in node_vol_should or
1480
                volume not in node_vol_should[node])
1481
        self._ErrorIf(test, self.ENODEORPHANLV, node,
1482
                      "volume %s is unknown", volume)
1483

    
1484
  def _VerifyOrphanInstances(self, instancelist, node_image):
1485
    """Verify the list of running instances.
1486

1487
    This checks what instances are running but unknown to the cluster.
1488

1489
    """
1490
    for node, n_img in node_image.items():
1491
      for o_inst in n_img.instances:
1492
        test = o_inst not in instancelist
1493
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1494
                      "instance %s on node %s should not exist", o_inst, node)
1495

    
1496
  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    """
    for node, n_img in node_image.items():
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
      for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate failovers"
                      " should peer node %s fail", prinode)

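  # Worked example for the N+1 check above (illustrative only, with made-up
  # names and numbers): if node A holds the secondaries of inst1
  # (BE_MEMORY=512) and inst2 (BE_MEMORY=1024) whose common primary is node B,
  # then n_img.sbp == {"B": ["inst1", "inst2"]} on node A and the check
  # reduces to:
  #   needed_mem = 512 + 1024            # only auto-balanced instances count
  #   test = n_img.mfree < needed_mem    # A could not absorb B's failure
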
  def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
                       master_files):
    """Verifies and computes the node required file checksums.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param file_list: required list of files
    @param local_cksum: dictionary of local files and their checksums
    @param master_files: list of files that only masters should have

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_cksum = nresult.get(constants.NV_FILELIST, None)
    test = not isinstance(remote_cksum, dict)
    _ErrorIf(test, self.ENODEFILECHECK, node,
             "node hasn't returned file checksum data")
    if test:
      return

    for file_name in file_list:
      node_is_mc = ninfo.master_candidate
      must_have = (file_name not in master_files) or node_is_mc
      # missing
      test1 = file_name not in remote_cksum
      # invalid checksum
      test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
      # existing and good
      test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
      _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
               "file '%s' missing", file_name)
      _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
               "file '%s' has wrong checksum", file_name)
      # not candidate and this is not a must-have file
      _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist on non master"
               " candidates (and the file is outdated)", file_name)
      # all good, except non-master/non-must have combination
      _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist"
               " on non master candidates", file_name)

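  # Decision summary for the file checks above (illustrative): for each file,
  # with must_have = (file not in master_files) or node is a master candidate,
  #   missing       and must_have      -> "file missing"
  #   wrong cksum   and must_have      -> "wrong checksum"
  #   wrong cksum   and not must_have  -> outdated copy on a non-candidate
  #   good cksum    and not must_have  -> the file should not be there at all
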
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_map):
    """Verifies the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # compute the DRBD minors
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, self.ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
      # ghost instance should not be running, but otherwise we
      # don't give double warnings (both ghost instance and
      # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name, instance.admin_up)

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, self.ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return

    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, self.ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, self.ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)

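  # Illustrative sketch of the DRBD cross-check above (made-up data): with
  # node_drbd == {0: ("inst1", True), 1: ("inst2", False)} computed from the
  # configuration and used_minors == [1, 7] reported by the node, minor 0 is
  # flagged as "not active" (inst1 is supposed to be up) and minor 7 as
  # "unallocated ... in use" (it maps to no instance in the configuration).
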
  def _UpdateNodeOS(self, ninfo, nresult, nimg):
    """Builds the node OS structures.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_os = nresult.get(constants.NV_OSLIST, None)
    test = (not isinstance(remote_os, list) or
            not compat.all(remote_os,
                           lambda v: isinstance(v, list) and len(v) == 7))

    _ErrorIf(test, self.ENODEOS, node,
             "node hasn't returned valid OS data")

    nimg.os_fail = test

    if test:
      return

    os_dict = {}

    for (name, os_path, status, diagnose,
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:

      if name not in os_dict:
        os_dict[name] = []

      # parameters is a list of lists instead of list of tuples due to
      # JSON lacking a real tuple type, fix it:
      parameters = [tuple(v) for v in parameters]
      os_dict[name].append((os_path, status, diagnose,
                            set(variants), set(parameters), set(api_ver)))

    nimg.oslist = os_dict

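  # Shape of one NV_OSLIST entry consumed above (values are illustrative):
  #   ["debian-squeeze", "/srv/ganeti/os/debian-squeeze", True, "",
  #    ["default"], [["dhcp", "whether to use DHCP"]], [20]]
  # i.e. (name, path, status, diagnose, variants, parameters, api_versions);
  # the inner parameter pairs arrive as JSON lists and are re-tupled before
  # being stored in nimg.oslist.
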
  def _VerifyNodeOS(self, ninfo, nimg, base):
    """Verifies the node OS list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nimg: the node image object
    @param base: the 'template' node we match against (e.g. from the master)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"

    for os_name, os_data in nimg.oslist.items():
      assert os_data, "Empty OS status for OS %s?!" % os_name
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
      _ErrorIf(not f_status, self.ENODEOS, node,
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
               "OS '%s' has multiple entries (first one shadows the rest): %s",
               os_name, utils.CommaJoin([v[0] for v in os_data]))
      # this will be caught in the backend too
      _ErrorIf(compat.any(f_api, lambda v: v >= constants.OS_API_V15)
               and not f_var, self.ENODEOS, node,
               "OS %s with API at least %d does not declare any variant",
               os_name, constants.OS_API_V15)
      # comparisons with the 'base' image
      test = os_name not in base.oslist
      _ErrorIf(test, self.ENODEOS, node,
               "Extra OS %s not present on reference node (%s)",
               os_name, base.name)
      if test:
        continue
      assert base.oslist[os_name], "Base node has empty OS status?"
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
      if not b_status:
        # base OS is invalid, skipping
        continue
      for kind, a, b in [("API version", f_api, b_api),
                         ("variants list", f_var, b_var),
                         ("parameters", f_param, b_param)]:
        _ErrorIf(a != b, self.ENODEOS, node,
                 "OS %s %s differs from reference node %s: %s vs. %s",
                 kind, os_name, base.name,
                 utils.CommaJoin(a), utils.CommaJoin(b))

    # check any missing OSes
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
    _ErrorIf(missing, self.ENODEOS, node,
             "OSes present on reference node %s but missing on this node: %s",
             base.name, utils.CommaJoin(missing))

  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
    """Verifies and updates the node volume data.

    This function will update a L{NodeImage}'s internal structures
    with data from the remote call.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    if vg_name is None:
      pass
    elif isinstance(lvdata, basestring):
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
    else:
      nimg.volumes = lvdata
      nimg.lvm_fail = False

  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
    """Verifies and updates the node instance list.

    If the listing was successful, then updates this node's instance
    list. Otherwise, it marks the RPC call as failed for the instance
    list key.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    idata = nresult.get(constants.NV_INSTANCELIST, None)
    test = not isinstance(idata, list)
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
    if test:
      nimg.hyp_fail = True
    else:
      nimg.instances = idata

  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
    """Verifies and computes a node information map.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # try to read free memory (from the hypervisor)
    hv_info = nresult.get(constants.NV_HVINFO, None)
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
    if not test:
      try:
        nimg.mfree = int(hv_info["memory_free"])
      except (ValueError, TypeError):
        _ErrorIf(True, self.ENODERPC, node,
                 "node returned invalid nodeinfo, check hypervisor")

    # FIXME: devise a free space model for file based instances as well
    if vg_name is not None:
      test = (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST])
      _ErrorIf(test, self.ENODELVM, node,
               "node didn't return data for the volume group '%s'"
               " - it is either missing or broken", vg_name)
      if not test:
        try:
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
        except (ValueError, TypeError):
          _ErrorIf(True, self.ENODERPC, node,
                   "node returned invalid LVM info, check LVM status")

  def CheckPrereq(self):
    """Check prerequisites.

    Transform the list of checks we're going to skip into a set and check that
    all its members are valid.

    """
    self.skip_set = frozenset(self.op.skip_checks)
    if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
      raise errors.OpPrereqError("Invalid checks to be skipped specified",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks just run in the post phase and their failure makes
    the output be logged in the verify output and the verification fail.

    """
    all_nodes = self.cfg.GetNodeList()
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
      }
    for node in self.cfg.GetAllNodesInfo().values():
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())

    return env, [], all_nodes

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    self.bad = False
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn
    feedback_fn("* Verifying global settings")
    for msg in self.cfg.VerifyConfig():
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)

    # Check the cluster certificates
    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)

    vg_name = self.cfg.GetVGName()
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
    cluster = self.cfg.GetClusterInfo()
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
                        for iname in instancelist)
    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = 0 # Count of offline nodes
    n_drained = 0 # Count of nodes being drained
    node_vol_should = {}

    # FIXME: verify OS list
    # do local checksums
    master_files = [constants.CLUSTER_CONF_FILE]
    master_node = self.master_node = self.cfg.GetMasterNode()
    master_ip = self.cfg.GetMasterIP()

    file_names = ssconf.SimpleStore().GetFileList()
    file_names.extend(constants.ALL_CERT_FILES)
    file_names.extend(master_files)
    if cluster.modify_etc_hosts:
      file_names.append(constants.ETC_HOSTS)

    local_checksums = utils.FingerprintFiles(file_names)

    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
    node_verify_param = {
      constants.NV_FILELIST: file_names,
      constants.NV_NODELIST: [node.name for node in nodeinfo
                              if not node.offline],
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
                                  node.secondary_ip) for node in nodeinfo
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
      constants.NV_MASTERIP: (master_node, master_ip),
      constants.NV_OSLIST: None,
      }

    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]
      node_verify_param[constants.NV_DRBDLIST] = None

    # Build our expected cluster state
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
                                                 name=node.name))
                      for node in nodeinfo)

    for instance in instancelist:
      inst_config = instanceinfo[instance]

      for nname in inst_config.all_nodes:
        if nname not in node_image:
          # ghost node
          gnode = self.NodeImage(name=nname)
          gnode.ghost = True
          node_image[nname] = gnode

      inst_config.MapLVsByNode(node_vol_should)

      pnode = inst_config.primary_node
      node_image[pnode].pinst.append(instance)

      for snode in inst_config.secondary_nodes:
        nimg = node_image[snode]
        nimg.sinst.append(instance)
        if pnode not in nimg.sbp:
          nimg.sbp[pnode] = []
        nimg.sbp[pnode].append(instance)

    # At this point, we have the in-memory data structures complete,
    # except for the runtime information, which we'll gather next

    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()

    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Verifying node status")

    refos_img = None

    for node_i in nodeinfo:
      node = node_i.name
      nimg = node_image[node]

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline += 1
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained += 1
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
      if msg:
        nimg.rpc_fail = True
        continue

      nresult = all_nvinfo[node].payload

      nimg.call_ok = self._VerifyNode(node_i, nresult)
      self._VerifyNodeNetwork(node_i, nresult)
      self._VerifyNodeLVM(node_i, nresult, vg_name)
      self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
                            master_files)
      self._VerifyNodeDrbd(node_i, nresult, instanceinfo, all_drbd_map)
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)

      self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
      self._UpdateNodeInstances(node_i, nresult, nimg)
      self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
      self._UpdateNodeOS(node_i, nresult, nimg)
      if not nimg.os_fail:
        if refos_img is None:
          refos_img = nimg
        self._VerifyNodeOS(node_i, nimg, refos_img)

    feedback_fn("* Verifying instance status")
    for instance in instancelist:
      if verbose:
        feedback_fn("* Verifying instance %s" % instance)
      inst_config = instanceinfo[instance]
      self._VerifyInstance(instance, inst_config, node_image)
      inst_nodes_offline = []

      pnode = inst_config.primary_node
      pnode_img = node_image[pnode]
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
               self.ENODERPC, pnode, "instance %s, connection to"
               " primary node failed", instance)

      if pnode_img.offline:
        inst_nodes_offline.append(pnode)

      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
      if not inst_config.secondary_nodes:
        i_non_redundant.append(instance)
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
               instance, "instance has multiple secondary nodes: %s",
               utils.CommaJoin(inst_config.secondary_nodes),
               code=self.ETYPE_WARNING)

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        s_img = node_image[snode]
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
                 "instance %s, connection to secondary node failed", instance)

        if s_img.offline:
          inst_nodes_offline.append(snode)

      # warn that the instance lives on offline nodes
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
               "instance lives on offline node(s) %s",
               utils.CommaJoin(inst_nodes_offline))
      # ... or ghost nodes
      for node in inst_config.all_nodes:
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
                 "instance lives on ghost node %s", node)

    feedback_fn("* Verifying orphan volumes")
    self._VerifyOrphanVolumes(node_vol_should, node_image)

    feedback_fn("* Verifying orphan instances")
    self._VerifyOrphanInstances(instancelist, node_image)

    if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
      feedback_fn("* Verifying N+1 Memory redundancy")
      self._VerifyNPlusOneMemory(node_image, instanceinfo)

    feedback_fn("* Other Notes")
    if i_non_redundant:
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))

    if i_non_a_balanced:
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
                  % len(i_non_a_balanced))

    if n_offline:
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)

    if n_drained:
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)

    return not self.bad

  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result.

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, and are only interested in
    # their results
    if phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      indent_re = re.compile('^', re.M)
      feedback_fn("* Hooks Results")
      assert hooks_results, "invalid result from hooks"

      for node_name in hooks_results:
        res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
                      "Communication failure in hooks execution: %s", msg)
        if res.offline or msg:
          # No need to investigate payload if node is offline or gave an error.
          # override manually lu_result here as _ErrorIf only
          # overrides self.bad
          lu_result = 1
          continue
        for script, hkr, output in res.payload:
          test = hkr == constants.HKR_FAIL
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
                        "Script %s failed, output:", script)
          if test:
            output = indent_re.sub('      ', output)
            feedback_fn("%s" % output)
            lu_result = 0

      return lu_result

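# Minimal sketch (not used by the LUs) of the re-indentation applied to hook
# output in LUVerifyCluster.HooksCallBack above: every line of the script
# output is prefixed so that it nests under the per-script feedback line.
# The helper name and the default prefix are illustrative choices only.
def _ExampleIndentHookOutput(output, prefix="      "):
  """Return OUTPUT with PREFIX prepended to every line (illustrative only)."""
  return re.compile('^', re.M).sub(prefix, output)
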
class LUVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def CheckPrereq(self):
    """Check prerequisites.

    This has no prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    result = res_nodes, res_instances, res_missing = {}, [], {}

    vg_name = self.cfg.GetVGName()
    nodes = utils.NiceSort(self.cfg.GetNodeList())
    instances = [self.cfg.GetInstanceInfo(name)
                 for name in self.cfg.GetInstanceList()]

    nv_dict = {}
    for inst in instances:
      inst_lvs = {}
      if (not inst.admin_up or
          inst.disk_template not in constants.DTS_NET_MIRROR):
        continue
      inst.MapLVsByNode(inst_lvs)
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
      for node, vol_list in inst_lvs.iteritems():
        for vol in vol_list:
          nv_dict[(node, vol)] = inst

    if not nv_dict:
      return result

    node_lvs = self.rpc.call_lv_list(nodes, vg_name)

    for node in nodes:
      # node_volume
      node_res = node_lvs[node]
      if node_res.offline:
        continue
      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg
        continue

      lvs = node_res.payload
      for lv_name, (_, _, lv_online) in lvs.items():
        inst = nv_dict.pop((node, lv_name), None)
        if (not lv_online and inst is not None
            and inst.name not in res_instances):
          res_instances.append(inst.name)

    # any leftover items in nv_dict are missing LVs, let's arrange the
    # data better
    for key, inst in nv_dict.iteritems():
      if inst.name not in res_missing:
        res_missing[inst.name] = []
      res_missing[inst.name].append(key)

    return result

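# Illustrative helper (not called by LUVerifyDisks): the same inversion the
# LU performs inline, turning {instance: {node: [vol, ...]}} into a flat
# {(node, vol): instance} lookup so that remote LV listings can be matched
# back to their owning instance. The helper name is hypothetical.
def _ExampleInvertLvMap(inst_lvs_by_name):
  """Flatten a per-instance LV map into a (node, volume) -> instance dict."""
  nv_dict = {}
  for iname, node_map in inst_lvs_by_name.items():
    for node, vol_list in node_map.items():
      for vol in vol_list:
        nv_dict[(node, vol)] = iname
  return nv_dict
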
class LURepairDiskSizes(NoHooksLU):
  """Verifies the cluster disk sizes.

  """
  _OP_REQP = ["instances"]
  REQ_BGL = False

  def ExpandNames(self):
    if not isinstance(self.op.instances, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'",
                                 errors.ECODE_INVAL)

    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = _ExpandInstanceName(self.cfg, name)
        self.wanted_names.append(full_name)
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))

    changed = []
    for node, dskl in per_node_disks.items():
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsizes(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsizes call to node"
                        " %s, ignoring", node)
        continue
      if len(result.data) != len(dskl):
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, result.data):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
        size = size >> 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))
    return changed

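# Small sketch of the size normalisation done in LURepairDiskSizes.Exec above
# (illustrative only): the sizes returned by the blockdev RPC are in bytes
# while the configuration stores MiB, hence the right shift by 20 bits. The
# helper name and return convention are hypothetical.
def _ExampleDiskSizeMismatch(reported_bytes, recorded_mib):
  """Return the reported size in MiB if it differs from the recorded one."""
  reported_mib = reported_bytes >> 20
  if reported_mib != recorded_mib:
    return reported_mib
  return None
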
class LURenameCluster(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }
    mn = self.cfg.GetMasterNode()
    all_nodes = self.cfg.GetNodeList()
    return env, [mn], all_nodes

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    hostname = utils.GetHostInfo(self.op.name)

    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)
    if new_ip != old_ip:
      if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network. Aborting." %
                                   new_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    clustername = self.op.name
    ip = self.ip

    # shutdown the master IP
    master = self.cfg.GetMasterNode()
    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetNodeList()
      try:
        node_list.remove(master)
      except ValueError:
        pass
      result = self.rpc.call_upload_file(node_list,
                                         constants.SSH_KNOWN_HOSTS_FILE)
      for to_node, to_result in result.iteritems():
        msg = to_result.fail_msg
        if msg:
          msg = ("Copy of file %s to node %s failed: %s" %
                 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
          self.proc.LogWarning(msg)

    finally:
      result = self.rpc.call_node_start_master(master, False, False)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)

def _RecursiveCheckIfLVMBased(disk):
  """Check if the given disk or its children are lvm-based.

  @type disk: L{objects.Disk}
  @param disk: the disk to check
  @rtype: boolean
  @return: boolean indicating whether a LD_LV dev_type was found or not

  """
  if disk.children:
    for chdisk in disk.children:
      if _RecursiveCheckIfLVMBased(chdisk):
        return True
  return disk.dev_type == constants.LD_LV

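# Usage sketch for _RecursiveCheckIfLVMBased (illustrative only; the
# _FakeDisk stand-in below mimics just the two attributes the function
# reads and is not a real L{objects.Disk}).
def _ExampleLvmCheck():
  """Show the recursion over a DRBD disk whose data child is an LV."""
  class _FakeDisk:
    def __init__(self, dev_type, children):
      self.dev_type = dev_type
      self.children = children
  lv = _FakeDisk(constants.LD_LV, [])
  drbd = _FakeDisk(constants.LD_DRBD8, [lv])
  return _RecursiveCheckIfLVMBased(drbd)  # True: the data child is LVM-based
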
class LUSetClusterParams(LogicalUnit):
  """Change the parameters of the cluster.

  """
  HPATH = "cluster-modify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []
  _OP_DEFS = [
    ("candidate_pool_size", None),
    ("uid_pool", None),
    ("add_uids", None),
    ("remove_uids", None),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    """Check parameters.

    """
    if self.op.candidate_pool_size is not None:
      try:
        self.op.candidate_pool_size = int(self.op.candidate_pool_size)
      except (ValueError, TypeError), err:
        raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
                                   str(err), errors.ECODE_INVAL)
      if self.op.candidate_pool_size < 1:
        raise errors.OpPrereqError("At least one master candidate needed",
                                   errors.ECODE_INVAL)

    _CheckBooleanOpField(self.op, "maintain_node_health")

    if self.op.uid_pool:
      uidpool.CheckUidPool(self.op.uid_pool)

    if self.op.add_uids:
      uidpool.CheckUidPool(self.op.add_uids)

    if self.op.remove_uids:
      uidpool.CheckUidPool(self.op.remove_uids)

  def ExpandNames(self):
    # FIXME: in the future maybe other cluster params won't require checking on
    # all nodes to be modified.
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
      }
    mn = self.cfg.GetMasterNode()
    return env, [mn], [mn]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the given params don't conflict and
    if the given volume group is valid.

    """
    if self.op.vg_name is not None and not self.op.vg_name:
      instances = self.cfg.GetAllInstancesInfo().values()
      for inst in instances:
        for disk in inst.disks:
          if _RecursiveCheckIfLVMBased(disk):
            raise errors.OpPrereqError("Cannot disable lvm storage while"
                                       " lvm-based instances exist",
                                       errors.ECODE_INVAL)

    node_list = self.acquired_locks[locking.LEVEL_NODE]

    # if vg_name not None, checks given volume group on all nodes
    if self.op.vg_name:
      vglist = self.rpc.call_vg_list(node_list)
      for node in node_list:
        msg = vglist[node].fail_msg
        if msg:
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
          continue
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              self.op.vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus), errors.ECODE_ENVIRON)

    self.cluster = cluster = self.cfg.GetClusterInfo()
    # validate params changes
    if self.op.beparams:
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)

    if self.op.nicparams:
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
      nic_errors = []

      # check all instances for consistency
      for instance in self.cfg.GetAllInstancesInfo().values():
        for nic_idx, nic in enumerate(instance.nics):
          params_copy = copy.deepcopy(nic.nicparams)
          params_filled = objects.FillDict(self.new_nicparams, params_copy)

          # check parameter syntax
          try:
            objects.NIC.CheckParameterSyntax(params_filled)
          except errors.ConfigurationError, err:
            nic_errors.append("Instance %s, nic/%d: %s" %
                              (instance.name, nic_idx, err))

          # if we're moving instances to routed, check that they have an ip
          target_mode = params_filled[constants.NIC_MODE]
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip" %
                              (instance.name, nic_idx))
      if nic_errors:
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
                                   "\n".join(nic_errors))

    # hypervisor list/parameters
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
    if self.op.hvparams:
      if not isinstance(self.op.hvparams, dict):
        raise errors.OpPrereqError("Invalid 'hvparams' parameter on input",
                                   errors.ECODE_INVAL)
      for hv_name, hv_dict in self.op.hvparams.items():
        if hv_name not in self.new_hvparams:
          self.new_hvparams[hv_name] = hv_dict
        else:
          self.new_hvparams[hv_name].update(hv_dict)

    # os hypervisor parameters
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
    if self.op.os_hvp:
      if not isinstance(self.op.os_hvp, dict):
        raise errors.OpPrereqError("Invalid 'os_hvp' parameter on input",
                                   errors.ECODE_INVAL)
      for os_name, hvs in self.op.os_hvp.items():
        if not isinstance(hvs, dict):
          raise errors.OpPrereqError(("Invalid 'os_hvp' parameter on"
                                      " input"), errors.ECODE_INVAL)
        if os_name not in self.new_os_hvp:
          self.new_os_hvp[os_name] = hvs
        else:
          for hv_name, hv_dict in hvs.items():
            if hv_name not in self.new_os_hvp[os_name]:
              self.new_os_hvp[os_name][hv_name] = hv_dict
            else:
              self.new_os_hvp[os_name][hv_name].update(hv_dict)

    # os parameters
    self.new_osp = objects.FillDict(cluster.osparams, {})
    if self.op.osparams:
      if not isinstance(self.op.osparams, dict):
        raise errors.OpPrereqError("Invalid 'osparams' parameter on input",
                                   errors.ECODE_INVAL)
      for os_name, osp in self.op.osparams.items():
        if not isinstance(osp, dict):
          raise errors.OpPrereqError(("Invalid 'osparams' parameter on"
                                      " input"), errors.ECODE_INVAL)
        if os_name not in self.new_osp:
          self.new_osp[os_name] = {}

        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
                                                  use_none=True)

        if not self.new_osp[os_name]:
          # we removed all parameters
          del self.new_osp[os_name]
        else:
          # check the parameter validity (remote check)
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
                         os_name, self.new_osp[os_name])

    # changes to the hypervisor list
    if self.op.enabled_hypervisors is not None:
      self.hv_list = self.op.enabled_hypervisors
      if not self.hv_list:
        raise errors.OpPrereqError("Enabled hypervisors list must contain at"
                                   " least one member",
                                   errors.ECODE_INVAL)
      invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
      if invalid_hvs:
        raise errors.OpPrereqError("Enabled hypervisors contains invalid"
                                   " entries: %s" %
                                   utils.CommaJoin(invalid_hvs),
                                   errors.ECODE_INVAL)
      for hv in self.hv_list:
        # if the hypervisor doesn't already exist in the cluster
        # hvparams, we initialize it to empty, and then (in both
        # cases) we make sure to fill the defaults, as we might not
        # have a complete defaults list if the hypervisor wasn't
        # enabled before
        if hv not in new_hvp:
          new_hvp[hv] = {}
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
    else:
      self.hv_list = cluster.enabled_hypervisors

    if self.op.hvparams or self.op.enabled_hypervisors is not None:
      # either the enabled list has changed, or the parameters have, validate
      for hv_name, hv_params in self.new_hvparams.items():
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
            (self.op.enabled_hypervisors and
             hv_name in self.op.enabled_hypervisors)):
          # either this is a new hypervisor, or its parameters have changed
          hv_class = hypervisor.GetHypervisor(hv_name)
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          hv_class.CheckParameterSyntax(hv_params)
          _CheckHVParams(self, node_list, hv_name, hv_params)

    if self.op.os_hvp:
      # no need to check any newly-enabled hypervisors, since the
      # defaults have already been checked in the above code-block
      for os_name, os_hvp in self.new_os_hvp.items():
        for hv_name, hv_params in os_hvp.items():
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          # we need to fill in the new os_hvp on top of the actual hv_p
          cluster_defaults = self.new_hvparams.get(hv_name, {})
          new_osp = objects.FillDict(cluster_defaults, hv_params)
          hv_class = hypervisor.GetHypervisor(hv_name)
          hv_class.CheckParameterSyntax(new_osp)
          _CheckHVParams(self, node_list, hv_name, new_osp)

  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    """
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
    if self.op.enabled_hypervisors is not None:
      self.cluster.hvparams = self.new_hvparams
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
    if self.op.beparams:
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
    if self.op.nicparams:
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
    if self.op.osparams:
      self.cluster.osparams = self.new_osp

    if self.op.candidate_pool_size is not None:
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
      # we need to update the pool size here, otherwise the save will fail
      _AdjustCandidatePool(self, [])

    if self.op.maintain_node_health is not None:
      self.cluster.maintain_node_health = self.op.maintain_node_health

    if self.op.add_uids is not None:
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)

    if self.op.remove_uids is not None:
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)

    if self.op.uid_pool is not None:
      self.cluster.uid_pool = self.op.uid_pool

    self.cfg.Update(self.cluster, feedback_fn)

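# Hedged sketch of the two-level parameter merge performed in
# LUSetClusterParams.CheckPrereq above: user-supplied values are laid over a
# copy of the current cluster dictionary, one hypervisor at a time. This
# local helper only illustrates the idea and is not objects.FillDict itself;
# its name is hypothetical.
def _ExampleMergeHvParams(current, overrides):
  """Return a copy of CURRENT with OVERRIDES merged in per hypervisor."""
  merged = dict((hv, dict(params)) for hv, params in current.items())
  for hv, params in overrides.items():
    merged.setdefault(hv, {}).update(params)
  return merged
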
def _RedistributeAncillaryFiles(lu, additional_nodes=None):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to

  """
  # 1. Gather target nodes
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
  dist_nodes = lu.cfg.GetOnlineNodeList()
  if additional_nodes is not None:
    dist_nodes.extend(additional_nodes)
  if myself.name in dist_nodes:
    dist_nodes.remove(myself.name)

  # 2. Gather files to distribute
  dist_files = set([constants.ETC_HOSTS,
                    constants.SSH_KNOWN_HOSTS_FILE,
                    constants.RAPI_CERT_FILE,
                    constants.RAPI_USERS_FILE,
                    constants.CONFD_HMAC_KEY,
                    constants.CLUSTER_DOMAIN_SECRET_FILE,
                   ])

  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
  for hv_name in enabled_hypervisors:
    hv_class = hypervisor.GetHypervisor(hv_name)
    dist_files.update(hv_class.GetAncillaryFiles())

  # 3. Perform the files upload
  for fname in dist_files:
    if os.path.exists(fname):
      result = lu.rpc.call_upload_file(dist_nodes, fname)
      for to_node, to_result in result.items():
        msg = to_result.fail_msg
        if msg:
          msg = ("Copy of file %s to node %s failed: %s" %
                 (fname, to_node, msg))
          lu.proc.LogWarning(msg)

class LURedistributeConfig(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def CheckPrereq(self):
    """Check prerequisites.

    """

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)

def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disks to sync.

  """
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                      node, disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded

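# Generic sketch of the polling pattern used by _WaitForSync above (and of
# roughly what a conversion to a retry helper could look like): poll a
# callable until it reports completion, sleeping a bounded amount between
# attempts. Purely illustrative; this is not the utils.Retry API and the
# helper name is hypothetical.
def _ExamplePollUntilDone(poll_fn, max_sleep=60):
  """Call POLL_FN() until it returns (True, _); sleep between attempts."""
  while True:
    done, suggested_wait = poll_fn()
    if done:
      return
    time.sleep(min(max_sleep, max(1, suggested_wait)))
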
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
2843
  """Check that mirrors are not degraded.
2844

2845
  The ldisk parameter, if True, will change the test from the
2846
  is_degraded attribute (which represents overall non-ok status for
2847
  the device(s)) to the ldisk (representing the local storage status).
2848

2849
  """
2850
  lu.cfg.SetDiskID(dev, node)
2851

    
2852
  result = True
2853

    
2854
  if on_primary or dev.AssembleOnSecondary():
2855
    rstats = lu.rpc.call_blockdev_find(node, dev)
2856
    msg = rstats.fail_msg
2857
    if msg:
2858
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
2859
      result = False
2860
    elif not rstats.payload:
2861
      lu.LogWarning("Can't find disk on node %s", node)
2862
      result = False
2863
    else:
2864
      if ldisk:
2865
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
2866
      else:
2867
        result = result and not rstats.payload.is_degraded
2868

    
2869
  if dev.children:
2870
    for child in dev.children:
2871
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
2872

    
2873
  return result
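

# Illustrative sketch (not part of the original module): how a caller might
# use _CheckDiskConsistency to verify every disk of an instance on its
# primary node. The helper name _ExampleInstanceDisksConsistent and its use
# are hypothetical and not referenced anywhere else in this file.
def _ExampleInstanceDisksConsistent(lu, instance, ldisk=False):
  """Sketch: return True if all instance disks look consistent.

  """
  for dev in instance.disks:
    # check each top-level disk on the primary node; ldisk=True would
    # test the local storage status instead of overall degradation
    if not _CheckDiskConsistency(lu, dev, instance.primary_node,
                                 True, ldisk=ldisk):
      return False
  return True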


class LUDiagnoseOS(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  _OP_REQP = ["output_fields", "names"]
  REQ_BGL = False
  _FIELDS_STATIC = utils.FieldSet()
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants",
                                   "parameters", "api_versions")

  def ExpandNames(self):
    if self.op.names:
      raise errors.OpPrereqError("Selective OS query not supported",
                                 errors.ECODE_INVAL)

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    # Lock all nodes, in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    self.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.

    """

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into an a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and tuples of (path, status, diagnose,
        variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "", [], [])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for (name, path, status, diagnose, variants,
           params, api_versions) in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        # convert params from [name, help] to (name, help)
        params = [tuple(v) for v in params]
        all_os[name][node_name].append((path, status, diagnose,
                                        variants, params, api_versions))
    return all_os

  def Exec(self, feedback_fn):
    """Compute the list of OSes.

    """
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
    node_data = self.rpc.call_os_diagnose(valid_nodes)
    pol = self._DiagnoseByOS(node_data)
    output = []

    for os_name, os_data in pol.items():
      row = []
      valid = True
      (variants, params, api_versions) = null_state = (set(), set(), set())
      for idx, osl in enumerate(os_data.values()):
        valid = bool(valid and osl and osl[0][1])
        if not valid:
          (variants, params, api_versions) = null_state
          break
        node_variants, node_params, node_api = osl[0][3:6]
        if idx == 0: # first entry
          variants = set(node_variants)
          params = set(node_params)
          api_versions = set(node_api)
        else: # keep consistency
          variants.intersection_update(node_variants)
          params.intersection_update(node_params)
          api_versions.intersection_update(node_api)

      for field in self.op.output_fields:
        if field == "name":
          val = os_name
        elif field == "valid":
          val = valid
        elif field == "node_status":
          # this is just a copy of the dict
          val = {}
          for node_name, nos_list in os_data.items():
            val[node_name] = nos_list
        elif field == "variants":
          val = list(variants)
        elif field == "parameters":
          val = list(params)
        elif field == "api_versions":
          val = list(api_versions)
        else:
          raise errors.ParameterError(field)
        row.append(val)
      output.append(row)

    return output
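

# Illustrative sketch (not part of the original module): the shape of the
# dictionary produced by LUDiagnoseOS._DiagnoseByOS, using a minimal
# stand-in for the RPC result objects. The nested _FakeRpcResult class and
# the sample data are hypothetical and exist only to show the remapping.
def _ExampleDiagnoseByOS():
  """Sketch: remap a fake two-node OS answer and return it.

  """
  class _FakeRpcResult(object): # hypothetical stand-in for an RPC result
    def __init__(self, payload):
      self.fail_msg = None
      self.payload = payload

  rlist = {
    "node1": _FakeRpcResult([("debian-etch", "/usr/lib/os", True, "",
                              ["default"], [], [10])]),
    "node2": _FakeRpcResult([("debian-etch", "/srv/os", False, "invalid api",
                              [], [], [])]),
    }
  # the result maps os name -> node name -> list of
  # (path, status, diagnose, variants, params, api_versions) tuples
  return LUDiagnoseOS._DiagnoseByOS(rlist)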


class LURemoveNode(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      logging.warning("Node %s which is about to be removed not found"
                      " in the all nodes list", self.op.node_name)
    return env, all_nodes, all_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    instance_list = self.cfg.GetInstanceList()

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node,"
                                 " you need to failover first.",
                                 errors.ECODE_INVAL)

    for instance_name in instance_list:
      instance = self.cfg.GetInstanceInfo(instance_name)
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first." % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      # FIXME: this should be done via an rpc call to node daemon
      utils.RemoveHostFromEtcHosts(node.name)
      _RedistributeAncillaryFiles(self)


class LUQueryNodes(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable-msg=W0142
  _OP_REQP = ["output_fields", "names", "use_locking"]
  REQ_BGL = False

  _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
                    "master_candidate", "offline", "drained"]

  _FIELDS_DYNAMIC = utils.FieldSet(
    "dtotal", "dfree",
    "mtotal", "mnode", "mfree",
    "bootid",
    "ctotal", "cnodes", "csockets",
    )

  _FIELDS_STATIC = utils.FieldSet(*[
    "pinst_cnt", "sinst_cnt",
    "pinst_list", "sinst_list",
    "pip", "sip", "tags",
    "master",
    "role"] + _SIMPLE_FIELDS
    )

  def ExpandNames(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.names:
      self.wanted = _GetWantedNodes(self, self.op.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      # if we don't request only static fields, we need to lock the nodes
      self.needed_locks[locking.LEVEL_NODE] = self.wanted

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # The validation of the node list is done in the _GetWantedNodes,
    # if non empty, and if empty, there's no validation to do
    pass

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    all_info = self.cfg.GetAllNodesInfo()
    if self.do_locking:
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
    elif self.wanted != locking.ALL_SET:
      nodenames = self.wanted
      missing = set(nodenames).difference(all_info.keys())
      if missing:
        raise errors.OpExecError(
          "Some nodes were removed before retrieving their data: %s" % missing)
    else:
      nodenames = all_info.keys()

    nodenames = utils.NiceSort(nodenames)
    nodelist = [all_info[name] for name in nodenames]

    # begin data gathering

    if self.do_node_query:
      live_data = {}
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                          self.cfg.GetHypervisorType())
      for name in nodenames:
        nodeinfo = node_data[name]
        if not nodeinfo.fail_msg and nodeinfo.payload:
          nodeinfo = nodeinfo.payload
          fn = utils.TryConvert
          live_data[name] = {
            "mtotal": fn(int, nodeinfo.get('memory_total', None)),
            "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
            "mfree": fn(int, nodeinfo.get('memory_free', None)),
            "dtotal": fn(int, nodeinfo.get('vg_size', None)),
            "dfree": fn(int, nodeinfo.get('vg_free', None)),
            "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
            "bootid": nodeinfo.get('bootid', None),
            "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
            "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
            }
        else:
          live_data[name] = {}
    else:
      live_data = dict.fromkeys(nodenames, {})

    node_to_primary = dict([(name, set()) for name in nodenames])
    node_to_secondary = dict([(name, set()) for name in nodenames])

    inst_fields = frozenset(("pinst_cnt", "pinst_list",
                             "sinst_cnt", "sinst_list"))
    if inst_fields & frozenset(self.op.output_fields):
      inst_data = self.cfg.GetAllInstancesInfo()

      for inst in inst_data.values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)

    master_node = self.cfg.GetMasterNode()

    # end data gathering

    output = []
    for node in nodelist:
      node_output = []
      for field in self.op.output_fields:
        if field in self._SIMPLE_FIELDS:
          val = getattr(node, field)
        elif field == "pinst_list":
          val = list(node_to_primary[node.name])
        elif field == "sinst_list":
          val = list(node_to_secondary[node.name])
        elif field == "pinst_cnt":
          val = len(node_to_primary[node.name])
        elif field == "sinst_cnt":
          val = len(node_to_secondary[node.name])
        elif field == "pip":
          val = node.primary_ip
        elif field == "sip":
          val = node.secondary_ip
        elif field == "tags":
          val = list(node.GetTags())
        elif field == "master":
          val = node.name == master_node
        elif self._FIELDS_DYNAMIC.Matches(field):
          val = live_data[node.name].get(field, None)
        elif field == "role":
          if node.name == master_node:
            val = "M"
          elif node.master_candidate:
            val = "C"
          elif node.drained:
            val = "D"
          elif node.offline:
            val = "O"
          else:
            val = "R"
        else:
          raise errors.ParameterError(field)
        node_output.append(val)
      output.append(node_output)

    return output


class LUQueryNodeVolumes(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  _OP_REQP = ["nodes", "output_fields"]
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def ExpandNames(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the fields required are valid output fields.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    nodenames = self.nodes
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = [self.cfg.GetInstanceInfo(iname) for iname
             in self.cfg.GetInstanceList()]

    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = nresult.payload[:]
      node_vols.sort(key=lambda vol: vol['dev'])

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol['dev']
          elif field == "vg":
            val = vol['vg']
          elif field == "name":
            val = vol['name']
          elif field == "size":
            val = int(float(vol['size']))
          elif field == "instance":
            for inst in ilist:
              if node not in lv_by_node[inst]:
                continue
              if vol['name'] in lv_by_node[inst][node]:
                val = inst.name
                break
            else:
              val = '-'
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output


class LUQueryNodeStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _OP_REQP = ["nodes", "storage_type", "output_fields"]
  _OP_DEFS = [("name", None)]
  REQ_BGL = False
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)

  def CheckArguments(self):
    _CheckStorageType(self.op.storage_type)

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the fields required are valid output fields.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result


class LUModifyNodeStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  _OP_REQP = ["node_name", "storage_type", "name", "changes"]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    _CheckStorageType(self.op.storage_type)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def CheckPrereq(self):
    """Check prerequisites.

    """
    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


class LUAddNode(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]
  _OP_DEFS = [("secondary_ip", None)]

  def CheckArguments(self):
    # validate/normalize the node name
    self.op.node_name = utils.HostInfo.NormalizeName(self.op.node_name)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      }
    nodes_0 = self.cfg.GetNodeList()
    nodes_1 = nodes_0 + [self.op.node_name, ]
    return env, nodes_0, nodes_1

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) matches the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    node_name = self.op.node_name
    cfg = self.cfg

    dns_data = utils.GetHostInfo(node_name)

    node = dns_data.name
    primary_ip = self.op.primary_ip = dns_data.ip
    if self.op.secondary_ip is None:
      self.op.secondary_ip = primary_ip
    if not utils.IsValidIP(self.op.secondary_ip):
      raise errors.OpPrereqError("Invalid secondary IP given",
                                 errors.ECODE_INVAL)
    secondary_ip = self.op.secondary_ip

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    self.changed_primary_ip = False

    for existing_node_name in node_list:
      existing_node = cfg.GetNodeInfo(existing_node_name)

      if self.op.readd and node == existing_node_name:
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no private ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a private ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                           source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to noded port",
                                   errors.ECODE_ENVIRON)

    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)

    if self.op.readd:
      self.new_node = self.cfg.GetNodeInfo(node)
      assert self.new_node is not None, "Can't retrieve locked node %s" % node
    else:
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))

    # setup ssh on node
    if self.cfg.GetClusterInfo().modify_ssh_setup:
      logging.info("Copy ssh key to node %s", node)
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
      keyarray = []
      keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
                  constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
                  priv_key, pub_key]

      for i in keyfiles:
        keyarray.append(utils.ReadFile(i))

      result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
                                      keyarray[2], keyarray[3], keyarray[4],
                                      keyarray[5])
      result.Raise("Cannot transfer ssh keys to the new node")

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      # FIXME: this should be done via an rpc call to node daemon
      utils.AddHostToEtcHosts(new_node.name)

    if new_node.secondary_ip != new_node.primary_ip:
      result = self.rpc.call_node_has_ip_address(new_node.name,
                                                 new_node.secondary_ip)
      result.Raise("Failure checking secondary ip on node %s" % new_node.name,
                   prereq=True, ecode=errors.ECODE_ENVIRON)
      if not result.payload:
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
                                 " you gave (%s). Please fix and re-run this"
                                 " command." % new_node.secondary_ip)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: [node],
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed.")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
      self.context.AddNode(new_node, self.proc.GetECId())


class LUSetNodeParams(LogicalUnit):
  """Modifies the parameters of a node.

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    _CheckBooleanOpField(self.op, 'master_candidate')
    _CheckBooleanOpField(self.op, 'offline')
    _CheckBooleanOpField(self.op, 'drained')
    _CheckBooleanOpField(self.op, 'auto_promote')
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
    if all_mods.count(None) == 3:
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we're offlining or draining the node
    self.offline_or_drain = (self.op.offline == True or
                             self.op.drained == True)
    self.deoffline_or_drain = (self.op.offline == False or
                               self.op.drained == False)
    self.might_demote = (self.op.master_candidate == False or
                         self.offline_or_drain)

    self.lock_all = self.op.auto_promote and self.might_demote


  def ExpandNames(self):
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      }
    nl = [self.cfg.GetMasterNode(),
          self.op.node_name]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via masterfailover",
                                   errors.ECODE_INVAL)


    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto-promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto_promote to allow promotion",
                                   errors.ECODE_INVAL)

    if (self.op.master_candidate == True and
        ((node.offline and not self.op.offline == False) or
         (node.drained and not self.op.drained == False))):
      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
                                 " to master_candidate" % node.name,
                                 errors.ECODE_INVAL)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.deoffline_or_drain and not self.offline_or_drain and not
        self.op.master_candidate == True and not node.master_candidate):
      self.op.master_candidate = _DecideSelfPromotion(self)
      if self.op.master_candidate:
        self.LogInfo("Autopromoting node to master candidate")

    return

  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node

    result = []
    changed_mc = False

    if self.op.offline is not None:
      node.offline = self.op.offline
      result.append(("offline", str(self.op.offline)))
      if self.op.offline == True:
        if node.master_candidate:
          node.master_candidate = False
          changed_mc = True
          result.append(("master_candidate", "auto-demotion due to offline"))
        if node.drained:
          node.drained = False
          result.append(("drained", "clear drained status due to offline"))

    if self.op.master_candidate is not None:
      node.master_candidate = self.op.master_candidate
      changed_mc = True
      result.append(("master_candidate", str(self.op.master_candidate)))
      if self.op.master_candidate == False:
        rrc = self.rpc.call_node_demote_from_mc(node.name)
        msg = rrc.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s" % msg)

    if self.op.drained is not None:
      node.drained = self.op.drained
      result.append(("drained", str(self.op.drained)))
      if self.op.drained == True:
        if node.master_candidate:
          node.master_candidate = False
          changed_mc = True
          result.append(("master_candidate", "auto-demotion due to drain"))
          rrc = self.rpc.call_node_demote_from_mc(node.name)
          msg = rrc.fail_msg
          if msg:
            self.LogWarning("Node failed to demote itself: %s" % msg)
        if node.offline:
          node.offline = False
          result.append(("offline", "clear offline status due to drain"))

    # we locked all nodes, we adjust the CP before updating this node
    if self.lock_all:
      _AdjustCandidatePool(self, [node.name])

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup
    if changed_mc:
      self.context.ReaddNode(node)

    return result


class LUPowercycleNode(NoHooksLU):
  """Powercycles a node.

  """
  _OP_REQP = ["node_name", "force"]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This LU has no prereqs.

    """
    pass

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload


class LUQueryClusterInfo(NoHooksLU):
  """Query cluster configuration.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """No prerequsites needed for this LU.

    """
    pass

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}

    # Filter just for enabled hypervisors
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = {}
      for hv_name, hv_params in hv_dict.items():
        if hv_name in cluster.enabled_hypervisors:
          os_hvp[os_name][hv_name] = hv_params

    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.enabled_hypervisors[0],
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "nicparams": cluster.nicparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "file_storage_dir": cluster.file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      }

    return result


class LUQueryConfigValues(NoHooksLU):
  """Return configuration values.

  """
  _OP_REQP = []
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause")

  def ExpandNames(self):
    self.needed_locks = {}

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def CheckPrereq(self):
    """No prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Dump a representation of the cluster config to the standard output.

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      else:
        raise errors.ParameterError(field)
      values.append(entry)
    return values


class LUActivateInstanceDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  _OP_REQP = ["instance_name"]
  _OP_DEFS = [("ignore_size", False)]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
              _AssembleInstanceDisks(self, self.instance,
                                     ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info


def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: False if the operation failed, otherwise a list of
      (host, instance_visible_name, node_visible_name)
      with the mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  # With the two-pass mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for inst_disk in disks:
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for inst_disk in disks:
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info
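

# Illustrative sketch (not part of the original module): how a caller might
# consume the (disks_ok, device_info) pair returned by
# _AssembleInstanceDisks. The helper name and the use of feedback_fn are
# hypothetical; LUActivateInstanceDisks above shows the real usage pattern.
def _ExampleReportAssembledDisks(lu, instance, feedback_fn):
  """Sketch: assemble an instance's disks and report the device mapping.

  """
  disks_ok, device_info = _AssembleInstanceDisks(lu, instance)
  if not disks_ok:
    raise errors.OpExecError("Cannot activate block devices")
  for host, iv_name, dev_path in device_info:
    # dev_path can be None if the primary-node assembly failed but the
    # error was only logged as a warning
    feedback_fn("disk %s of %s is visible on %s as %s" %
                (iv_name, instance.name, host, dev_path))
  return device_info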


def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                           ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")


class LUDeactivateInstanceDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    """
    instance = self.instance
    _SafeShutdownInstanceDisks(self, instance)


def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks if an instance is running, before calling
  _ShutdownInstanceDisks.

  """
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)


def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on

  """
  if disks is None:
    return instance.disks
  else:
    if not set(disks).issubset(instance.disks):
      raise errors.ProgrammerError("Can only act on disks belonging to the"
                                   " target instance")
    return disks


def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If ignore_primary is true, errors on the primary node are
  ignored.

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if not ignore_primary or node != instance.primary_node:
          all_result = False
  return all_result
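

# Illustrative sketch (not part of the original module): shutting down only a
# subset of an instance's disks while tolerating failures on the primary
# node. The helper name and the [0:1] disk selection are hypothetical; they
# only demonstrate the disks= and ignore_primary= parameters documented above.
def _ExampleShutdownFirstDisk(lu, instance):
  """Sketch: shut down just the first disk of an instance.

  """
  if not instance.disks:
    return True
  return _ShutdownInstanceDisks(lu, instance, disks=instance.disks[0:1],
                                ignore_primary=True)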


def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  free_mem = nodeinfo[node].payload.get('memory_free', None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)
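

# Illustrative sketch (not part of the original module): a typical
# prerequisite check built on _CheckNodeFreeMemory, verifying that an
# instance's configured memory would fit on its primary node. The helper
# name is hypothetical, and reading the memory size via FillBE and
# constants.BE_MEMORY is an assumption about how backend parameters are
# filled elsewhere in this module.
def _ExampleCheckStartupMemory(lu, instance):
  """Sketch: ensure the primary node can hold the instance's memory.

  """
  bep = lu.cfg.GetClusterInfo().FillBE(instance)
  _CheckNodeFreeMemory(lu, instance.primary_node,
                       "starting instance %s" % instance.name,
                       bep[constants.BE_MEMORY], instance.hypervisor)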


def _CheckNodesFreeDisk(lu, nodenames, requested):
  """Checks if nodes have enough free disk space in the default VG.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
4298

4299
  @type lu: C{LogicalUnit}
4300
  @param lu: a logical unit from which we get configuration data
4301
  @type nodenames: C{list}
4302
  @param nodenames: the list of node names to check
4303
  @type requested: C{int}
4304
  @param requested: the amount of disk in MiB to check for
4305
  @raise errors.OpPrereqError: if the node doesn't have enough disk, or
4306
      we cannot check the node
4307

4308
  """
4309
  nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
4310
                                   lu.cfg.GetHypervisorType())
4311
  for node in nodenames:
4312
    info = nodeinfo[node]
4313
    info.Raise("Cannot get current information from node %s" % node,
4314
               prereq=True, ecode=errors.ECODE_ENVIRON)
4315
    vg_free = info.payload.get("vg_free", None)
4316
    if not isinstance(vg_free, int):
4317
      raise errors.OpPrereqError("Can't compute free disk space on node %s,"
4318
                                 " result was '%s'" % (node, vg_free),
4319
                                 errors.ECODE_ENVIRON)
4320
    if requested > vg_free:
4321
      raise errors.OpPrereqError("Not enough disk space on target node %s:"
4322
                                 " required %d MiB, available %d MiB" %
4323
                                 (node, requested, vg_free),
4324
                                 errors.ECODE_NORES)
4325

    
4326

    
4327
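# Editorial usage note (sketch): the helper above expects a list of node names
# and the total amount of new disk space in MiB; the node names and the size
# below are purely illustrative:
#
#   _CheckNodesFreeDisk(self, [pnode_name, snode_name], 2 * 10240)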


class LUStartupInstance(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "force"]
  _OP_DEFS = [
    ("beparams", _EmptyDict),
    ("hvparams", _EmptyDict),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra beparams
    if self.op.beparams:
      if not isinstance(self.op.beparams, dict):
        raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
                                   " dict" % (type(self.op.beparams), ),
                                   errors.ECODE_INVAL)
      # fill the beparams dict
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)

    # extra hvparams
    if self.op.hvparams:
      if not isinstance(self.op.hvparams, dict):
        raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
                                   " dict" % (type(self.op.hvparams), ),
                                   errors.ECODE_INVAL)

      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = cluster.FillHV(instance)
      filled_hvp.update(self.op.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)

    _CheckNodeOnline(self, instance.primary_node)

    bep = self.cfg.GetClusterInfo().FillBE(instance)
    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node,
                      prereq=True, ecode=errors.ECODE_ENVIRON)
    if not remote_info.payload: # not running already
      _CheckNodeFreeMemory(self, instance.primary_node,
                           "starting instance %s" % instance.name,
                           bep[constants.BE_MEMORY], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    """
    instance = self.instance
    force = self.op.force

    self.cfg.MarkInstanceUp(instance.name)

    node_current = instance.primary_node

    _StartInstanceDisks(self, instance, force)

    result = self.rpc.call_instance_start(node_current, instance,
                                          self.op.hvparams, self.op.beparams)
    msg = result.fail_msg
    if msg:
      _ShutdownInstanceDisks(self, instance)
      raise errors.OpExecError("Could not start instance: %s" % msg)
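# Editorial note (sketch): LUStartupInstance accepts one-off "beparams" and
# "hvparams" overrides that are validated and merged only for this start and
# are not written back to the configuration.  A client-side opcode could look
# roughly like the following (the opcode class name and the chosen hypervisor
# parameter are illustrative assumptions, not taken from this file):
#
#   op = opcodes.OpStartupInstance(instance_name="inst1.example.com",
#                                  force=False,
#                                  hvparams={constants.HV_BOOT_ORDER: "cdn"})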


class LURebootInstance(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
  _OP_DEFS = [("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]
  REQ_BGL = False

  def ExpandNames(self):
    if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
                                   constants.INSTANCE_REBOOT_HARD,
                                   constants.INSTANCE_REBOOT_FULL]:
      raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
                                  (constants.INSTANCE_REBOOT_SOFT,
                                   constants.INSTANCE_REBOOT_HARD,
                                   constants.INSTANCE_REBOOT_FULL))
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    node_current = instance.primary_node

    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                       constants.INSTANCE_REBOOT_HARD]:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, node_current)
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             reboot_type,
                                             self.op.shutdown_timeout)
      result.Raise("Could not reboot instance")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance,
                                               self.op.shutdown_timeout)
      result.Raise("Could not shutdown instance for full reboot")
      _ShutdownInstanceDisks(self, instance)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(node_current, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)
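# Editorial note (sketch): of the three reboot types accepted above,
# INSTANCE_REBOOT_SOFT and INSTANCE_REBOOT_HARD are forwarded to the
# hypervisor via call_instance_reboot, while INSTANCE_REBOOT_FULL is
# implemented as a shutdown, a disk restart and a fresh start, e.g.
# (illustrative client-side value):
#
#   reboot_type = constants.INSTANCE_REBOOT_FULL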


class LUShutdownInstance(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  _OP_DEFS = [("timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.op.timeout
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    node_current = instance.primary_node
    timeout = self.op.timeout
    self.cfg.MarkInstanceDown(instance.name)
    result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
    msg = result.fail_msg
    if msg:
      self.proc.LogWarning("Could not shutdown instance: %s" % msg)

    _ShutdownInstanceDisks(self, instance)


class LUReinstallInstance(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  _OP_DEFS = [
    ("os_type", None),
    ("force_variant", False),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot reinstall")

    if self.op.os_type is not None:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
                                             self.op.debug_level)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)


class LURecreateInstanceDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disks"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    if not isinstance(self.op.disks, list):
      raise errors.OpPrereqError("Invalid disks parameter", errors.ECODE_INVAL)
    for item in self.op.disks:
      if (not isinstance(item, int) or
          item < 0):
        raise errors.OpPrereqError("Invalid disk specification '%s'" %
                                   str(item), errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot recreate disks")

    if not self.op.disks:
      self.op.disks = range(len(instance.disks))
    else:
      for idx in self.op.disks:
        if idx >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
                                     errors.ECODE_INVAL)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Recreate the disks.

    """
    to_skip = []
    for idx, _ in enumerate(self.instance.disks):
      if idx not in self.op.disks: # disk idx has not been passed in
        to_skip.append(idx)
        continue

    _CreateDisks(self, self.instance, to_skip=to_skip)
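# Editorial note (sketch): for the LU above, an empty "disks" list means
# "recreate every disk"; otherwise the list holds zero-based indices into
# instance.disks.  For example (hypothetical value), recreating only the
# second disk of a two-disk instance would pass:
#
#   disks = [1]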


class LURenameInstance(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "new_name"]
  _OP_DEFS = [("ignore_ip", False)]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None
    _CheckNodeOnline(self, instance.primary_node)
    _CheckInstanceDown(self, instance, "cannot rename")
    self.instance = instance

    # new name verification
    name_info = utils.GetHostInfo(self.op.new_name)

    self.op.new_name = new_name = name_info.name
    instance_list = self.cfg.GetInstanceList()
    if new_name in instance_list:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name, errors.ECODE_EXISTS)

    if not self.op.ignore_ip:
      if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (name_info.ip, new_name),
                                   errors.ECODE_NOTUNIQUE)


  def Exec(self, feedback_fn):
    """Rename the instance.

    """
    inst = self.instance
    old_name = inst.name

    if inst.disk_template == constants.DT_FILE:
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if inst.disk_template == constants.DT_FILE:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (inst.primary_node, old_file_storage_dir,
                    new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    try:
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name, self.op.debug_level)
      msg = result.fail_msg
      if msg:
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.proc.LogWarning(msg)
    finally:
      _ShutdownInstanceDisks(self, inst)


class LURemoveInstance(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_failures"]
  _OP_DEFS = [("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
    nl = [self.cfg.GetMasterNode()]
    nl_post = list(self.instance.all_nodes) + nl
    return env, nl, nl_post

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    """
    instance = self.instance
    logging.info("Shutting down instance %s on node %s",
                 instance.name, instance.primary_node)

    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_failures:
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, instance.primary_node, msg))

    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)


def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
  """Utility function to remove an instance.

  """
  logging.info("Removing block devices for instance %s", instance.name)

  if not _RemoveDisks(lu, instance):
    if not ignore_failures:
      raise errors.OpExecError("Can't remove instance's disks")
    feedback_fn("Warning: can't remove instance's disks")

  logging.info("Removing instance %s out of cluster config", instance.name)

  lu.cfg.RemoveInstance(instance.name)

  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
    "Instance lock removal conflict"

  # Remove lock for the instance
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name


class LUQueryInstances(NoHooksLU):
  """Logical unit for querying instances.

  """
  # pylint: disable-msg=W0142
  _OP_REQP = ["output_fields", "names", "use_locking"]
  REQ_BGL = False
  _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
                    "serial_no", "ctime", "mtime", "uuid"]
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
                                    "admin_state",
                                    "disk_template", "ip", "mac", "bridge",
                                    "nic_mode", "nic_link",
                                    "sda_size", "sdb_size", "vcpus", "tags",
                                    "network_port", "beparams",
                                    r"(disk)\.(size)/([0-9]+)",
                                    r"(disk)\.(sizes)", "disk_usage",
                                    r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
                                    r"(nic)\.(bridge)/([0-9]+)",
                                    r"(nic)\.(macs|ips|modes|links|bridges)",
                                    r"(disk|nic)\.(count)",
                                    "hvparams",
                                    ] + _SIMPLE_FIELDS +
                                  ["hv/%s" % name
                                   for name in constants.HVS_PARAMETERS
                                   if name not in constants.HVC_GLOBALS] +
                                  ["be/%s" % name
                                   for name in constants.BES_PARAMETERS])
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")


  def ExpandNames(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_INSTANCE] = 1
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.names:
      self.wanted = _GetWantedInstances(self, self.op.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.do_locking:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    # pylint: disable-msg=R0912
    # way too many branches here
    all_info = self.cfg.GetAllInstancesInfo()
    if self.wanted == locking.ALL_SET:
      # caller didn't specify instance names, so ordering is not important
      if self.do_locking:
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
      else:
        instance_names = all_info.keys()
      instance_names = utils.NiceSort(instance_names)
    else:
      # caller did specify names, so we must keep the ordering
      if self.do_locking:
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
      else:
        tgt_set = all_info.keys()
      missing = set(self.wanted).difference(tgt_set)
      if missing:
        raise errors.OpExecError("Some instances were removed before"
                                 " retrieving their data: %s" % missing)
      instance_names = self.wanted

    instance_list = [all_info[iname] for iname in instance_names]

    # begin data gathering

    nodes = frozenset([inst.primary_node for inst in instance_list])
    hv_list = list(set([inst.hypervisor for inst in instance_list]))

    bad_nodes = []
    off_nodes = []
    if self.do_node_query:
      live_data = {}
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          off_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        else:
          if result.payload:
            live_data.update(result.payload)
          # else no instance is alive
    else:
      live_data = dict([(name, {}) for name in instance_names])

    # end data gathering

    HVPREFIX = "hv/"
    BEPREFIX = "be/"
    output = []
    cluster = self.cfg.GetClusterInfo()
    for instance in instance_list:
      iout = []
      i_hv = cluster.FillHV(instance, skip_globals=True)
      i_be = cluster.FillBE(instance)
      i_nicp = [cluster.SimpleFillNIC(nic.nicparams) for nic in instance.nics]
      for field in self.op.output_fields:
        st_match = self._FIELDS_STATIC.Matches(field)
        if field in self._SIMPLE_FIELDS:
          val = getattr(instance, field)
        elif field == "pnode":
          val = instance.primary_node
        elif field == "snodes":
          val = list(instance.secondary_nodes)
        elif field == "admin_state":
          val = instance.admin_up
        elif field == "oper_state":
          if instance.primary_node in bad_nodes:
            val = None
          else:
            val = bool(live_data.get(instance.name))
        elif field == "status":
          if instance.primary_node in off_nodes:
            val = "ERROR_nodeoffline"
          elif instance.primary_node in bad_nodes:
            val = "ERROR_nodedown"
          else:
            running = bool(live_data.get(instance.name))
            if running:
              if instance.admin_up:
                val = "running"
              else:
                val = "ERROR_up"
            else:
              if instance.admin_up:
                val = "ERROR_down"
              else:
                val = "ADMIN_down"
        elif field == "oper_ram":
          if instance.primary_node in bad_nodes:
            val = None
          elif instance.name in live_data:
            val = live_data[instance.name].get("memory", "?")
          else:
            val = "-"
        elif field == "vcpus":
          val = i_be[constants.BE_VCPUS]
        elif field == "disk_template":
          val = instance.disk_template
        elif field == "ip":
          if instance.nics:
            val = instance.nics[0].ip
          else:
            val = None
        elif field == "nic_mode":
          if instance.nics:
            val = i_nicp[0][constants.NIC_MODE]
          else:
            val = None
        elif field == "nic_link":
          if instance.nics:
            val = i_nicp[0][constants.NIC_LINK]
          else:
            val = None
        elif field == "bridge":
          if (instance.nics and
              i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
            val = i_nicp[0][constants.NIC_LINK]
          else:
            val = None
        elif field == "mac":
          if instance.nics:
            val = instance.nics[0].mac
          else:
            val = None
        elif field == "sda_size" or field == "sdb_size":
          idx = ord(field[2]) - ord('a')
          try:
            val = instance.FindDisk(idx).size
          except errors.OpPrereqError:
            val = None
        elif field == "disk_usage": # total disk usage per node
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
        elif field == "tags":
          val = list(instance.GetTags())
        elif field == "hvparams":
          val = i_hv
        elif (field.startswith(HVPREFIX) and
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
              field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
          val = i_hv.get(field[len(HVPREFIX):], None)
        elif field == "beparams":
          val = i_be
        elif (field.startswith(BEPREFIX) and
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
          val = i_be.get(field[len(BEPREFIX):], None)
        elif st_match and st_match.groups():
          # matches a variable list
          st_groups = st_match.groups()
          if st_groups and st_groups[0] == "disk":
            if st_groups[1] == "count":
              val = len(instance.disks)
            elif st_groups[1] == "sizes":
              val = [disk.size for disk in instance.disks]
            elif st_groups[1] == "size":
              try:
                val = instance.FindDisk(st_groups[2]).size
              except errors.OpPrereqError:
                val = None
            else:
              assert False, "Unhandled disk parameter"
          elif st_groups[0] == "nic":
            if st_groups[1] == "count":
              val = len(instance.nics)
            elif st_groups[1] == "macs":
              val = [nic.mac for nic in instance.nics]
            elif st_groups[1] == "ips":
              val = [nic.ip for nic in instance.nics]
            elif st_groups[1] == "modes":
              val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
            elif st_groups[1] == "links":
              val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
            elif st_groups[1] == "bridges":
              val = []
              for nicp in i_nicp:
                if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
                  val.append(nicp[constants.NIC_LINK])
                else:
                  val.append(None)
            else:
              # index-based item
              nic_idx = int(st_groups[2])
              if nic_idx >= len(instance.nics):
                val = None
              else:
                if st_groups[1] == "mac":
                  val = instance.nics[nic_idx].mac
                elif st_groups[1] == "ip":
                  val = instance.nics[nic_idx].ip
                elif st_groups[1] == "mode":
                  val = i_nicp[nic_idx][constants.NIC_MODE]
                elif st_groups[1] == "link":
                  val = i_nicp[nic_idx][constants.NIC_LINK]
                elif st_groups[1] == "bridge":
                  nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
                  if nic_mode == constants.NIC_MODE_BRIDGED:
                    val = i_nicp[nic_idx][constants.NIC_LINK]
                  else:
                    val = None
                else:
                  assert False, "Unhandled NIC parameter"
          else:
            assert False, ("Declared but unhandled variable parameter '%s'" %
                           field)
        else:
          assert False, "Declared but unhandled parameter '%s'" % field
        iout.append(val)
      output.append(iout)

    return output
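# Editorial note (sketch): LUQueryInstances.Exec returns one row per instance,
# with one entry per requested field, in op.output_fields order.  A
# hypothetical query mixing static and dynamic fields could use:
#
#   output_fields = ["name", "pnode", "status", "be/memory", "disk.sizes"]
#
# Dynamic fields such as "status" trigger the live data gathering RPC above;
# purely static fields are answered from the configuration alone.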


class LUFailoverInstance(LogicalUnit):
  """Failover an instance.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_consistency"]
  _OP_DEFS = [("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self.instance
    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]
    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      "OLD_PRIMARY": source_node,
      "OLD_SECONDARY": target_node,
      "NEW_PRIMARY": target_node,
      "NEW_SECONDARY": source_node,
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    nl_post = list(nl)
    nl_post.append(source_node)
    return env, nl, nl_post

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    bep = self.cfg.GetClusterInfo().FillBE(instance)
    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " network mirrored, cannot failover.",
                                 errors.ECODE_STATE)

    secondary_nodes = instance.secondary_nodes
    if not secondary_nodes:
      raise errors.ProgrammerError("no secondary node but using "
                                   "a mirrored disk template")

    target_node = secondary_nodes[0]
    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    if instance.admin_up:
      # check memory requirements on the secondary node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]

    if instance.admin_up:
      feedback_fn("* checking disk consistency between source and target")
      for dev in instance.disks:
        # for drbd, these are drbd over lvm
        if not _CheckDiskConsistency(self, dev, target_node, False):
          if not self.op.ignore_consistency:
            raise errors.OpExecError("Disk %s is degraded on target node,"
                                     " aborting failover." % dev.iv_name)
    else:
      feedback_fn("* not checking disk consistency as instance is not running")

    feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    feedback_fn("* deactivating the instance's disks on source node")
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
      raise errors.OpExecError("Can't shut down the instance's disks.")

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, feedback_fn)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      feedback_fn("* activating the instance's disks on target node")
      logging.info("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      feedback_fn("* starting the instance on the target node")
      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))
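# Editorial note (sketch): the failover above is strictly shutdown-based:
# check disk consistency, shut the instance down on the old primary,
# deactivate its disks, flip instance.primary_node in the configuration, then
# reassemble the disks and start the instance on the old secondary (only if
# the instance was marked admin_up).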


class LUMigrateInstance(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "live", "cleanup"]

  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       self.op.live, self.op.cleanup)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]
    env = _BuildInstanceHookEnvByObject(self, instance)
    env["MIGRATE_LIVE"] = self.op.live
    env["MIGRATE_CLEANUP"] = self.op.cleanup
    env.update({
        "OLD_PRIMARY": source_node,
        "OLD_SECONDARY": target_node,
        "NEW_PRIMARY": target_node,
        "NEW_SECONDARY": source_node,
        })
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    nl_post = list(nl)
    nl_post.append(source_node)
    return env, nl, nl_post


class LUMoveInstance(LogicalUnit):
  """Move an instance by data-copying.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "target_node"]
  _OP_DEFS = [("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
                                       self.op.target_node]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)

    if instance.admin_up:
      # check memory requirements on the secondary node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))


class LUMigrateNode(LogicalUnit):
  """Migrate all instances from a node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name", "live"]
  REQ_BGL = False

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    # Create tasklets for migrating all instances on this node
    names = []
    tasklets = []

    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
      logging.debug("Migrating instance %s", inst.name)
      names.append(inst.name)

      tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))

    self.tasklets = tasklets

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = names

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "NODE_NAME": self.op.node_name,
      }

    nl = [self.cfg.GetMasterNode()]

    return (env, nl, nl)
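# Editorial note (sketch): LUMigrateNode above is a thin wrapper that creates
# one tasklet per primary instance of the node; the tasklet class below holds
# the actual CheckPrereq/Exec logic, which is why the LU defines neither
# method itself.  LUMigrateInstance reuses the same tasklet for a single
# instance:
#
#   TLMigrateInstance(self, inst.name, self.op.live, False)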
class TLMigrateInstance(Tasklet):
5596
  def __init__(self, lu, instance_name, live, cleanup):
5597
    """Initializes this class.
5598

5599
    """
5600
    Tasklet.__init__(self, lu)
5601

    
5602
    # Parameters
5603
    self.instance_name = instance_name
5604
    self.live = live
5605
    self.cleanup = cleanup
5606

    
5607
  def CheckPrereq(self):
5608
    """Check prerequisites.
5609

5610
    This checks that the instance is in the cluster.
5611

5612
    """
5613
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5614
    instance = self.cfg.GetInstanceInfo(instance_name)
5615
    assert instance is not None
5616

    
5617
    if instance.disk_template != constants.DT_DRBD8:
5618
      raise errors.OpPrereqError("Instance's disk layout is not"
5619
                                 " drbd8, cannot migrate.", errors.ECODE_STATE)
5620

    
5621
    secondary_nodes = instance.secondary_nodes
5622
    if not secondary_nodes:
5623
      raise errors.ConfigurationError("No secondary node but using"
5624
                                      " drbd8 disk template")
5625

    
5626
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
5627

    
5628
    target_node = secondary_nodes[0]
5629
    # check memory requirements on the secondary node
5630
    _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
5631
                         instance.name, i_be[constants.BE_MEMORY],
5632
                         instance.hypervisor)
5633

    
5634
    # check bridge existance
5635
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
5636

    
5637
    if not self.cleanup:
5638
      _CheckNodeNotDrained(self.lu, target_node)
5639
      result = self.rpc.call_instance_migratable(instance.primary_node,
5640
                                                 instance)
5641
      result.Raise("Can't migrate, please use failover",
5642
                   prereq=True, ecode=errors.ECODE_STATE)
5643

    
5644
    self.instance = instance
5645

    
5646
  def _WaitUntilSync(self):
5647
    """Poll with custom rpc for disk sync.
5648

5649
    This uses our own step-based rpc call.
5650

5651
    """
5652
    self.feedback_fn("* wait until resync is done")
5653
    all_done = False
5654
    while not all_done:
5655
      all_done = True
5656
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5657
                                            self.nodes_ip,
5658
                                            self.instance.disks)
5659
      min_percent = 100
5660
      for node, nres in result.items():
5661
        nres.Raise("Cannot resync disks on node %s" % node)
5662
        node_done, node_percent = nres.payload
5663
        all_done = all_done and node_done
5664
        if node_percent is not None:
5665
          min_percent = min(min_percent, node_percent)
5666
      if not all_done:
5667
        if min_percent < 100:
5668
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
5669
        time.sleep(2)
5670

    
5671
  def _EnsureSecondary(self, node):
5672
    """Demote a node to secondary.
5673

5674
    """
5675
    self.feedback_fn("* switching node %s to secondary mode" % node)
5676

    
5677
    for dev in self.instance.disks:
5678
      self.cfg.SetDiskID(dev, node)
5679

    
5680
    result = self.rpc.call_blockdev_close(node, self.instance.name,
5681
                                          self.instance.disks)
5682
    result.Raise("Cannot change disk to secondary on node %s" % node)
5683

    
5684
  def _GoStandalone(self):
5685
    """Disconnect from the network.
5686

5687
    """
5688
    self.feedback_fn("* changing into standalone mode")
5689
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5690
                                               self.instance.disks)
5691
    for node, nres in result.items():
5692
      nres.Raise("Cannot disconnect disks node %s" % node)
5693

    
5694
  def _GoReconnect(self, multimaster):
5695
    """Reconnect to the network.
5696

5697
    """
5698
    if multimaster:
5699
      msg = "dual-master"
5700
    else:
5701
      msg = "single-master"
5702
    self.feedback_fn("* changing disks into %s mode" % msg)
5703
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5704
                                           self.instance.disks,
5705
                                           self.instance.name, multimaster)
5706
    for node, nres in result.items():
5707
      nres.Raise("Cannot change disks config on node %s" % node)
5708

    
5709
  def _ExecCleanup(self):
5710
    """Try to cleanup after a failed migration.
5711

5712
    The cleanup is done by:
5713
      - check that the instance is running only on one node
5714
        (and update the config if needed)
5715
      - change disks on its secondary node to secondary
5716
      - wait until disks are fully synchronized
5717
      - disconnect from the network
5718
      - change disks into single-master mode
5719
      - wait again until disks are fully synchronized
5720

5721
    """
5722
    instance = self.instance
5723
    target_node = self.target_node
5724
    source_node = self.source_node
5725

    
5726
    # check running on only one node
5727
    self.feedback_fn("* checking where the instance actually runs"
5728
                     " (if this hangs, the hypervisor might be in"
5729
                     " a bad state)")
5730
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
5731
    for node, result in ins_l.items():
5732
      result.Raise("Can't contact node %s" % node)
5733

    
5734
    runningon_source = instance.name in ins_l[source_node].payload
5735
    runningon_target = instance.name in ins_l[target_node].payload
5736

    
5737
    if runningon_source and runningon_target:
5738
      raise errors.OpExecError("Instance seems to be running on two nodes,"
5739
                               " or the hypervisor is confused. You will have"
5740
                               " to ensure manually that it runs only on one"
5741
                               " and restart this operation.")
5742

    
5743
    if not (runningon_source or runningon_target):
5744
      raise errors.OpExecError("Instance does not seem to be running at all."
5745
                               " In this case, it's safer to repair by"
5746
                               " running 'gnt-instance stop' to ensure disk"
5747
                               " shutdown, and then restarting it.")
5748

    
5749
    if runningon_target:
5750
      # the migration has actually succeeded, we need to update the config
5751
      self.feedback_fn("* instance running on secondary node (%s),"
5752
                       " updating config" % target_node)
5753
      instance.primary_node = target_node
5754
      self.cfg.Update(instance, self.feedback_fn)
5755
      demoted_node = source_node
5756
    else:
5757
      self.feedback_fn("* instance confirmed to be running on its"
5758
                       " primary node (%s)" % source_node)
5759
      demoted_node = target_node
5760

    
5761
    self._EnsureSecondary(demoted_node)
5762
    try:
5763
      self._WaitUntilSync()
5764
    except errors.OpExecError:
5765
      # we ignore errors here, since if the device is standalone, it
5766
      # won't be able to sync
5767
      pass
5768
    self._GoStandalone()
5769
    self._GoReconnect(False)
5770
    self._WaitUntilSync()
5771

    
5772
    self.feedback_fn("* done")
5773

    
5774
  def _RevertDiskStatus(self):
5775
    """Try to revert the disk status after a failed migration.
5776

5777
    """
5778
    target_node = self.target_node
5779
    try:
5780
      self._EnsureSecondary(target_node)
5781
      self._GoStandalone()
5782
      self._GoReconnect(False)
5783
      self._WaitUntilSync()
5784
    except errors.OpExecError, err:
5785
      self.lu.LogWarning("Migration failed and I can't reconnect the"
5786
                         " drives: error '%s'\n"
5787
                         "Please look and recover the instance status" %
5788
                         str(err))
5789

    
5790
  def _AbortMigration(self):
5791
    """Call the hypervisor code to abort a started migration.
5792

5793
    """
5794
    instance = self.instance
5795
    target_node = self.target_node
5796
    migration_info = self.migration_info
5797

    
5798
    abort_result = self.rpc.call_finalize_migration(target_node,
5799
                                                    instance,
5800
                                                    migration_info,
5801
                                                    False)
5802
    abort_msg = abort_result.fail_msg
5803
    if abort_msg:
5804
      logging.error("Aborting migration failed on target node %s: %s",
5805
                    target_node, abort_msg)
5806
      # Don't raise an exception here, as we still have to try to revert the
5807
      # disk status, even if this step failed.
5808

    
5809
  def _ExecMigration(self):
5810
    """Migrate an instance.
5811

5812
    The migrate is done by:
5813
      - change the disks into dual-master mode
5814
      - wait until disks are fully synchronized again
5815
      - migrate the instance
5816
      - change disks on the new secondary node (the old primary) to secondary
5817
      - wait until disks are fully synchronized
5818
      - change disks into single-master mode
5819

5820
    """
5821
    instance = self.instance
5822
    target_node = self.target_node
5823
    source_node = self.source_node
5824

    
5825
    self.feedback_fn("* checking disk consistency between source and target")
5826
    for dev in instance.disks:
5827
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
5828
        raise errors.OpExecError("Disk %s is degraded or not fully"
5829
                                 " synchronized on target node,"
5830
                                 " aborting migrate." % dev.iv_name)
5831

    
5832
    # First get the migration information from the remote node
5833
    result = self.rpc.call_migration_info(source_node, instance)
5834
    msg = result.fail_msg
5835
    if msg:
5836
      log_err = ("Failed fetching source migration information from %s: %s" %
5837
                 (source_node, msg))
5838
      logging.error(log_err)
5839
      raise errors.OpExecError(log_err)
5840

    
5841
    self.migration_info = migration_info = result.payload
5842

    
5843
    # Then switch the disks to master/master mode
5844
    self._EnsureSecondary(target_node)
5845
    self._GoStandalone()
5846
    self._GoReconnect(True)
5847
    self._WaitUntilSync()
5848

    
5849
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
5850
    result = self.rpc.call_accept_instance(target_node,
5851
                                           instance,
5852
                                           migration_info,
5853
                                           self.nodes_ip[target_node])
5854

    
5855
    msg = result.fail_msg
5856
    if msg:
5857
      logging.error("Instance pre-migration failed, trying to revert"
5858
                    " disk status: %s", msg)
5859
      self.feedback_fn("Pre-migration failed, aborting")
5860
      self._AbortMigration()
5861
      self._RevertDiskStatus()
5862
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
5863
                               (instance.name, msg))
5864

    
5865
    self.feedback_fn("* migrating instance to %s" % target_node)
5866
    time.sleep(10)
5867
    result = self.rpc.call_instance_migrate(source_node, instance,
5868
                                            self.nodes_ip[target_node],
5869
                                            self.live)
5870
    msg = result.fail_msg
5871
    if msg:
5872
      logging.error("Instance migration failed, trying to revert"
5873
                    " disk status: %s", msg)
5874
      self.feedback_fn("Migration failed, aborting")
5875
      self._AbortMigration()
5876
      self._RevertDiskStatus()
5877
      raise errors.OpExecError("Could not migrate instance %s: %s" %
5878
                               (instance.name, msg))
5879
    time.sleep(10)
5880

    
5881
    instance.primary_node = target_node
5882
    # distribute new instance config to the other nodes
5883
    self.cfg.Update(instance, self.feedback_fn)
5884

    
5885
    result = self.rpc.call_finalize_migration(target_node,
5886
                                              instance,
5887
                                              migration_info,
5888
                                              True)
5889
    msg = result.fail_msg
5890
    if msg:
5891
      logging.error("Instance migration succeeded, but finalization failed:"
5892
                    " %s", msg)
5893
      raise errors.OpExecError("Could not finalize instance migration: %s" %
5894
                               msg)
5895

    
5896
    self._EnsureSecondary(source_node)
5897
    self._WaitUntilSync()
5898
    self._GoStandalone()
5899
    self._GoReconnect(False)
5900
    self._WaitUntilSync()
5901

    
5902
    self.feedback_fn("* done")
5903

    
5904
  def Exec(self, feedback_fn):
5905
    """Perform the migration.
5906

5907
    """
5908
    feedback_fn("Migrating instance %s" % self.instance.name)
5909

    
5910
    self.feedback_fn = feedback_fn
5911

    
5912
    self.source_node = self.instance.primary_node
5913
    self.target_node = self.instance.secondary_nodes[0]
5914
    self.all_nodes = [self.source_node, self.target_node]
5915
    self.nodes_ip = {
5916
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
5917
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
5918
      }
5919

    
5920
    if self.cleanup:
5921
      return self._ExecCleanup()
5922
    else:
5923
      return self._ExecMigration()


def _CreateBlockDev(lu, node, instance, device, force_create,
5927
                    info, force_open):
5928
  """Create a tree of block devices on a given node.
5929

5930
  If this device type has to be created on secondaries, create it and
5931
  all its children.
5932

5933
  If not, just recurse to children keeping the same 'force' value.
5934

5935
  @param lu: the lu on whose behalf we execute
5936
  @param node: the node on which to create the device
5937
  @type instance: L{objects.Instance}
5938
  @param instance: the instance which owns the device
5939
  @type device: L{objects.Disk}
5940
  @param device: the device to create
5941
  @type force_create: boolean
5942
  @param force_create: whether to force creation of this device; this
5943
      will be changed to True whenever we find a device which has
5944
      CreateOnSecondary() attribute
5945
  @param info: the extra 'metadata' we should attach to the device
5946
      (this will be represented as a LVM tag)
5947
  @type force_open: boolean
5948
  @param force_open: this parameter will be passed to the
5949
      L{backend.BlockdevCreate} function where it specifies
5950
      whether we run on primary or not, and it affects both
5951
      the child assembly and the device's own Open() execution
5952

5953
  """
5954
  if device.CreateOnSecondary():
5955
    force_create = True
5956

    
5957
  if device.children:
5958
    for child in device.children:
5959
      _CreateBlockDev(lu, node, instance, child, force_create,
5960
                      info, force_open)
5961

    
5962
  if not force_create:
5963
    return
5964

    
5965
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)


def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
5969
  """Create a single block device on a given node.
5970

5971
  This will not recurse over children of the device, so they must be
5972
  created in advance.
5973

5974
  @param lu: the lu on whose behalf we execute
5975
  @param node: the node on which to create the device
5976
  @type instance: L{objects.Instance}
5977
  @param instance: the instance which owns the device
5978
  @type device: L{objects.Disk}
5979
  @param device: the device to create
5980
  @param info: the extra 'metadata' we should attach to the device
5981
      (this will be represented as a LVM tag)
5982
  @type force_open: boolean
5983
  @param force_open: this parameter will be passed to the
5984
      L{backend.BlockdevCreate} function where it specifies
5985
      whether we run on primary or not, and it affects both
5986
      the child assembly and the device's own Open() execution
5987

5988
  """
5989
  lu.cfg.SetDiskID(device, node)
5990
  result = lu.rpc.call_blockdev_create(node, device, device.size,
5991
                                       instance.name, force_open, info)
5992
  result.Raise("Can't create block device %s on"
5993
               " node %s for instance %s" % (device, node, instance.name))
5994
  if device.physical_id is None:
5995
    device.physical_id = result.payload


def _GenerateUniqueNames(lu, exts):
5999
  """Generate a suitable LV name.
6000

6001
  This will generate unique logical volume names, one per given extension.
6002

6003
  """
6004
  results = []
6005
  for val in exts:
6006
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6007
    results.append("%s%s" % (new_id, val))
6008
  return results


def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
6012
                         p_minor, s_minor):
6013
  """Generate a drbd8 device complete with its children.
6014

6015
  """
6016
  port = lu.cfg.AllocatePort()
6017
  vgname = lu.cfg.GetVGName()
6018
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6019
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
6020
                          logical_id=(vgname, names[0]))
6021
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6022
                          logical_id=(vgname, names[1]))
6023
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6024
                          logical_id=(primary, secondary, port,
6025
                                      p_minor, s_minor,
6026
                                      shared_secret),
6027
                          children=[dev_data, dev_meta],
6028
                          iv_name=iv_name)
6029
  return drbd_dev


def _GenerateDiskTemplate(lu, template_name,
6033
                          instance_name, primary_node,
6034
                          secondary_nodes, disk_info,
6035
                          file_storage_dir, file_driver,
6036
                          base_index):
6037
  """Generate the entire disk layout for a given template type.
6038

6039
  """
6040
  #TODO: compute space requirements
6041

    
6042
  vgname = lu.cfg.GetVGName()
6043
  disk_count = len(disk_info)
6044
  disks = []
6045
  if template_name == constants.DT_DISKLESS:
6046
    pass
6047
  elif template_name == constants.DT_PLAIN:
6048
    if len(secondary_nodes) != 0:
6049
      raise errors.ProgrammerError("Wrong template configuration")
6050

    
6051
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6052
                                      for i in range(disk_count)])
6053
    for idx, disk in enumerate(disk_info):
6054
      disk_index = idx + base_index
6055
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
6056
                              logical_id=(vgname, names[idx]),
6057
                              iv_name="disk/%d" % disk_index,
6058
                              mode=disk["mode"])
6059
      disks.append(disk_dev)
6060
  elif template_name == constants.DT_DRBD8:
6061
    if len(secondary_nodes) != 1:
6062
      raise errors.ProgrammerError("Wrong template configuration")
6063
    remote_node = secondary_nodes[0]
6064
    minors = lu.cfg.AllocateDRBDMinor(
6065
      [primary_node, remote_node] * len(disk_info), instance_name)
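    # The minors list interleaves primary and secondary minors per disk,
    # e.g. for two disks: [p0, s0, p1, s1]; below, minors[idx * 2] is the
    # primary's minor and minors[idx * 2 + 1] the secondary's for disk idx.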
6066

    
6067
    names = []
6068
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6069
                                               for i in range(disk_count)]):
6070
      names.append(lv_prefix + "_data")
6071
      names.append(lv_prefix + "_meta")
6072
    for idx, disk in enumerate(disk_info):
6073
      disk_index = idx + base_index
6074
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6075
                                      disk["size"], names[idx*2:idx*2+2],
6076
                                      "disk/%d" % disk_index,
6077
                                      minors[idx*2], minors[idx*2+1])
6078
      disk_dev.mode = disk["mode"]
6079
      disks.append(disk_dev)
6080
  elif template_name == constants.DT_FILE:
6081
    if len(secondary_nodes) != 0:
6082
      raise errors.ProgrammerError("Wrong template configuration")
6083

    
6084
    _RequireFileStorage()
6085

    
6086
    for idx, disk in enumerate(disk_info):
6087
      disk_index = idx + base_index
6088
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6089
                              iv_name="disk/%d" % disk_index,
6090
                              logical_id=(file_driver,
6091
                                          "%s/disk%d" % (file_storage_dir,
6092
                                                         disk_index)),
6093
                              mode=disk["mode"])
6094
      disks.append(disk_dev)
6095
  else:
6096
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
6097
  return disks


def _GetInstanceInfoText(instance):
6101
  """Compute that text that should be added to the disk's metadata.
6102

6103
  """
6104
  return "originstname+%s" % instance.name


def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6108
  """Create all disks for an instance.
6109

6110
  This abstracts away some work from AddInstance.
6111

6112
  @type lu: L{LogicalUnit}
6113
  @param lu: the logical unit on whose behalf we execute
6114
  @type instance: L{objects.Instance}
6115
  @param instance: the instance whose disks we should create
6116
  @type to_skip: list
6117
  @param to_skip: list of indices to skip
6118
  @type target_node: string
6119
  @param target_node: if passed, overrides the target node for creation
6120
  @rtype: boolean
6121
  @return: the success of the creation
6122

6123
  """
6124
  info = _GetInstanceInfoText(instance)
6125
  if target_node is None:
6126
    pnode = instance.primary_node
6127
    all_nodes = instance.all_nodes
6128
  else:
6129
    pnode = target_node
6130
    all_nodes = [pnode]
6131

    
6132
  if instance.disk_template == constants.DT_FILE:
6133
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6134
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
6135

    
6136
    result.Raise("Failed to create directory '%s' on"
6137
                 " node %s" % (file_storage_dir, pnode))
6138

    
6139
  # Note: this needs to be kept in sync with adding of disks in
6140
  # LUSetInstanceParams
6141
  for idx, device in enumerate(instance.disks):
6142
    if to_skip and idx in to_skip:
6143
      continue
6144
    logging.info("Creating volume %s for instance %s",
6145
                 device.iv_name, instance.name)
6146
    #HARDCODE
6147
    for node in all_nodes:
6148
      f_create = node == pnode
6149
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)


def _RemoveDisks(lu, instance, target_node=None):
6153
  """Remove all disks for an instance.
6154

6155
  This abstracts away some work from `AddInstance()` and
6156
  `RemoveInstance()`. Note that in case some of the devices couldn't
6157
  be removed, the removal will continue with the other ones (compare
6158
  with `_CreateDisks()`).
6159

6160
  @type lu: L{LogicalUnit}
6161
  @param lu: the logical unit on whose behalf we execute
6162
  @type instance: L{objects.Instance}
6163
  @param instance: the instance whose disks we should remove
6164
  @type target_node: string
6165
  @param target_node: used to override the node on which to remove the disks
6166
  @rtype: boolean
6167
  @return: the success of the removal
6168

6169
  """
6170
  logging.info("Removing block devices for instance %s", instance.name)
6171

    
6172
  all_result = True
6173
  for device in instance.disks:
6174
    if target_node:
6175
      edata = [(target_node, device)]
6176
    else:
6177
      edata = device.ComputeNodeTree(instance.primary_node)
6178
    for node, disk in edata:
6179
      lu.cfg.SetDiskID(disk, node)
6180
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6181
      if msg:
6182
        lu.LogWarning("Could not remove block device %s on node %s,"
6183
                      " continuing anyway: %s", device.iv_name, node, msg)
6184
        all_result = False
6185

    
6186
  if instance.disk_template == constants.DT_FILE:
6187
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6188
    if target_node:
6189
      tgt = target_node
6190
    else:
6191
      tgt = instance.primary_node
6192
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6193
    if result.fail_msg:
6194
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6195
                    file_storage_dir, instance.primary_node, result.fail_msg)
6196
      all_result = False
6197

    
6198
  return all_result


def _ComputeDiskSize(disk_template, disks):
6202
  """Compute disk size requirements in the volume group
6203

6204
  """
6205
  # Required free disk space as a function of disk and swap space
6206
  req_size_dict = {
6207
    constants.DT_DISKLESS: None,
6208
    constants.DT_PLAIN: sum(d["size"] for d in disks),
6209
    # 128 MB are added for drbd metadata for each disk
6210
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
6211
    constants.DT_FILE: None,
6212
  }
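  # Worked example: two drbd8 disks of 1024 and 2048 MiB require
  # (1024 + 128) + (2048 + 128) = 3328 MiB of free space in the volume
  # group on each node; file-based and diskless templates need no VG space.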
6213

    
6214
  if disk_template not in req_size_dict:
6215
    raise errors.ProgrammerError("Disk template '%s' size requirement"
6216
                                 " is unknown" %  disk_template)
6217

    
6218
  return req_size_dict[disk_template]


def _CheckHVParams(lu, nodenames, hvname, hvparams):
6222
  """Hypervisor parameter validation.
6223

6224
  This function abstract the hypervisor parameter validation to be
6225
  used in both instance create and instance modify.
6226

6227
  @type lu: L{LogicalUnit}
6228
  @param lu: the logical unit for which we check
6229
  @type nodenames: list
6230
  @param nodenames: the list of nodes on which we should check
6231
  @type hvname: string
6232
  @param hvname: the name of the hypervisor we should use
6233
  @type hvparams: dict
6234
  @param hvparams: the parameters which we need to check
6235
  @raise errors.OpPrereqError: if the parameters are not valid
6236

6237
  """
6238
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6239
                                                  hvname,
6240
                                                  hvparams)
6241
  for node in nodenames:
6242
    info = hvinfo[node]
6243
    if info.offline:
6244
      continue
6245
    info.Raise("Hypervisor parameter validation failed on node %s" % node)


def _CheckOSParams(lu, required, nodenames, osname, osparams):
6249
  """OS parameters validation.
6250

6251
  @type lu: L{LogicalUnit}
6252
  @param lu: the logical unit for which we check
6253
  @type required: boolean
6254
  @param required: whether the validation should fail if the OS is not
6255
      found
6256
  @type nodenames: list
6257
  @param nodenames: the list of nodes on which we should check
6258
  @type osname: string
6259
  @param osname: the name of the OS we should use
6260
  @type osparams: dict
6261
  @param osparams: the parameters which we need to check
6262
  @raise errors.OpPrereqError: if the parameters are not valid
6263

6264
  """
6265
  result = lu.rpc.call_os_validate(required, nodenames, osname,
6266
                                   [constants.OS_VALIDATE_PARAMETERS],
6267
                                   osparams)
6268
  for node, nres in result.items():
6269
    # we don't check for offline cases since this should be run only
6270
    # against the master node and/or an instance's nodes
6271
    nres.Raise("OS Parameters validation failed on node %s" % node)
6272
    if not nres.payload:
6273
      lu.LogInfo("OS %s not found on node %s, validation skipped",
6274
                 osname, node)


class LUCreateInstance(LogicalUnit):
6278
  """Create an instance.
6279

6280
  """
6281
  HPATH = "instance-add"
6282
  HTYPE = constants.HTYPE_INSTANCE
6283
  _OP_REQP = ["instance_name", "disks",
6284
              "mode", "start",
6285
              "wait_for_sync", "ip_check", "nics",
6286
              "hvparams", "beparams", "osparams"]
6287
  _OP_DEFS = [
6288
    ("name_check", True),
6289
    ("no_install", False),
6290
    ("os_type", None),
6291
    ("force_variant", False),
6292
    ("source_handshake", None),
6293
    ("source_x509_ca", None),
6294
    ("source_instance_name", None),
6295
    ("src_node", None),
6296
    ("src_path", None),
6297
    ("pnode", None),
6298
    ("snode", None),
6299
    ("iallocator", None),
6300
    ("hypervisor", None),
6301
    ("disk_template", None),
6302
    ("identify_defaults", None),
6303
    ]
6304
  REQ_BGL = False
6305

    
6306
  def CheckArguments(self):
6307
    """Check arguments.
6308

6309
    """
6310
    # do not require name_check to ease forward/backward compatibility
6311
    # for tools
6312
    if self.op.no_install and self.op.start:
6313
      self.LogInfo("No-installation mode selected, disabling startup")
6314
      self.op.start = False
6315
    # validate/normalize the instance name
6316
    self.op.instance_name = utils.HostInfo.NormalizeName(self.op.instance_name)
6317
    if self.op.ip_check and not self.op.name_check:
6318
      # TODO: make the ip check more flexible and not depend on the name check
6319
      raise errors.OpPrereqError("Cannot do ip checks without a name check",
6320
                                 errors.ECODE_INVAL)
6321

    
6322
    # check nics' parameter names
6323
    for nic in self.op.nics:
6324
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6325

    
6326
    # check disks. parameter names and consistent adopt/no-adopt strategy
6327
    has_adopt = has_no_adopt = False
6328
    for disk in self.op.disks:
6329
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
6330
      if "adopt" in disk:
6331
        has_adopt = True
6332
      else:
6333
        has_no_adopt = True
6334
    if has_adopt and has_no_adopt:
6335
      raise errors.OpPrereqError("Either all disks are adopted or none is",
6336
                                 errors.ECODE_INVAL)
6337
    if has_adopt:
6338
      if self.op.disk_template != constants.DT_PLAIN:
6339
        raise errors.OpPrereqError("Disk adoption is only supported for the"
6340
                                   " 'plain' disk template",
6341
                                   errors.ECODE_INVAL)
6342
      if self.op.iallocator is not None:
6343
        raise errors.OpPrereqError("Disk adoption not allowed with an"
6344
                                   " iallocator script", errors.ECODE_INVAL)
6345
      if self.op.mode == constants.INSTANCE_IMPORT:
6346
        raise errors.OpPrereqError("Disk adoption not allowed for"
6347
                                   " instance import", errors.ECODE_INVAL)
6348

    
6349
    self.adopt_disks = has_adopt
6350

    
6351
    # verify creation mode
6352
    if self.op.mode not in constants.INSTANCE_CREATE_MODES:
6353
      raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
6354
                                 self.op.mode, errors.ECODE_INVAL)
6355

    
6356
    # instance name verification
6357
    if self.op.name_check:
6358
      self.hostname1 = utils.GetHostInfo(self.op.instance_name)
6359
      self.op.instance_name = self.hostname1.name
6360
      # used in CheckPrereq for ip ping check
6361
      self.check_ip = self.hostname1.ip
6362
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6363
      raise errors.OpPrereqError("Remote imports require names to be checked" %
6364
                                 errors.ECODE_INVAL)
6365
    else:
6366
      self.check_ip = None
6367

    
6368
    # file storage checks
6369
    if (self.op.file_driver and
6370
        not self.op.file_driver in constants.FILE_DRIVER):
6371
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
6372
                                 self.op.file_driver, errors.ECODE_INVAL)
6373

    
6374
    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6375
      raise errors.OpPrereqError("File storage directory path not absolute",
6376
                                 errors.ECODE_INVAL)
6377

    
6378
    ### Node/iallocator related checks
6379
    if [self.op.iallocator, self.op.pnode].count(None) != 1:
6380
      raise errors.OpPrereqError("One and only one of iallocator and primary"
6381
                                 " node must be given",
6382
                                 errors.ECODE_INVAL)
6383

    
6384
    self._cds = _GetClusterDomainSecret()
6385

    
6386
    if self.op.mode == constants.INSTANCE_IMPORT:
6387
      # On import force_variant must be True, because if we forced it at
6388
      # initial install, our only chance when importing it back is that it
6389
      # works again!
6390
      self.op.force_variant = True
6391

    
6392
      if self.op.no_install:
6393
        self.LogInfo("No-installation mode has no effect during import")
6394

    
6395
    elif self.op.mode == constants.INSTANCE_CREATE:
6396
      if self.op.os_type is None:
6397
        raise errors.OpPrereqError("No guest OS specified",
6398
                                   errors.ECODE_INVAL)
6399
      if self.op.disk_template is None:
6400
        raise errors.OpPrereqError("No disk template specified",
6401
                                   errors.ECODE_INVAL)
6402

    
6403
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6404
      # Check handshake to ensure both clusters have the same domain secret
6405
      src_handshake = self.op.source_handshake
6406
      if not src_handshake:
6407
        raise errors.OpPrereqError("Missing source handshake",
6408
                                   errors.ECODE_INVAL)
6409

    
6410
      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
6411
                                                           src_handshake)
6412
      if errmsg:
6413
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
6414
                                   errors.ECODE_INVAL)
6415

    
6416
      # Load and check source CA
6417
      self.source_x509_ca_pem = self.op.source_x509_ca
6418
      if not self.source_x509_ca_pem:
6419
        raise errors.OpPrereqError("Missing source X509 CA",
6420
                                   errors.ECODE_INVAL)
6421

    
6422
      try:
6423
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
6424
                                                    self._cds)
6425
      except OpenSSL.crypto.Error, err:
6426
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
6427
                                   (err, ), errors.ECODE_INVAL)
6428

    
6429
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
6430
      if errcode is not None:
6431
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
6432
                                   errors.ECODE_INVAL)
6433

    
6434
      self.source_x509_ca = cert
6435

    
6436
      src_instance_name = self.op.source_instance_name
6437
      if not src_instance_name:
6438
        raise errors.OpPrereqError("Missing source instance name",
6439
                                   errors.ECODE_INVAL)
6440

    
6441
      self.source_instance_name = \
6442
        utils.GetHostInfo(utils.HostInfo.NormalizeName(src_instance_name)).name
6443

    
6444
    else:
6445
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
6446
                                 self.op.mode, errors.ECODE_INVAL)
6447

    
6448
  def ExpandNames(self):
6449
    """ExpandNames for CreateInstance.
6450

6451
    Figure out the right locks for instance creation.
6452

6453
    """
6454
    self.needed_locks = {}
6455

    
6456
    instance_name = self.op.instance_name
6457
    # this is just a preventive check, but someone might still add this
6458
    # instance in the meantime, and creation will fail at lock-add time
6459
    if instance_name in self.cfg.GetInstanceList():
6460
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6461
                                 instance_name, errors.ECODE_EXISTS)
6462

    
6463
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6464

    
6465
    if self.op.iallocator:
6466
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6467
    else:
6468
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6469
      nodelist = [self.op.pnode]
6470
      if self.op.snode is not None:
6471
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6472
        nodelist.append(self.op.snode)
6473
      self.needed_locks[locking.LEVEL_NODE] = nodelist
6474

    
6475
    # in case of import lock the source node too
6476
    if self.op.mode == constants.INSTANCE_IMPORT:
6477
      src_node = self.op.src_node
6478
      src_path = self.op.src_path
6479

    
6480
      if src_path is None:
6481
        self.op.src_path = src_path = self.op.instance_name
6482

    
6483
      if src_node is None:
6484
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6485
        self.op.src_node = None
6486
        if os.path.isabs(src_path):
6487
          raise errors.OpPrereqError("Importing an instance from an absolute"
6488
                                     " path requires a source node option.",
6489
                                     errors.ECODE_INVAL)
6490
      else:
6491
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6492
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6493
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
6494
        if not os.path.isabs(src_path):
6495
          self.op.src_path = src_path = \
6496
            utils.PathJoin(constants.EXPORT_DIR, src_path)
6497

    
6498
  def _RunAllocator(self):
6499
    """Run the allocator based on input opcode.
6500

6501
    """
6502
    nics = [n.ToDict() for n in self.nics]
6503
    ial = IAllocator(self.cfg, self.rpc,
6504
                     mode=constants.IALLOCATOR_MODE_ALLOC,
6505
                     name=self.op.instance_name,
6506
                     disk_template=self.op.disk_template,
6507
                     tags=[],
6508
                     os=self.op.os_type,
6509
                     vcpus=self.be_full[constants.BE_VCPUS],
6510
                     mem_size=self.be_full[constants.BE_MEMORY],
6511
                     disks=self.disks,
6512
                     nics=nics,
6513
                     hypervisor=self.op.hypervisor,
6514
                     )
6515

    
6516
    ial.Run(self.op.iallocator)
6517

    
6518
    if not ial.success:
6519
      raise errors.OpPrereqError("Can't compute nodes using"
6520
                                 " iallocator '%s': %s" %
6521
                                 (self.op.iallocator, ial.info),
6522
                                 errors.ECODE_NORES)
6523
    if len(ial.result) != ial.required_nodes:
6524
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6525
                                 " of nodes (%s), required %s" %
6526
                                 (self.op.iallocator, len(ial.result),
6527
                                  ial.required_nodes), errors.ECODE_FAULT)
6528
    self.op.pnode = ial.result[0]
6529
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6530
                 self.op.instance_name, self.op.iallocator,
6531
                 utils.CommaJoin(ial.result))
6532
    if ial.required_nodes == 2:
6533
      self.op.snode = ial.result[1]
6534

    
6535
  def BuildHooksEnv(self):
6536
    """Build hooks env.
6537

6538
    This runs on master, primary and secondary nodes of the instance.
6539

6540
    """
6541
    env = {
6542
      "ADD_MODE": self.op.mode,
6543
      }
6544
    if self.op.mode == constants.INSTANCE_IMPORT:
6545
      env["SRC_NODE"] = self.op.src_node
6546
      env["SRC_PATH"] = self.op.src_path
6547
      env["SRC_IMAGES"] = self.src_images
6548

    
6549
    env.update(_BuildInstanceHookEnv(
6550
      name=self.op.instance_name,
6551
      primary_node=self.op.pnode,
6552
      secondary_nodes=self.secondaries,
6553
      status=self.op.start,
6554
      os_type=self.op.os_type,
6555
      memory=self.be_full[constants.BE_MEMORY],
6556
      vcpus=self.be_full[constants.BE_VCPUS],
6557
      nics=_NICListToTuple(self, self.nics),
6558
      disk_template=self.op.disk_template,
6559
      disks=[(d["size"], d["mode"]) for d in self.disks],
6560
      bep=self.be_full,
6561
      hvp=self.hv_full,
6562
      hypervisor_name=self.op.hypervisor,
6563
    ))
6564

    
6565
    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6566
          self.secondaries)
6567
    return env, nl, nl
6568

    
6569
  def _ReadExportInfo(self):
6570
    """Reads the export information from disk.
6571

6572
    It will override the opcode source node and path with the actual
6573
    information, if these two were not specified before.
6574

6575
    @return: the export information
6576

6577
    """
6578
    assert self.op.mode == constants.INSTANCE_IMPORT
6579

    
6580
    src_node = self.op.src_node
6581
    src_path = self.op.src_path
6582

    
6583
    if src_node is None:
6584
      locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6585
      exp_list = self.rpc.call_export_list(locked_nodes)
6586
      found = False
6587
      for node in exp_list:
6588
        if exp_list[node].fail_msg:
6589
          continue
6590
        if src_path in exp_list[node].payload:
6591
          found = True
6592
          self.op.src_node = src_node = node
6593
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
6594
                                                       src_path)
6595
          break
6596
      if not found:
6597
        raise errors.OpPrereqError("No export found for relative path %s" %
6598
                                    src_path, errors.ECODE_INVAL)
6599

    
6600
    _CheckNodeOnline(self, src_node)
6601
    result = self.rpc.call_export_info(src_node, src_path)
6602
    result.Raise("No export or invalid export found in dir %s" % src_path)
6603

    
6604
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6605
    if not export_info.has_section(constants.INISECT_EXP):
6606
      raise errors.ProgrammerError("Corrupted export config",
6607
                                   errors.ECODE_ENVIRON)
6608

    
6609
    ei_version = export_info.get(constants.INISECT_EXP, "version")
6610
    if (int(ei_version) != constants.EXPORT_VERSION):
6611
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6612
                                 (ei_version, constants.EXPORT_VERSION),
6613
                                 errors.ECODE_ENVIRON)
6614
    return export_info
6615

    
6616
  def _ReadExportParams(self, einfo):
6617
    """Use export parameters as defaults.
6618

6619
    In case the opcode doesn't specify (as in override) some instance
6620
    parameters, then try to use them from the export information, if
6621
    that declares them.
6622

6623
    """
6624
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
6625

    
6626
    if self.op.disk_template is None:
6627
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
6628
        self.op.disk_template = einfo.get(constants.INISECT_INS,
6629
                                          "disk_template")
6630
      else:
6631
        raise errors.OpPrereqError("No disk template specified and the export"
6632
                                   " is missing the disk_template information",
6633
                                   errors.ECODE_INVAL)
6634

    
6635
    if not self.op.disks:
6636
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
6637
        disks = []
6638
        # TODO: import the disk iv_name too
6639
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
6640
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
6641
          disks.append({"size": disk_sz})
6642
        self.op.disks = disks
6643
      else:
6644
        raise errors.OpPrereqError("No disk info specified and the export"
6645
                                   " is missing the disk information",
6646
                                   errors.ECODE_INVAL)
6647

    
6648
    if (not self.op.nics and
6649
        einfo.has_option(constants.INISECT_INS, "nic_count")):
6650
      nics = []
6651
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
6652
        ndict = {}
6653
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
6654
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
6655
          ndict[name] = v
6656
        nics.append(ndict)
6657
      self.op.nics = nics
6658

    
6659
    if (self.op.hypervisor is None and
6660
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
6661
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
6662
    if einfo.has_section(constants.INISECT_HYP):
6663
      # use the export parameters but do not override the ones
6664
      # specified by the user
6665
      for name, value in einfo.items(constants.INISECT_HYP):
6666
        if name not in self.op.hvparams:
6667
          self.op.hvparams[name] = value
6668

    
6669
    if einfo.has_section(constants.INISECT_BEP):
6670
      # use the parameters, without overriding
6671
      for name, value in einfo.items(constants.INISECT_BEP):
6672
        if name not in self.op.beparams:
6673
          self.op.beparams[name] = value
6674
    else:
6675
      # try to read the parameters old style, from the main section
6676
      for name in constants.BES_PARAMETERS:
6677
        if (name not in self.op.beparams and
6678
            einfo.has_option(constants.INISECT_INS, name)):
6679
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
6680

    
6681
    if einfo.has_section(constants.INISECT_OSP):
6682
      # use the parameters, without overriding
6683
      for name, value in einfo.items(constants.INISECT_OSP):
6684
        if name not in self.op.osparams:
6685
          self.op.osparams[name] = value
6686

    
6687
  def _RevertToDefaults(self, cluster):
6688
    """Revert the instance parameters to the default values.
6689

6690
    """
6691
    # hvparams
6692
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
6693
    for name in self.op.hvparams.keys():
6694
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
6695
        del self.op.hvparams[name]
6696
    # beparams
6697
    be_defs = cluster.SimpleFillBE({})
6698
    for name in self.op.beparams.keys():
6699
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
6700
        del self.op.beparams[name]
6701
    # nic params
6702
    nic_defs = cluster.SimpleFillNIC({})
6703
    for nic in self.op.nics:
6704
      for name in constants.NICS_PARAMETERS:
6705
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
6706
          del nic[name]
6707
    # osparams
6708
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
6709
    for name in self.op.osparams.keys():
6710
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
6711
        del self.op.osparams[name]
6712

    
6713
  def CheckPrereq(self):
6714
    """Check prerequisites.
6715

6716
    """
6717
    if self.op.mode == constants.INSTANCE_IMPORT:
6718
      export_info = self._ReadExportInfo()
6719
      self._ReadExportParams(export_info)
6720

    
6721
    _CheckDiskTemplate(self.op.disk_template)
6722

    
6723
    if (not self.cfg.GetVGName() and
6724
        self.op.disk_template not in constants.DTS_NOT_LVM):
6725
      raise errors.OpPrereqError("Cluster does not support lvm-based"
6726
                                 " instances", errors.ECODE_STATE)
6727

    
6728
    if self.op.hypervisor is None:
6729
      self.op.hypervisor = self.cfg.GetHypervisorType()
6730

    
6731
    cluster = self.cfg.GetClusterInfo()
6732
    enabled_hvs = cluster.enabled_hypervisors
6733
    if self.op.hypervisor not in enabled_hvs:
6734
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
6735
                                 " cluster (%s)" % (self.op.hypervisor,
6736
                                  ",".join(enabled_hvs)),
6737
                                 errors.ECODE_STATE)
6738

    
6739
    # check hypervisor parameter syntax (locally)
6740
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6741
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
6742
                                      self.op.hvparams)
6743
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
6744
    hv_type.CheckParameterSyntax(filled_hvp)
6745
    self.hv_full = filled_hvp
6746
    # check that we don't specify global parameters on an instance
6747
    _CheckGlobalHvParams(self.op.hvparams)
6748

    
6749
    # fill and remember the beparams dict
6750
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6751
    self.be_full = cluster.SimpleFillBE(self.op.beparams)
6752

    
6753
    # build os parameters
6754
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
6755

    
6756
    # now that hvp/bep are in final format, let's reset to defaults,
6757
    # if told to do so
6758
    if self.op.identify_defaults:
6759
      self._RevertToDefaults(cluster)
6760

    
6761
    # NIC buildup
6762
    self.nics = []
6763
    for idx, nic in enumerate(self.op.nics):
6764
      nic_mode_req = nic.get("mode", None)
6765
      nic_mode = nic_mode_req
6766
      if nic_mode is None:
6767
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
6768

    
6769
      # in routed mode, for the first nic, the default ip is 'auto'
6770
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
6771
        default_ip_mode = constants.VALUE_AUTO
6772
      else:
6773
        default_ip_mode = constants.VALUE_NONE
6774

    
6775
      # ip validity checks
6776
      ip = nic.get("ip", default_ip_mode)
6777
      if ip is None or ip.lower() == constants.VALUE_NONE:
6778
        nic_ip = None
6779
      elif ip.lower() == constants.VALUE_AUTO:
6780
        if not self.op.name_check:
6781
          raise errors.OpPrereqError("IP address set to auto but name checks"
6782
                                     " have been skipped. Aborting.",
6783
                                     errors.ECODE_INVAL)
6784
        nic_ip = self.hostname1.ip
6785
      else:
6786
        if not utils.IsValidIP(ip):
6787
          raise errors.OpPrereqError("Given IP address '%s' doesn't look"
6788
                                     " like a valid IP" % ip,
6789
                                     errors.ECODE_INVAL)
6790
        nic_ip = ip
6791

    
6792
      # TODO: check the ip address for uniqueness
6793
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
6794
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
6795
                                   errors.ECODE_INVAL)
6796

    
6797
      # MAC address verification
6798
      mac = nic.get("mac", constants.VALUE_AUTO)
6799
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6800
        mac = utils.NormalizeAndValidateMac(mac)
6801

    
6802
        try:
6803
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
6804
        except errors.ReservationError:
6805
          raise errors.OpPrereqError("MAC address %s already in use"
6806
                                     " in cluster" % mac,
6807
                                     errors.ECODE_NOTUNIQUE)
6808

    
6809
      # bridge verification
6810
      bridge = nic.get("bridge", None)
6811
      link = nic.get("link", None)
6812
      if bridge and link:
6813
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
6814
                                   " at the same time", errors.ECODE_INVAL)
6815
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
6816
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
6817
                                   errors.ECODE_INVAL)
6818
      elif bridge:
6819
        link = bridge
6820

    
6821
      nicparams = {}
6822
      if nic_mode_req:
6823
        nicparams[constants.NIC_MODE] = nic_mode_req
6824
      if link:
6825
        nicparams[constants.NIC_LINK] = link
6826

    
6827
      check_params = cluster.SimpleFillNIC(nicparams)
6828
      objects.NIC.CheckParameterSyntax(check_params)
6829
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
6830

    
6831
    # disk checks/pre-build
6832
    self.disks = []
6833
    for disk in self.op.disks:
6834
      mode = disk.get("mode", constants.DISK_RDWR)
6835
      if mode not in constants.DISK_ACCESS_SET:
6836
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
6837
                                   mode, errors.ECODE_INVAL)
6838
      size = disk.get("size", None)
6839
      if size is None:
6840
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
6841
      try:
6842
        size = int(size)
6843
      except (TypeError, ValueError):
6844
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
6845
                                   errors.ECODE_INVAL)
6846
      new_disk = {"size": size, "mode": mode}
6847
      if "adopt" in disk:
6848
        new_disk["adopt"] = disk["adopt"]
6849
      self.disks.append(new_disk)
6850

    
6851
    if self.op.mode == constants.INSTANCE_IMPORT:
6852

    
6853
      # Check that the new instance doesn't have less disks than the export
6854
      instance_disks = len(self.disks)
6855
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
6856
      if instance_disks < export_disks:
6857
        raise errors.OpPrereqError("Not enough disks to import."
6858
                                   " (instance: %d, export: %d)" %
6859
                                   (instance_disks, export_disks),
6860
                                   errors.ECODE_INVAL)
6861

    
6862
      disk_images = []
6863
      for idx in range(export_disks):
6864
        option = 'disk%d_dump' % idx
6865
        if export_info.has_option(constants.INISECT_INS, option):
6866
          # FIXME: are the old OSes, disk sizes, etc. useful?
6867
          export_name = export_info.get(constants.INISECT_INS, option)
6868
          image = utils.PathJoin(self.op.src_path, export_name)
6869
          disk_images.append(image)
6870
        else:
6871
          disk_images.append(False)
6872

    
6873
      self.src_images = disk_images
6874

    
6875
      old_name = export_info.get(constants.INISECT_INS, 'name')
6876
      try:
6877
        exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
6878
      except (TypeError, ValueError), err:
6879
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
6880
                                   " an integer: %s" % str(err),
6881
                                   errors.ECODE_STATE)
6882
      if self.op.instance_name == old_name:
6883
        for idx, nic in enumerate(self.nics):
6884
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
6885
            nic_mac_ini = 'nic%d_mac' % idx
6886
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
6887

    
6888
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
6889

    
6890
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
6891
    if self.op.ip_check:
6892
      if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
6893
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
6894
                                   (self.check_ip, self.op.instance_name),
6895
                                   errors.ECODE_NOTUNIQUE)
6896

    
6897
    #### mac address generation
6898
    # By generating here the mac address both the allocator and the hooks get
6899
    # the real final mac address rather than the 'auto' or 'generate' value.
6900
    # There is a race condition between the generation and the instance object
6901
    # creation, which means that we know the mac is valid now, but we're not
6902
    # sure it will be when we actually add the instance. If things go bad
6903
    # adding the instance will abort because of a duplicate mac, and the
6904
    # creation job will fail.
6905
    for nic in self.nics:
6906
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6907
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
6908

    
6909
    #### allocator run
6910

    
6911
    if self.op.iallocator is not None:
6912
      self._RunAllocator()
6913

    
6914
    #### node related checks
6915

    
6916
    # check primary node
6917
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
6918
    assert self.pnode is not None, \
6919
      "Cannot retrieve locked node %s" % self.op.pnode
6920
    if pnode.offline:
6921
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
6922
                                 pnode.name, errors.ECODE_STATE)
6923
    if pnode.drained:
6924
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
6925
                                 pnode.name, errors.ECODE_STATE)
6926

    
6927
    self.secondaries = []
6928

    
6929
    # mirror node verification
6930
    if self.op.disk_template in constants.DTS_NET_MIRROR:
6931
      if self.op.snode is None:
6932
        raise errors.OpPrereqError("The networked disk templates need"
6933
                                   " a mirror node", errors.ECODE_INVAL)
6934
      if self.op.snode == pnode.name:
6935
        raise errors.OpPrereqError("The secondary node cannot be the"
6936
                                   " primary node.", errors.ECODE_INVAL)
6937
      _CheckNodeOnline(self, self.op.snode)
6938
      _CheckNodeNotDrained(self, self.op.snode)
6939
      self.secondaries.append(self.op.snode)
6940

    
6941
    nodenames = [pnode.name] + self.secondaries
6942

    
6943
    req_size = _ComputeDiskSize(self.op.disk_template,
6944
                                self.disks)
6945

    
6946
    # Check lv size requirements, if not adopting
6947
    if req_size is not None and not self.adopt_disks:
6948
      _CheckNodesFreeDisk(self, nodenames, req_size)
6949

    
6950
    if self.adopt_disks: # instead, we must check the adoption data
6951
      all_lvs = set([i["adopt"] for i in self.disks])
6952
      if len(all_lvs) != len(self.disks):
6953
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
6954
                                   errors.ECODE_INVAL)
6955
      for lv_name in all_lvs:
6956
        try:
6957
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
6958
        except errors.ReservationError:
6959
          raise errors.OpPrereqError("LV named %s used by another instance" %
6960
                                     lv_name, errors.ECODE_NOTUNIQUE)
6961

    
6962
      node_lvs = self.rpc.call_lv_list([pnode.name],
6963
                                       self.cfg.GetVGName())[pnode.name]
6964
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
6965
      node_lvs = node_lvs.payload
6966
      delta = all_lvs.difference(node_lvs.keys())
6967
      if delta:
6968
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
6969
                                   utils.CommaJoin(delta),
6970
                                   errors.ECODE_INVAL)
6971
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
6972
      if online_lvs:
6973
        raise errors.OpPrereqError("Online logical volumes found, cannot"
6974
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
6975
                                   errors.ECODE_STATE)
6976
      # update the size of disk based on what is found
6977
      for dsk in self.disks:
6978
        dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
6979

    
6980
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
6981

    
6982
    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
6983
    # check OS parameters (remotely)
6984
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
6985

    
6986
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
6987

    
6988
    # memory check on primary node
6989
    if self.op.start:
6990
      _CheckNodeFreeMemory(self, self.pnode.name,
6991
                           "creating instance %s" % self.op.instance_name,
6992
                           self.be_full[constants.BE_MEMORY],
6993
                           self.op.hypervisor)
6994

    
6995
    self.dry_run_result = list(nodenames)
6996

    
6997
  def Exec(self, feedback_fn):
6998
    """Create and add the instance to the cluster.
6999

7000
    """
7001
    instance = self.op.instance_name
7002
    pnode_name = self.pnode.name
7003

    
7004
    ht_kind = self.op.hypervisor
7005
    if ht_kind in constants.HTS_REQ_PORT:
7006
      network_port = self.cfg.AllocatePort()
7007
    else:
7008
      network_port = None
7009

    
7010
    if constants.ENABLE_FILE_STORAGE:
7011
      # this is needed because os.path.join does not accept None arguments
7012
      if self.op.file_storage_dir is None:
7013
        string_file_storage_dir = ""
7014
      else:
7015
        string_file_storage_dir = self.op.file_storage_dir
7016

    
7017
      # build the full file storage dir path
7018
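      # (for example, this typically expands to something like
      # "<cluster file storage dir>/<user-supplied dir>/<instance name>";
      # the base directory comes from the cluster configuration)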
      file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
7019
                                        string_file_storage_dir, instance)
7020
    else:
7021
      file_storage_dir = ""
7022

    
7023
    disks = _GenerateDiskTemplate(self,
7024
                                  self.op.disk_template,
7025
                                  instance, pnode_name,
7026
                                  self.secondaries,
7027
                                  self.disks,
7028
                                  file_storage_dir,
7029
                                  self.op.file_driver,
7030
                                  0)
7031

    
7032
    iobj = objects.Instance(name=instance, os=self.op.os_type,
7033
                            primary_node=pnode_name,
7034
                            nics=self.nics, disks=disks,
7035
                            disk_template=self.op.disk_template,
7036
                            admin_up=False,
7037
                            network_port=network_port,
7038
                            beparams=self.op.beparams,
7039
                            hvparams=self.op.hvparams,
7040
                            hypervisor=self.op.hypervisor,
7041
                            osparams=self.op.osparams,
7042
                            )
7043

    
7044
    if self.adopt_disks:
7045
      # rename LVs to the newly-generated names; we need to construct
7046
      # 'fake' LV disks with the old data, plus the new unique_id
7047
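      # As an illustration (hypothetical names): if the user adopted an
      # existing LV "data-vol1", the disk generated above carries a fresh,
      # unique LV name, so below we rename "data-vol1" on the primary node
      # to that newly-generated name.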
      tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
7048
      rename_to = []
7049
      for t_dsk, a_dsk in zip(tmp_disks, self.disks):
7050
        rename_to.append(t_dsk.logical_id)
7051
        t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
7052
        self.cfg.SetDiskID(t_dsk, pnode_name)
7053
      result = self.rpc.call_blockdev_rename(pnode_name,
7054
                                             zip(tmp_disks, rename_to))
7055
      result.Raise("Failed to rename adoped LVs")
7056
    else:
7057
      feedback_fn("* creating instance disks...")
7058
      try:
7059
        _CreateDisks(self, iobj)
7060
      except errors.OpExecError:
7061
        self.LogWarning("Device creation failed, reverting...")
7062
        try:
7063
          _RemoveDisks(self, iobj)
7064
        finally:
7065
          self.cfg.ReleaseDRBDMinors(instance)
7066
          raise
7067

    
7068
    feedback_fn("adding instance %s to cluster config" % instance)
7069

    
7070
    self.cfg.AddInstance(iobj, self.proc.GetECId())
7071

    
7072
    # Declare that we don't want to remove the instance lock anymore, as we've
7073
    # added the instance to the config
7074
    del self.remove_locks[locking.LEVEL_INSTANCE]
7075
    # Unlock all the nodes
7076
    if self.op.mode == constants.INSTANCE_IMPORT:
7077
      nodes_keep = [self.op.src_node]
7078
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
7079
                       if node != self.op.src_node]
7080
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
7081
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
7082
    else:
7083
      self.context.glm.release(locking.LEVEL_NODE)
7084
      del self.acquired_locks[locking.LEVEL_NODE]
7085

    
7086
    if self.op.wait_for_sync:
7087
      disk_abort = not _WaitForSync(self, iobj)
7088
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
7089
      # make sure the disks are not degraded (still sync-ing is ok)
7090
      time.sleep(15)
7091
      feedback_fn("* checking mirrors status")
7092
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
7093
    else:
7094
      disk_abort = False
7095

    
7096
    if disk_abort:
7097
      _RemoveDisks(self, iobj)
7098
      self.cfg.RemoveInstance(iobj.name)
7099
      # Make sure the instance lock gets removed
7100
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7101
      raise errors.OpExecError("There are some degraded disks for"
7102
                               " this instance")
7103

    
7104
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7105
      if self.op.mode == constants.INSTANCE_CREATE:
7106
        if not self.op.no_install:
7107
          feedback_fn("* running the instance OS create scripts...")
7108
          # FIXME: pass debug option from opcode to backend
7109
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
7110
                                                 self.op.debug_level)
7111
          result.Raise("Could not add os for instance %s"
7112
                       " on node %s" % (instance, pnode_name))
7113

    
7114
      elif self.op.mode == constants.INSTANCE_IMPORT:
7115
        feedback_fn("* running the instance OS import scripts...")
7116

    
7117
        transfers = []
7118

    
7119
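        # each transfer built below streams one source image file (IEIO_FILE)
        # into the matching newly-created disk through the OS import script
        # (IEIO_SCRIPT); empty entries mean nothing to import for that disk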
        for idx, image in enumerate(self.src_images):
7120
          if not image:
7121
            continue
7122

    
7123
          # FIXME: pass debug option from opcode to backend
7124
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7125
                                             constants.IEIO_FILE, (image, ),
7126
                                             constants.IEIO_SCRIPT,
7127
                                             (iobj.disks[idx], idx),
7128
                                             None)
7129
          transfers.append(dt)
7130

    
7131
        import_result = \
7132
          masterd.instance.TransferInstanceData(self, feedback_fn,
7133
                                                self.op.src_node, pnode_name,
7134
                                                self.pnode.secondary_ip,
7135
                                                iobj, transfers)
7136
        if not compat.all(import_result):
7137
          self.LogWarning("Some disks for instance %s on node %s were not"
7138
                          " imported successfully" % (instance, pnode_name))
7139

    
7140
      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7141
        feedback_fn("* preparing remote import...")
7142
        connect_timeout = constants.RIE_CONNECT_TIMEOUT
7143
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7144

    
7145
        disk_results = masterd.instance.RemoteImport(self, feedback_fn, iobj,
7146
                                                     self.source_x509_ca,
7147
                                                     self._cds, timeouts)
7148
        if not compat.all(disk_results):
7149
          # TODO: Should the instance still be started, even if some disks
7150
          # failed to import (valid for local imports, too)?
7151
          self.LogWarning("Some disks for instance %s on node %s were not"
7152
                          " imported successfully" % (instance, pnode_name))
7153

    
7154
        # Run rename script on newly imported instance
7155
        assert iobj.name == instance
7156
        feedback_fn("Running rename script for %s" % instance)
7157
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7158
                                                   self.source_instance_name,
7159
                                                   self.op.debug_level)
7160
        if result.fail_msg:
7161
          self.LogWarning("Failed to run rename script for %s on node"
7162
                          " %s: %s" % (instance, pnode_name, result.fail_msg))
7163

    
7164
      else:
7165
        # also checked in the prereq part
7166
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
7167
                                     % self.op.mode)
7168

    
7169
    if self.op.start:
7170
      iobj.admin_up = True
7171
      self.cfg.Update(iobj, feedback_fn)
7172
      logging.info("Starting instance %s on node %s", instance, pnode_name)
7173
      feedback_fn("* starting instance...")
7174
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
7175
      result.Raise("Could not start instance")
7176

    
7177
    return list(iobj.all_nodes)
7178

    
7179

    
7180
class LUConnectConsole(NoHooksLU):
7181
  """Connect to an instance's console.
7182

7183
  This is somewhat special in that it returns the command line that
7184
  you need to run on the master node in order to connect to the
7185
  console.
7186

7187
  """
7188
  _OP_REQP = ["instance_name"]
7189
  REQ_BGL = False
7190

    
7191
  def ExpandNames(self):
7192
    self._ExpandAndLockInstance()
7193

    
7194
  def CheckPrereq(self):
7195
    """Check prerequisites.
7196

7197
    This checks that the instance is in the cluster.
7198

7199
    """
7200
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7201
    assert self.instance is not None, \
7202
      "Cannot retrieve locked instance %s" % self.op.instance_name
7203
    _CheckNodeOnline(self, self.instance.primary_node)
7204

    
7205
  def Exec(self, feedback_fn):
7206
    """Connect to the console of an instance
7207

7208
    """
7209
    instance = self.instance
7210
    node = instance.primary_node
7211

    
7212
    node_insts = self.rpc.call_instance_list([node],
7213
                                             [instance.hypervisor])[node]
7214
    node_insts.Raise("Can't get node information from %s" % node)
7215

    
7216
    if instance.name not in node_insts.payload:
7217
      raise errors.OpExecError("Instance %s is not running." % instance.name)
7218

    
7219
    logging.debug("Connecting to console of %s on %s", instance.name, node)
7220

    
7221
    hyper = hypervisor.GetHypervisor(instance.hypervisor)
7222
    cluster = self.cfg.GetClusterInfo()
7223
    # beparams and hvparams are passed separately, to avoid editing the
7224
    # instance and then saving the defaults in the instance itself.
7225
    hvparams = cluster.FillHV(instance)
7226
    beparams = cluster.FillBE(instance)
7227
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
7228

    
7229
    # build ssh cmdline
7230
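    # (roughly an "ssh -t root@<node> '<console command>'" invocation that
    # the caller can exec directly; the exact options are whatever
    # ssh.BuildCmd generates for batch/tty mode)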
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
7231

    
7232

    
7233
class LUReplaceDisks(LogicalUnit):
7234
  """Replace the disks of an instance.
7235

7236
  """
7237
  HPATH = "mirrors-replace"
7238
  HTYPE = constants.HTYPE_INSTANCE
7239
  _OP_REQP = ["instance_name", "mode", "disks"]
7240
  _OP_DEFS = [
7241
    ("remote_node", None),
7242
    ("iallocator", None),
7243
    ("early_release", None),
7244
    ]
7245
  REQ_BGL = False
7246

    
7247
  def CheckArguments(self):
7248
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
7249
                                  self.op.iallocator)
7250

    
7251
  def ExpandNames(self):
7252
    self._ExpandAndLockInstance()
7253

    
7254
    if self.op.iallocator is not None:
7255
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7256

    
7257
    elif self.op.remote_node is not None:
7258
      remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7259
      self.op.remote_node = remote_node
7260

    
7261
      # Warning: do not remove the locking of the new secondary here
7262
      # unless DRBD8.AddChildren is changed to work in parallel;
7263
      # currently it doesn't since parallel invocations of
7264
      # FindUnusedMinor will conflict
7265
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
7266
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7267

    
7268
    else:
7269
      self.needed_locks[locking.LEVEL_NODE] = []
7270
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7271

    
7272
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
7273
                                   self.op.iallocator, self.op.remote_node,
7274
                                   self.op.disks, False, self.op.early_release)
7275

    
7276
    self.tasklets = [self.replacer]
7277

    
7278
  def DeclareLocks(self, level):
7279
    # If we're not already locking all nodes in the set we have to declare the
7280
    # instance's primary/secondary nodes.
7281
    if (level == locking.LEVEL_NODE and
7282
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7283
      self._LockInstancesNodes()
7284

    
7285
  def BuildHooksEnv(self):
7286
    """Build hooks env.
7287

7288
    This runs on the master, the primary and all the secondaries.
7289

7290
    """
7291
    instance = self.replacer.instance
7292
    env = {
7293
      "MODE": self.op.mode,
7294
      "NEW_SECONDARY": self.op.remote_node,
7295
      "OLD_SECONDARY": instance.secondary_nodes[0],
7296
      }
7297
    env.update(_BuildInstanceHookEnvByObject(self, instance))
7298
    nl = [
7299
      self.cfg.GetMasterNode(),
7300
      instance.primary_node,
7301
      ]
7302
    if self.op.remote_node is not None:
7303
      nl.append(self.op.remote_node)
7304
    return env, nl, nl
7305

    
7306

    
7307
class LUEvacuateNode(LogicalUnit):
7308
  """Relocate the secondary instances from a node.
7309

7310
  """
7311
  HPATH = "node-evacuate"
7312
  HTYPE = constants.HTYPE_NODE
7313
  _OP_REQP = ["node_name"]
7314
  _OP_DEFS = [
7315
    ("remote_node", None),
7316
    ("iallocator", None),
7317
    ("early_release", False),
7318
    ]
7319
  REQ_BGL = False
7320

    
7321
  def CheckArguments(self):
7322
    TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
7323
                                  self.op.remote_node,
7324
                                  self.op.iallocator)
7325

    
7326
  def ExpandNames(self):
7327
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7328

    
7329
    self.needed_locks = {}
7330

    
7331
    # Declare node locks
7332
    if self.op.iallocator is not None:
7333
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7334

    
7335
    elif self.op.remote_node is not None:
7336
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7337

    
7338
      # Warning: do not remove the locking of the new secondary here
7339
      # unless DRBD8.AddChildren is changed to work in parallel;
7340
      # currently it doesn't since parallel invocations of
7341
      # FindUnusedMinor will conflict
7342
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
7343
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7344

    
7345
    else:
7346
      raise errors.OpPrereqError("Invalid parameters", errors.ECODE_INVAL)
7347

    
7348
    # Create tasklets for replacing disks for all secondary instances on this
7349
    # node
7350
    names = []
7351
    tasklets = []
7352

    
7353
    for inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
7354
      logging.debug("Replacing disks for instance %s", inst.name)
7355
      names.append(inst.name)
7356

    
7357
      replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
7358
                                self.op.iallocator, self.op.remote_node, [],
7359
                                True, self.op.early_release)
7360
      tasklets.append(replacer)
7361

    
7362
    self.tasklets = tasklets
7363
    self.instance_names = names
7364

    
7365
    # Declare instance locks
7366
    self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names
7367

    
7368
  def DeclareLocks(self, level):
7369
    # If we're not already locking all nodes in the set we have to declare the
7370
    # instance's primary/secondary nodes.
7371
    if (level == locking.LEVEL_NODE and
7372
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7373
      self._LockInstancesNodes()
7374

    
7375
  def BuildHooksEnv(self):
7376
    """Build hooks env.
7377

7378
    This runs on the master, the primary and all the secondaries.
7379

7380
    """
7381
    env = {
7382
      "NODE_NAME": self.op.node_name,
7383
      }
7384

    
7385
    nl = [self.cfg.GetMasterNode()]
7386

    
7387
    if self.op.remote_node is not None:
7388
      env["NEW_SECONDARY"] = self.op.remote_node
7389
      nl.append(self.op.remote_node)
7390

    
7391
    return (env, nl, nl)
7392

    
7393

    
7394
class TLReplaceDisks(Tasklet):
7395
  """Replaces disks for an instance.
7396

7397
  Note: Locking is not within the scope of this class.
7398

7399
  """
7400
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7401
               disks, delay_iallocator, early_release):
7402
    """Initializes this class.
7403

7404
    """
7405
    Tasklet.__init__(self, lu)
7406

    
7407
    # Parameters
7408
    self.instance_name = instance_name
7409
    self.mode = mode
7410
    self.iallocator_name = iallocator_name
7411
    self.remote_node = remote_node
7412
    self.disks = disks
7413
    self.delay_iallocator = delay_iallocator
7414
    self.early_release = early_release
7415

    
7416
    # Runtime data
7417
    self.instance = None
7418
    self.new_node = None
7419
    self.target_node = None
7420
    self.other_node = None
7421
    self.remote_node_info = None
7422
    self.node_secondary_ip = None
7423

    
7424
  @staticmethod
7425
  def CheckArguments(mode, remote_node, iallocator):
7426
    """Helper function for users of this class.
7427

7428
    """
7429
    # check for valid parameter combination
7430
    if mode == constants.REPLACE_DISK_CHG:
7431
      if remote_node is None and iallocator is None:
7432
        raise errors.OpPrereqError("When changing the secondary either an"
7433
                                   " iallocator script must be used or the"
7434
                                   " new node given", errors.ECODE_INVAL)
7435

    
7436
      if remote_node is not None and iallocator is not None:
7437
        raise errors.OpPrereqError("Give either the iallocator or the new"
7438
                                   " secondary, not both", errors.ECODE_INVAL)
7439

    
7440
    elif remote_node is not None or iallocator is not None:
7441
      # Not replacing the secondary
7442
      raise errors.OpPrereqError("The iallocator and new node options can"
7443
                                 " only be used when changing the"
7444
                                 " secondary node", errors.ECODE_INVAL)
7445

    
7446
  @staticmethod
7447
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7448
    """Compute a new secondary node using an IAllocator.
7449

7450
    """
7451
    ial = IAllocator(lu.cfg, lu.rpc,
7452
                     mode=constants.IALLOCATOR_MODE_RELOC,
7453
                     name=instance_name,
7454
                     relocate_from=relocate_from)
7455

    
7456
    ial.Run(iallocator_name)
7457

    
7458
    if not ial.success:
7459
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7460
                                 " %s" % (iallocator_name, ial.info),
7461
                                 errors.ECODE_NORES)
7462

    
7463
    if len(ial.result) != ial.required_nodes:
7464
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7465
                                 " of nodes (%s), required %s" %
7466
                                 (iallocator_name,
7467
                                  len(ial.result), ial.required_nodes),
7468
                                 errors.ECODE_FAULT)
7469

    
7470
    remote_node_name = ial.result[0]
7471

    
7472
    lu.LogInfo("Selected new secondary for instance '%s': %s",
7473
               instance_name, remote_node_name)
7474

    
7475
    return remote_node_name
7476

    
7477
  def _FindFaultyDisks(self, node_name):
7478
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7479
                                    node_name, True)
7480

    
7481
  def CheckPrereq(self):
7482
    """Check prerequisites.
7483

7484
    This checks that the instance is in the cluster.
7485

7486
    """
7487
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7488
    assert instance is not None, \
7489
      "Cannot retrieve locked instance %s" % self.instance_name
7490

    
7491
    if instance.disk_template != constants.DT_DRBD8:
7492
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7493
                                 " instances", errors.ECODE_INVAL)
7494

    
7495
    if len(instance.secondary_nodes) != 1:
7496
      raise errors.OpPrereqError("The instance has a strange layout,"
7497
                                 " expected one secondary but found %d" %
7498
                                 len(instance.secondary_nodes),
7499
                                 errors.ECODE_FAULT)
7500

    
7501
    if not self.delay_iallocator:
7502
      self._CheckPrereq2()
7503

    
7504
  def _CheckPrereq2(self):
7505
    """Check prerequisites, second part.
7506

7507
    This function should always be part of CheckPrereq. It was separated and is
7508
    now called from Exec because during node evacuation iallocator was only
7509
    called with an unmodified cluster model, not taking planned changes into
7510
    account.
7511

7512
    """
7513
    instance = self.instance
7514
    secondary_node = instance.secondary_nodes[0]
7515

    
7516
    if self.iallocator_name is None:
7517
      remote_node = self.remote_node
7518
    else:
7519
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7520
                                       instance.name, instance.secondary_nodes)
7521

    
7522
    if remote_node is not None:
7523
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7524
      assert self.remote_node_info is not None, \
7525
        "Cannot retrieve locked node %s" % remote_node
7526
    else:
7527
      self.remote_node_info = None
7528

    
7529
    if remote_node == self.instance.primary_node:
7530
      raise errors.OpPrereqError("The specified node is the primary node of"
7531
                                 " the instance.", errors.ECODE_INVAL)
7532

    
7533
    if remote_node == secondary_node:
7534
      raise errors.OpPrereqError("The specified node is already the"
7535
                                 " secondary node of the instance.",
7536
                                 errors.ECODE_INVAL)
7537

    
7538
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7539
                                    constants.REPLACE_DISK_CHG):
7540
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
7541
                                 errors.ECODE_INVAL)
7542

    
7543
    if self.mode == constants.REPLACE_DISK_AUTO:
7544
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
7545
      faulty_secondary = self._FindFaultyDisks(secondary_node)
7546

    
7547
      if faulty_primary and faulty_secondary:
7548
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7549
                                   " one node and can not be repaired"
7550
                                   " automatically" % self.instance_name,
7551
                                   errors.ECODE_STATE)
7552

    
7553
      if faulty_primary:
7554
        self.disks = faulty_primary
7555
        self.target_node = instance.primary_node
7556
        self.other_node = secondary_node
7557
        check_nodes = [self.target_node, self.other_node]
7558
      elif faulty_secondary:
7559
        self.disks = faulty_secondary
7560
        self.target_node = secondary_node
7561
        self.other_node = instance.primary_node
7562
        check_nodes = [self.target_node, self.other_node]
7563
      else:
7564
        self.disks = []
7565
        check_nodes = []
7566

    
7567
    else:
7568
      # Non-automatic modes
7569
      if self.mode == constants.REPLACE_DISK_PRI:
7570
        self.target_node = instance.primary_node
7571
        self.other_node = secondary_node
7572
        check_nodes = [self.target_node, self.other_node]
7573

    
7574
      elif self.mode == constants.REPLACE_DISK_SEC:
7575
        self.target_node = secondary_node
7576
        self.other_node = instance.primary_node
7577
        check_nodes = [self.target_node, self.other_node]
7578

    
7579
      elif self.mode == constants.REPLACE_DISK_CHG:
7580
        self.new_node = remote_node
7581
        self.other_node = instance.primary_node
7582
        self.target_node = secondary_node
7583
        check_nodes = [self.new_node, self.other_node]
7584

    
7585
        _CheckNodeNotDrained(self.lu, remote_node)
7586

    
7587
        old_node_info = self.cfg.GetNodeInfo(secondary_node)
7588
        assert old_node_info is not None
7589
        if old_node_info.offline and not self.early_release:
7590
          # doesn't make sense to delay the release
7591
          self.early_release = True
7592
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
7593
                          " early-release mode", secondary_node)
7594

    
7595
      else:
7596
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
7597
                                     self.mode)
7598

    
7599
      # If not specified all disks should be replaced
7600
      if not self.disks:
7601
        self.disks = range(len(self.instance.disks))
7602

    
7603
    for node in check_nodes:
7604
      _CheckNodeOnline(self.lu, node)
7605

    
7606
    # Check whether disks are valid
7607
    for disk_idx in self.disks:
7608
      instance.FindDisk(disk_idx)
7609

    
7610
    # Get secondary node IP addresses
7611
    node_2nd_ip = {}
7612

    
7613
    for node_name in [self.target_node, self.other_node, self.new_node]:
7614
      if node_name is not None:
7615
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
7616

    
7617
    self.node_secondary_ip = node_2nd_ip
7618

    
7619
  def Exec(self, feedback_fn):
7620
    """Execute disk replacement.
7621

7622
    This dispatches the disk replacement to the appropriate handler.
7623

7624
    """
7625
    if self.delay_iallocator:
7626
      self._CheckPrereq2()
7627

    
7628
    if not self.disks:
7629
      feedback_fn("No disks need replacement")
7630
      return
7631

    
7632
    feedback_fn("Replacing disk(s) %s for %s" %
7633
                (utils.CommaJoin(self.disks), self.instance.name))
7634

    
7635
    activate_disks = (not self.instance.admin_up)
7636

    
7637
    # Activate the instance disks if we're replacing them on a down instance
7638
    if activate_disks:
7639
      _StartInstanceDisks(self.lu, self.instance, True)
7640

    
7641
    try:
7642
      # Should we replace the secondary node?
7643
      if self.new_node is not None:
7644
        fn = self._ExecDrbd8Secondary
7645
      else:
7646
        fn = self._ExecDrbd8DiskOnly
7647

    
7648
      return fn(feedback_fn)
7649

    
7650
    finally:
7651
      # Deactivate the instance disks if we're replacing them on a
7652
      # down instance
7653
      if activate_disks:
7654
        _SafeShutdownInstanceDisks(self.lu, self.instance)
7655

    
7656
  def _CheckVolumeGroup(self, nodes):
7657
    self.lu.LogInfo("Checking volume groups")
7658

    
7659
    vgname = self.cfg.GetVGName()
7660

    
7661
    # Make sure volume group exists on all involved nodes
7662
    results = self.rpc.call_vg_list(nodes)
7663
    if not results:
7664
      raise errors.OpExecError("Can't list volume groups on the nodes")
7665

    
7666
    for node in nodes:
7667
      res = results[node]
7668
      res.Raise("Error checking node %s" % node)
7669
      if vgname not in res.payload:
7670
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
7671
                                 (vgname, node))
7672

    
7673
  def _CheckDisksExistence(self, nodes):
7674
    # Check disk existence
7675
    for idx, dev in enumerate(self.instance.disks):
7676
      if idx not in self.disks:
7677
        continue
7678

    
7679
      for node in nodes:
7680
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
7681
        self.cfg.SetDiskID(dev, node)
7682

    
7683
        result = self.rpc.call_blockdev_find(node, dev)
7684

    
7685
        msg = result.fail_msg
7686
        if msg or not result.payload:
7687
          if not msg:
7688
            msg = "disk not found"
7689
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7690
                                   (idx, node, msg))
7691

    
7692
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7693
    for idx, dev in enumerate(self.instance.disks):
7694
      if idx not in self.disks:
7695
        continue
7696

    
7697
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7698
                      (idx, node_name))
7699

    
7700
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
7701
                                   ldisk=ldisk):
7702
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7703
                                 " replace disks for instance %s" %
7704
                                 (node_name, self.instance.name))
7705

    
7706
  def _CreateNewStorage(self, node_name):
7707
    vgname = self.cfg.GetVGName()
7708
    iv_names = {}
7709

    
7710
    for idx, dev in enumerate(self.instance.disks):
7711
      if idx not in self.disks:
7712
        continue
7713

    
7714
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7715

    
7716
      self.cfg.SetDiskID(dev, node_name)
7717

    
7718
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7719
      names = _GenerateUniqueNames(self.lu, lv_names)
7720

    
7721
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7722
                             logical_id=(vgname, names[0]))
7723
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7724
                             logical_id=(vgname, names[1]))
7725
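      # (the first LV matches the disk size and holds the data, while the
      # fixed 128 MiB one is for the DRBD metadata)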

    
7726
      new_lvs = [lv_data, lv_meta]
7727
      old_lvs = dev.children
7728
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
7729

    
7730
      # we pass force_create=True to force the LVM creation
7731
      for new_lv in new_lvs:
7732
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7733
                        _GetInstanceInfoText(self.instance), False)
7734

    
7735
    return iv_names
7736

    
7737
  def _CheckDevices(self, node_name, iv_names):
7738
    for name, (dev, _, _) in iv_names.iteritems():
7739
      self.cfg.SetDiskID(dev, node_name)
7740

    
7741
      result = self.rpc.call_blockdev_find(node_name, dev)
7742

    
7743
      msg = result.fail_msg
7744
      if msg or not result.payload:
7745
        if not msg:
7746
          msg = "disk not found"
7747
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
7748
                                 (name, msg))
7749

    
7750
      if result.payload.is_degraded:
7751
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
7752

    
7753
  def _RemoveOldStorage(self, node_name, iv_names):
7754
    for name, (_, old_lvs, _) in iv_names.iteritems():
7755
      self.lu.LogInfo("Remove logical volumes for %s" % name)
7756

    
7757
      for lv in old_lvs:
7758
        self.cfg.SetDiskID(lv, node_name)
7759

    
7760
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7761
        if msg:
7762
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
7763
                             hint="remove unused LVs manually")
7764

    
7765
  def _ReleaseNodeLock(self, node_name):
7766
    """Releases the lock for a given node."""
7767
    self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7768

    
7769
  def _ExecDrbd8DiskOnly(self, feedback_fn):
7770
    """Replace a disk on the primary or secondary for DRBD 8.
7771

7772
    The algorithm for replace is quite complicated:
7773

7774
      1. for each disk to be replaced:
7775

7776
        1. create new LVs on the target node with unique names
7777
        1. detach old LVs from the drbd device
7778
        1. rename old LVs to name_replaced.<time_t>
7779
        1. rename new LVs to old LVs
7780
        1. attach the new LVs (with the old names now) to the drbd device
7781

7782
      1. wait for sync across all devices
7783

7784
      1. for each modified disk:
7785

7786
        1. remove old LVs (which have the name name_replaces.<time_t>)
7787

7788
    Failures are not very well handled.
7789

7790
    """
7791
    steps_total = 6
7792

    
7793
    # Step: check device activation
7794
    self.lu.LogStep(1, steps_total, "Check device existence")
7795
    self._CheckDisksExistence([self.other_node, self.target_node])
7796
    self._CheckVolumeGroup([self.target_node, self.other_node])
7797

    
7798
    # Step: check other node consistency
7799
    self.lu.LogStep(2, steps_total, "Check peer consistency")
7800
    self._CheckDisksConsistency(self.other_node,
7801
                                self.other_node == self.instance.primary_node,
7802
                                False)
7803

    
7804
    # Step: create new storage
7805
    self.lu.LogStep(3, steps_total, "Allocate new storage")
7806
    iv_names = self._CreateNewStorage(self.target_node)
7807

    
7808
    # Step: for each lv, detach+rename*2+attach
7809
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7810
    for dev, old_lvs, new_lvs in iv_names.itervalues():
7811
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
7812

    
7813
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
7814
                                                     old_lvs)
7815
      result.Raise("Can't detach drbd from local storage on node"
7816
                   " %s for device %s" % (self.target_node, dev.iv_name))
7817
      #dev.children = []
7818
      #cfg.Update(instance)
7819

    
7820
      # ok, we created the new LVs, so now we know we have the needed
7821
      # storage; as such, we proceed on the target node to rename
7822
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
7823
      # using the assumption that logical_id == physical_id (which in
7824
      # turn is the unique_id on that node)
7825

    
7826
      # FIXME(iustin): use a better name for the replaced LVs
7827
      temp_suffix = int(time.time())
7828
      ren_fn = lambda d, suff: (d.physical_id[0],
7829
                                d.physical_id[1] + "_replaced-%s" % suff)
7830
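      # (illustration, made-up names: ("xenvg", "abc.disk0_data") becomes
      # ("xenvg", "abc.disk0_data_replaced-<timestamp>"); only the LV name
      # part of physical_id changes)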

    
7831
      # Build the rename list based on what LVs exist on the node
7832
      rename_old_to_new = []
7833
      for to_ren in old_lvs:
7834
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
7835
        if not result.fail_msg and result.payload:
7836
          # device exists
7837
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
7838

    
7839
      self.lu.LogInfo("Renaming the old LVs on the target node")
7840
      result = self.rpc.call_blockdev_rename(self.target_node,
7841
                                             rename_old_to_new)
7842
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
7843

    
7844
      # Now we rename the new LVs to the old LVs
7845
      self.lu.LogInfo("Renaming the new LVs on the target node")
7846
      rename_new_to_old = [(new, old.physical_id)
7847
                           for old, new in zip(old_lvs, new_lvs)]
7848
      result = self.rpc.call_blockdev_rename(self.target_node,
7849
                                             rename_new_to_old)
7850
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
7851

    
7852
      for old, new in zip(old_lvs, new_lvs):
7853
        new.logical_id = old.logical_id
7854
        self.cfg.SetDiskID(new, self.target_node)
7855

    
7856
      for disk in old_lvs:
7857
        disk.logical_id = ren_fn(disk, temp_suffix)
7858
        self.cfg.SetDiskID(disk, self.target_node)
7859

    
7860
      # Now that the new lvs have the old name, we can add them to the device
7861
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
7862
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
7863
                                                  new_lvs)
7864
      msg = result.fail_msg
7865
      if msg:
7866
        for new_lv in new_lvs:
7867
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
7868
                                               new_lv).fail_msg
7869
          if msg2:
7870
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
7871
                               hint=("cleanup manually the unused logical"
7872
                                     "volumes"))
7873
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
7874

    
7875
      dev.children = new_lvs
7876

    
7877
      self.cfg.Update(self.instance, feedback_fn)
7878

    
7879
    cstep = 5
7880
    if self.early_release:
7881
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
7882
      cstep += 1
7883
      self._RemoveOldStorage(self.target_node, iv_names)
7884
      # WARNING: we release both node locks here, do not do other RPCs
7885
      # than WaitForSync to the primary node
7886
      self._ReleaseNodeLock([self.target_node, self.other_node])
7887

    
7888
    # Wait for sync
7889
    # This can fail as the old devices are degraded and _WaitForSync
7890
    # does a combined result over all disks, so we don't check its return value
7891
    self.lu.LogStep(cstep, steps_total, "Sync devices")
7892
    cstep += 1
7893
    _WaitForSync(self.lu, self.instance)
7894

    
7895
    # Check all devices manually
7896
    self._CheckDevices(self.instance.primary_node, iv_names)
7897

    
7898
    # Step: remove old storage
7899
    if not self.early_release:
7900
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
7901
      cstep += 1
7902
      self._RemoveOldStorage(self.target_node, iv_names)
7903

    
7904
  def _ExecDrbd8Secondary(self, feedback_fn):
7905
    """Replace the secondary node for DRBD 8.
7906

7907
    The algorithm for replace is quite complicated:
7908
      - for all disks of the instance:
7909
        - create new LVs on the new node with same names
7910
        - shutdown the drbd device on the old secondary
7911
        - disconnect the drbd network on the primary
7912
        - create the drbd device on the new secondary
7913
        - network attach the drbd on the primary, using an artifice:
7914
          the drbd code for Attach() will connect to the network if it
7915
          finds a device which is connected to the correct local disks but
7916
          not network enabled
7917
      - wait for sync across all devices
7918
      - remove all disks from the old secondary
7919

7920
    Failures are not very well handled.
7921

7922
    """
7923
    steps_total = 6
7924

    
7925
    # Step: check device activation
7926
    self.lu.LogStep(1, steps_total, "Check device existence")
7927
    self._CheckDisksExistence([self.instance.primary_node])
7928
    self._CheckVolumeGroup([self.instance.primary_node])
7929

    
7930
    # Step: check other node consistency
7931
    self.lu.LogStep(2, steps_total, "Check peer consistency")
7932
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
7933

    
7934
    # Step: create new storage
7935
    self.lu.LogStep(3, steps_total, "Allocate new storage")
7936
    for idx, dev in enumerate(self.instance.disks):
7937
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
7938
                      (self.new_node, idx))
7939
      # we pass force_create=True to force LVM creation
7940
      for new_lv in dev.children:
7941
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
7942
                        _GetInstanceInfoText(self.instance), False)
7943

    
7944
    # Step 4: drbd minors and drbd setup changes
7945
    # after this, we must manually remove the drbd minors on both the
7946
    # error and the success paths
7947
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7948
    minors = self.cfg.AllocateDRBDMinor([self.new_node
7949
                                         for dev in self.instance.disks],
7950
                                        self.instance.name)
7951
    logging.debug("Allocated minors %r", minors)
7952

    
7953
    iv_names = {}
7954
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
7955
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
7956
                      (self.new_node, idx))
7957
      # create new devices on new_node; note that we create two IDs:
7958
      # one without port, so the drbd will be activated without
7959
      # networking information on the new node at this stage, and one
7960
      # with network, for the later activation in step 4
7961
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
7962
      if self.instance.primary_node == o_node1:
7963
        p_minor = o_minor1
7964
      else:
7965
        assert self.instance.primary_node == o_node2, "Three-node instance?"
7966
        p_minor = o_minor2
7967

    
7968
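      # (reminder of the DRBD8 logical_id layout unpacked above: (nodeA,
      # nodeB, port, minorA, minorB, secret); port=None keeps the new device
      # standalone until the later network attach)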
      new_alone_id = (self.instance.primary_node, self.new_node, None,
7969
                      p_minor, new_minor, o_secret)
7970
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
7971
                    p_minor, new_minor, o_secret)
7972

    
7973
      iv_names[idx] = (dev, dev.children, new_net_id)
7974
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
7975
                    new_net_id)
7976
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
7977
                              logical_id=new_alone_id,
7978
                              children=dev.children,
7979
                              size=dev.size)
7980
      try:
7981
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
7982
                              _GetInstanceInfoText(self.instance), False)
7983
      except errors.GenericError:
7984
        self.cfg.ReleaseDRBDMinors(self.instance.name)
7985
        raise
7986

    
7987
    # We have new devices, shutdown the drbd on the old secondary
7988
    for idx, dev in enumerate(self.instance.disks):
7989
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
7990
      self.cfg.SetDiskID(dev, self.target_node)
7991
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
7992
      if msg:
7993
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
7994
                           "node: %s" % (idx, msg),
7995
                           hint=("Please cleanup this device manually as"
7996
                                 " soon as possible"))
7997

    
7998
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
7999
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
8000
                                               self.node_secondary_ip,
8001
                                               self.instance.disks)\
8002
                                              [self.instance.primary_node]
8003

    
8004
    msg = result.fail_msg
8005
    if msg:
8006
      # detaches didn't succeed (unlikely)
8007
      self.cfg.ReleaseDRBDMinors(self.instance.name)
8008
      raise errors.OpExecError("Can't detach the disks from the network on"
8009
                               " old node: %s" % (msg,))
8010

    
8011
    # if we managed to detach at least one, we update all the disks of
8012
    # the instance to point to the new secondary
8013
    self.lu.LogInfo("Updating instance configuration")
8014
    for dev, _, new_logical_id in iv_names.itervalues():
8015
      dev.logical_id = new_logical_id
8016
      self.cfg.SetDiskID(dev, self.instance.primary_node)
8017

    
8018
    self.cfg.Update(self.instance, feedback_fn)
8019

    
8020
    # and now perform the drbd attach
8021
    self.lu.LogInfo("Attaching primary drbds to new secondary"
8022
                    " (standalone => connected)")
8023
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8024
                                            self.new_node],
8025
                                           self.node_secondary_ip,
8026
                                           self.instance.disks,
8027
                                           self.instance.name,
8028
                                           False)
8029
    for to_node, to_result in result.items():
8030
      msg = to_result.fail_msg
8031
      if msg:
8032
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8033
                           to_node, msg,
8034
                           hint=("please do a gnt-instance info to see the"
8035
                                 " status of disks"))
8036
    cstep = 5
8037
    if self.early_release:
8038
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
8039
      cstep += 1
8040
      self._RemoveOldStorage(self.target_node, iv_names)
8041
      # WARNING: we release all node locks here, do not do other RPCs
8042
      # than WaitForSync to the primary node
8043
      self._ReleaseNodeLock([self.instance.primary_node,
8044
                             self.target_node,
8045
                             self.new_node])
8046

    
8047
    # Wait for sync
8048
    # This can fail as the old devices are degraded and _WaitForSync
8049
    # does a combined result over all disks, so we don't check its return value
8050
    self.lu.LogStep(cstep, steps_total, "Sync devices")
8051
    cstep += 1
8052
    _WaitForSync(self.lu, self.instance)
8053

    
8054
    # Check all devices manually
8055
    self._CheckDevices(self.instance.primary_node, iv_names)
8056

    
8057
    # Step: remove old storage
8058
    if not self.early_release:
8059
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
8060
      self._RemoveOldStorage(self.target_node, iv_names)
8061

    
8062

    
8063
class LURepairNodeStorage(NoHooksLU):
8064
  """Repairs the volume group on a node.
8065

8066
  """
8067
  _OP_REQP = ["node_name"]
8068
  REQ_BGL = False
8069

    
8070
  def CheckArguments(self):
8071
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8072

    
8073
    _CheckStorageType(self.op.storage_type)
8074

    
8075
  def ExpandNames(self):
8076
    self.needed_locks = {
8077
      locking.LEVEL_NODE: [self.op.node_name],
8078
      }
8079

    
8080
  def _CheckFaultyDisks(self, instance, node_name):
8081
    """Ensure faulty disks abort the opcode or at least warn."""
8082
    try:
8083
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8084
                                  node_name, True):
8085
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8086
                                   " node '%s'" % (instance.name, node_name),
8087
                                   errors.ECODE_STATE)
8088
    except errors.OpPrereqError, err:
8089
      if self.op.ignore_consistency:
8090
        self.proc.LogWarning(str(err.args[0]))
8091
      else:
8092
        raise
8093

    
8094
  def CheckPrereq(self):
8095
    """Check prerequisites.
8096

8097
    """
8098
    storage_type = self.op.storage_type
8099

    
8100
    if (constants.SO_FIX_CONSISTENCY not in
8101
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8102
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
8103
                                 " repaired" % storage_type,
8104
                                 errors.ECODE_INVAL)
8105

    
8106
    # Check whether any instance on this node has faulty disks
8107
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8108
      if not inst.admin_up:
8109
        continue
8110
      check_nodes = set(inst.all_nodes)
8111
      check_nodes.discard(self.op.node_name)
8112
      for inst_node_name in check_nodes:
8113
        self._CheckFaultyDisks(inst, inst_node_name)
8114

    
8115
  def Exec(self, feedback_fn):
8116
    feedback_fn("Repairing storage unit '%s' on %s ..." %
8117
                (self.op.name, self.op.node_name))
8118

    
8119
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8120
    result = self.rpc.call_storage_execute(self.op.node_name,
8121
                                           self.op.storage_type, st_args,
8122
                                           self.op.name,
8123
                                           constants.SO_FIX_CONSISTENCY)
8124
    result.Raise("Failed to repair storage unit '%s' on %s" %
8125
                 (self.op.name, self.op.node_name))
8126

    
8127

    
8128
class LUNodeEvacuationStrategy(NoHooksLU):
8129
  """Computes the node evacuation strategy.
8130

8131
  """
8132
  _OP_REQP = ["nodes"]
8133
  _OP_DEFS = [
8134
    ("remote_node", None),
8135
    ("iallocator", None),
8136
    ]
8137
  REQ_BGL = False
8138

    
8139
  def CheckArguments(self):
8140
    if self.op.remote_node is not None and self.op.iallocator is not None:
8141
      raise errors.OpPrereqError("Give either the iallocator or the new"
8142
                                 " secondary, not both", errors.ECODE_INVAL)
8143

    
8144
  def ExpandNames(self):
8145
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
8146
    self.needed_locks = locks = {}
8147
    if self.op.remote_node is None:
8148
      locks[locking.LEVEL_NODE] = locking.ALL_SET
8149
    else:
8150
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8151
      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
8152

    
8153
  def CheckPrereq(self):
8154
    pass
8155

    
8156
  def Exec(self, feedback_fn):
8157
    if self.op.remote_node is not None:
8158
      instances = []
8159
      for node in self.op.nodes:
8160
        instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
8161
      result = []
8162
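      # build the result by hand: one [instance_name, new_secondary_node]
      # pair per secondary instance found on the evacuated nodes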
      for i in instances:
8163
        if i.primary_node == self.op.remote_node:
8164
          raise errors.OpPrereqError("Node %s is the primary node of"
8165
                                     " instance %s, cannot use it as"
8166
                                     " secondary" %
8167
                                     (self.op.remote_node, i.name),
8168
                                     errors.ECODE_INVAL)
8169
        result.append([i.name, self.op.remote_node])
8170
    else:
8171
      ial = IAllocator(self.cfg, self.rpc,
8172
                       mode=constants.IALLOCATOR_MODE_MEVAC,
8173
                       evac_nodes=self.op.nodes)
8174
      ial.Run(self.op.iallocator, validate=True)
8175
      if not ial.success:
8176
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
8177
                                 errors.ECODE_NORES)
8178
      result = ial.result
8179
    return result
8180

    
8181

    
8182
class LUGrowDisk(LogicalUnit):
8183
  """Grow a disk of an instance.
8184

8185
  """
8186
  HPATH = "disk-grow"
8187
  HTYPE = constants.HTYPE_INSTANCE
8188
  _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
8189
  REQ_BGL = False
8190

    
8191
  def ExpandNames(self):
8192
    self._ExpandAndLockInstance()
8193
    self.needed_locks[locking.LEVEL_NODE] = []
8194
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8195

    
8196
  def DeclareLocks(self, level):
8197
    if level == locking.LEVEL_NODE:
8198
      self._LockInstancesNodes()
8199

    
8200
  def BuildHooksEnv(self):
8201
    """Build hooks env.
8202

8203
    This runs on the master, the primary and all the secondaries.
8204

8205
    """
8206
    env = {
8207
      "DISK": self.op.disk,
8208
      "AMOUNT": self.op.amount,
8209
      }
8210
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8211
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8212
    return env, nl, nl
8213

    
8214
  def CheckPrereq(self):
8215
    """Check prerequisites.
8216

8217
    This checks that the instance is in the cluster.
8218

8219
    """
8220
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8221
    assert instance is not None, \
8222
      "Cannot retrieve locked instance %s" % self.op.instance_name
8223
    nodenames = list(instance.all_nodes)
8224
    for node in nodenames:
8225
      _CheckNodeOnline(self, node)
8226

    
8227

    
8228
    self.instance = instance
8229

    
8230
    if instance.disk_template not in constants.DTS_GROWABLE:
8231
      raise errors.OpPrereqError("Instance's disk layout does not support"
8232
                                 " growing.", errors.ECODE_INVAL)
8233

    
8234
    self.disk = instance.FindDisk(self.op.disk)
8235

    
8236
    if instance.disk_template != constants.DT_FILE:
8237
      # TODO: check the free disk space for file, when that feature will be
8238
      # supported
8239
      _CheckNodesFreeDisk(self, nodenames, self.op.amount)
8240

    
8241
  def Exec(self, feedback_fn):
8242
    """Execute disk grow.
8243

8244
    """
8245
    instance = self.instance
8246
    disk = self.disk
8247

    
8248
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8249
    if not disks_ok:
8250
      raise errors.OpExecError("Cannot activate block device to grow")
8251

    
8252
    for node in instance.all_nodes:
8253
      self.cfg.SetDiskID(disk, node)
8254
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8255
      result.Raise("Grow request failed to node %s" % node)
8256

    
8257
      # TODO: Rewrite code to work properly
8258
      # DRBD goes into sync mode for a short amount of time after executing the
8259
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8260
      # calling "resize" in sync mode fails. Sleeping for a short amount of
8261
      # time is a work-around.
8262
      time.sleep(5)
8263

    
8264
    disk.RecordGrow(self.op.amount)
8265
    self.cfg.Update(instance, feedback_fn)
8266
    if self.op.wait_for_sync:
8267
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
8268
      if disk_abort:
8269
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8270
                             " status.\nPlease check the instance.")
8271
      if not instance.admin_up:
8272
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8273
    elif not instance.admin_up:
8274
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
8275
                           " not supposed to be running because no wait for"
8276
                           " sync mode was requested.")
8277

    
8278

    
8279
class LUQueryInstanceData(NoHooksLU):
  """Query runtime instance data.

  """
  _OP_REQP = ["instances", "static"]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

    if not isinstance(self.op.instances, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'",
                                 errors.ECODE_INVAL)

    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = _ExpandInstanceName(self.cfg, name)
        self.wanted_names.append(full_name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
    else:
      self.wanted_names = None
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]
    return

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_find(node, dev)
    if result.offline:
      return None

    result.Raise("Can't compute disk status for %s" % instance_name)

    status = result.payload
    if status is None:
      return None

    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]

    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                              instance.name, dev)
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)

    if dev.children:
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
                      for child in dev.children]
    else:
      dev_children = []

    data = {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
      "mode": dev.mode,
      "size": dev.size,
      }

    return data

  def Exec(self, feedback_fn):
    """Gather and return data"""
    result = {}

    cluster = self.cfg.GetClusterInfo()

    for instance in self.wanted_instances:
8390
        remote_info = self.rpc.call_instance_info(instance.primary_node,
8391
                                                  instance.name,
8392
                                                  instance.hypervisor)
8393
        remote_info.Raise("Error checking node %s" % instance.primary_node)
8394
        remote_info = remote_info.payload
8395
        if remote_info and "state" in remote_info:
8396
          remote_state = "up"
8397
        else:
8398
          remote_state = "down"
8399
      else:
8400
        remote_state = None
8401
      if instance.admin_up:
8402
        config_state = "up"
8403
      else:
8404
        config_state = "down"
8405

    
8406
      disks = [self._ComputeDiskStatus(instance, None, device)
8407
               for device in instance.disks]
8408

    
8409
      idict = {
8410
        "name": instance.name,
8411
        "config_state": config_state,
8412
        "run_state": remote_state,
8413
        "pnode": instance.primary_node,
8414
        "snodes": instance.secondary_nodes,
8415
        "os": instance.os,
8416
        # this happens to be the same format used for hooks
8417
        "nics": _NICListToTuple(self, instance.nics),
8418
        "disk_template": instance.disk_template,
8419
        "disks": disks,
8420
        "hypervisor": instance.hypervisor,
8421
        "network_port": instance.network_port,
8422
        "hv_instance": instance.hvparams,
8423
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
8424
        "be_instance": instance.beparams,
8425
        "be_actual": cluster.FillBE(instance),
8426
        "os_instance": instance.osparams,
8427
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
8428
        "serial_no": instance.serial_no,
8429
        "mtime": instance.mtime,
8430
        "ctime": instance.ctime,
8431
        "uuid": instance.uuid,
8432
        }
8433

    
8434
      result[instance.name] = idict
8435

    
8436
    return result
8437

    
8438

    
8439
class LUSetInstanceParams(LogicalUnit):
  """Modifies an instance's parameters.

  """
  HPATH = "instance-modify"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  _OP_DEFS = [
    ("nics", _EmptyList),
    ("disks", _EmptyList),
    ("beparams", _EmptyDict),
    ("hvparams", _EmptyDict),
    ("disk_template", None),
    ("remote_node", None),
    ("os_name", None),
    ("force_variant", False),
    ("osparams", None),
    ("force", False),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    if not (self.op.nics or self.op.disks or self.op.disk_template or
            self.op.hvparams or self.op.beparams or self.op.os_name):
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)

    if self.op.hvparams:
      _CheckGlobalHvParams(self.op.hvparams)

    # Disk validation
    disk_addremove = 0
    for disk_op, disk_dict in self.op.disks:
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
      if disk_op == constants.DDM_REMOVE:
        disk_addremove += 1
        continue
      elif disk_op == constants.DDM_ADD:
        disk_addremove += 1
      else:
        if not isinstance(disk_op, int):
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
        if not isinstance(disk_dict, dict):
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

      if disk_op == constants.DDM_ADD:
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
        if mode not in constants.DISK_ACCESS_SET:
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
                                     errors.ECODE_INVAL)
        size = disk_dict.get('size', None)
        if size is None:
          raise errors.OpPrereqError("Required disk parameter size missing",
                                     errors.ECODE_INVAL)
        try:
          size = int(size)
        except (TypeError, ValueError), err:
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
                                     str(err), errors.ECODE_INVAL)
        disk_dict['size'] = size
      else:
        # modification of disk
        if 'size' in disk_dict:
          raise errors.OpPrereqError("Disk size change not possible, use"
                                     " grow-disk", errors.ECODE_INVAL)

    if disk_addremove > 1:
      raise errors.OpPrereqError("Only one disk add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)

    if self.op.disks and self.op.disk_template is not None:
      raise errors.OpPrereqError("Disk template conversion and other disk"
                                 " changes not supported at the same time",
                                 errors.ECODE_INVAL)

    if self.op.disk_template:
      _CheckDiskTemplate(self.op.disk_template)
      if (self.op.disk_template in constants.DTS_NET_MIRROR and
          self.op.remote_node is None):
        raise errors.OpPrereqError("Changing the disk template to a mirrored"
                                   " one requires specifying a secondary node",
                                   errors.ECODE_INVAL)

    # NIC validation
    nic_addremove = 0
    for nic_op, nic_dict in self.op.nics:
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
      if nic_op == constants.DDM_REMOVE:
        nic_addremove += 1
        continue
      elif nic_op == constants.DDM_ADD:
        nic_addremove += 1
      else:
        if not isinstance(nic_op, int):
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
        if not isinstance(nic_dict, dict):
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

      # nic_dict should be a dict
      nic_ip = nic_dict.get('ip', None)
      if nic_ip is not None:
        if nic_ip.lower() == constants.VALUE_NONE:
          nic_dict['ip'] = None
        else:
          if not utils.IsValidIP(nic_ip):
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
                                       errors.ECODE_INVAL)

      nic_bridge = nic_dict.get('bridge', None)
      nic_link = nic_dict.get('link', None)
      if nic_bridge and nic_link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time", errors.ECODE_INVAL)
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
        nic_dict['bridge'] = None
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
        nic_dict['link'] = None

      if nic_op == constants.DDM_ADD:
        nic_mac = nic_dict.get('mac', None)
        if nic_mac is None:
          nic_dict['mac'] = constants.VALUE_AUTO

      if 'mac' in nic_dict:
        nic_mac = nic_dict['mac']
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)

        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
                                     " modifying an existing nic",
                                     errors.ECODE_INVAL)

    if nic_addremove > 1:
      raise errors.OpPrereqError("Only one NIC add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
      if self.op.disk_template and self.op.remote_node:
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, primary and secondaries.

    """
    args = dict()
    if constants.BE_MEMORY in self.be_new:
      args['memory'] = self.be_new[constants.BE_MEMORY]
    if constants.BE_VCPUS in self.be_new:
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
    # information at all.
    if self.op.nics:
      args['nics'] = []
      nic_override = dict(self.op.nics)
      for idx, nic in enumerate(self.instance.nics):
        if idx in nic_override:
          this_nic_override = nic_override[idx]
        else:
          this_nic_override = {}
        if 'ip' in this_nic_override:
          ip = this_nic_override['ip']
        else:
          ip = nic.ip
        if 'mac' in this_nic_override:
          mac = this_nic_override['mac']
        else:
          mac = nic.mac
        if idx in self.nic_pnew:
          nicparams = self.nic_pnew[idx]
        else:
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args['nics'].append((ip, mac, mode, link))
      if constants.DDM_ADD in nic_override:
        ip = nic_override[constants.DDM_ADD].get('ip', None)
        mac = nic_override[constants.DDM_ADD]['mac']
        nicparams = self.nic_pnew[constants.DDM_ADD]
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args['nics'].append((ip, mac, mode, link))
      elif constants.DDM_REMOVE in nic_override:
        del args['nics'][-1]

    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
    if self.op.disk_template:
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    # checking the new params on the primary/secondary nodes

    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    cluster = self.cluster = self.cfg.GetClusterInfo()
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    pnode = instance.primary_node
    nodelist = list(instance.all_nodes)

    # OS change
    if self.op.os_name and not self.op.force:
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
                      self.op.force_variant)
      instance_os = self.op.os_name
    else:
      instance_os = instance.os

    if self.op.disk_template:
      if instance.disk_template == self.op.disk_template:
        raise errors.OpPrereqError("Instance already has disk template %s" %
                                   instance.disk_template, errors.ECODE_INVAL)

      if (instance.disk_template,
          self.op.disk_template) not in self._DISK_CONVERSIONS:
        raise errors.OpPrereqError("Unsupported disk template conversion from"
                                   " %s to %s" % (instance.disk_template,
                                                  self.op.disk_template),
                                   errors.ECODE_INVAL)
      if self.op.disk_template in constants.DTS_NET_MIRROR:
        _CheckNodeOnline(self, self.op.remote_node)
        _CheckNodeNotDrained(self, self.op.remote_node)
        disks = [{"size": d.size} for d in instance.disks]
        required = _ComputeDiskSize(self.op.disk_template, disks)
        _CheckNodesFreeDisk(self, [self.op.remote_node], required)
        _CheckInstanceDown(self, instance, "cannot change disk template")

    # hvparams processing
    if self.op.hvparams:
      hv_type = instance.hypervisor
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)

      # local check
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
      self.hv_new = hv_new # the new actual values
      self.hv_inst = i_hvdict # the new dict (without defaults)
    else:
      self.hv_new = self.hv_inst = {}

    # beparams processing
    if self.op.beparams:
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
                                   use_none=True)
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
      be_new = cluster.SimpleFillBE(i_bedict)
      self.be_new = be_new # the new actual values
      self.be_inst = i_bedict # the new dict (without defaults)
    else:
      self.be_new = self.be_inst = {}

    # osparams processing
    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_new = cluster.SimpleFillOS(instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_new = self.os_inst = {}

    self.warn = []

    if constants.BE_MEMORY in self.op.beparams and not self.op.force:
      mem_check_list = [pnode]
      if be_new[constants.BE_AUTO_BALANCE]:
        # either we changed auto_balance to yes or it was from before
        mem_check_list.extend(instance.secondary_nodes)
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
                                                  instance.hypervisor)
      nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
                                         instance.hypervisor)
      pninfo = nodeinfo[pnode]
      msg = pninfo.fail_msg
      if msg:
        # Assume the primary node is unreachable and go ahead
        self.warn.append("Can't get info from primary node %s: %s" %
                         (pnode, msg))
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
        self.warn.append("Node data from primary node %s doesn't contain"
                         " free memory information" % pnode)
      elif instance_info.fail_msg:
        self.warn.append("Can't get instance runtime information: %s" %
                        instance_info.fail_msg)
      else:
        if instance_info.payload:
          current_mem = int(instance_info.payload['memory'])
        else:
          # Assume instance not running
          # (there is a slight race condition here, but it's not very probable,
          # and we have no other way to check)
          current_mem = 0
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
                    pninfo.payload['memory_free'])
        if miss_mem > 0:
          raise errors.OpPrereqError("This change will prevent the instance"
                                     " from starting, due to %d MB of memory"
                                     " missing on its primary node" % miss_mem,
                                     errors.ECODE_NORES)

      if be_new[constants.BE_AUTO_BALANCE]:
        for node, nres in nodeinfo.items():
          if node not in instance.secondary_nodes:
            continue
          msg = nres.fail_msg
          if msg:
            self.warn.append("Can't get info from secondary node %s: %s" %
                             (node, msg))
          elif not isinstance(nres.payload.get('memory_free', None), int):
            self.warn.append("Secondary node %s didn't return free"
                             " memory information" % node)
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
            self.warn.append("Not enough memory to failover instance to"
                             " secondary node %s" % node)

    # NIC processing
    self.nic_pnew = {}
    self.nic_pinst = {}
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        if not instance.nics:
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
                                     errors.ECODE_INVAL)
        continue
      if nic_op != constants.DDM_ADD:
        # an existing nic
        if not instance.nics:
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
                                     " no NICs" % nic_op,
                                     errors.ECODE_INVAL)
        if nic_op < 0 or nic_op >= len(instance.nics):
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
                                     " are 0 to %d" %
                                     (nic_op, len(instance.nics) - 1),
                                     errors.ECODE_INVAL)
        old_nic_params = instance.nics[nic_op].nicparams
        old_nic_ip = instance.nics[nic_op].ip
      else:
        old_nic_params = {}
        old_nic_ip = None

      update_params_dict = dict([(key, nic_dict[key])
                                 for key in constants.NICS_PARAMETERS
                                 if key in nic_dict])

      if 'bridge' in nic_dict:
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']

      new_nic_params = _GetUpdatedParams(old_nic_params,
                                         update_params_dict)
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
      self.nic_pinst[nic_op] = new_nic_params
      self.nic_pnew[nic_op] = new_filled_nic_params
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]

      if new_nic_mode == constants.NIC_MODE_BRIDGED:
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
        if msg:
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
          if self.op.force:
            self.warn.append(msg)
          else:
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
      if new_nic_mode == constants.NIC_MODE_ROUTED:
        if 'ip' in nic_dict:
          nic_ip = nic_dict['ip']
        else:
          nic_ip = old_nic_ip
        if nic_ip is None:
          raise errors.OpPrereqError('Cannot set the nic ip to None'
                                     ' on a routed nic', errors.ECODE_INVAL)
      if 'mac' in nic_dict:
        nic_mac = nic_dict['mac']
        if nic_mac is None:
          raise errors.OpPrereqError('Cannot set the nic mac to None',
                                     errors.ECODE_INVAL)
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          # otherwise generate the mac
          nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
        else:
          # or validate/reserve the current one
          try:
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
          except errors.ReservationError:
            raise errors.OpPrereqError("MAC address %s already in use"
                                       " in cluster" % nic_mac,
                                       errors.ECODE_NOTUNIQUE)

    # DISK processing
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Disk operations not supported for"
                                 " diskless instances",
                                 errors.ECODE_INVAL)
    for disk_op, _ in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        if len(instance.disks) == 1:
          raise errors.OpPrereqError("Cannot remove the last disk of"
                                     " an instance", errors.ECODE_INVAL)
        _CheckInstanceDown(self, instance, "cannot remove disks")

      if (disk_op == constants.DDM_ADD and
          len(instance.disks) >= constants.MAX_DISKS):
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
                                   " add more" % constants.MAX_DISKS,
                                   errors.ECODE_STATE)
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
        # an existing disk
        if disk_op < 0 or disk_op >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
                                     " are 0 to %d" %
                                     (disk_op, len(instance.disks)),
                                     errors.ECODE_INVAL)

    return

  def _ConvertPlainToDrbd(self, feedback_fn):
    """Converts an instance from plain to drbd.

    """
    feedback_fn("Converting template to drbd")
    instance = self.instance
    pnode = instance.primary_node
    snode = self.op.remote_node

    # create a fake disk info for _GenerateDiskTemplate
    disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
                                      instance.name, pnode, [snode],
                                      disk_info, None, None, 0)
    info = _GetInstanceInfoText(instance)
    feedback_fn("Creating additional volumes...")
    # first, create the missing data and meta devices
    for disk in new_disks:
      # unfortunately this is... not too nice
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
                            info, True)
      for child in disk.children:
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
    # at this stage, all new LVs have been created, we can rename the
    # old ones
    feedback_fn("Renaming original volumes...")
    rename_list = [(o, n.children[0].logical_id)
                   for (o, n) in zip(instance.disks, new_disks)]
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
    result.Raise("Failed to rename original LVs")

    feedback_fn("Initializing DRBD devices...")
    # all child devices are in place, we can now create the DRBD devices
    for disk in new_disks:
      for node in [pnode, snode]:
        f_create = node == pnode
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)

    # at this point, the instance has been modified
    instance.disk_template = constants.DT_DRBD8
    instance.disks = new_disks
    self.cfg.Update(instance, feedback_fn)

    # disks are created, waiting for sync
    disk_abort = not _WaitForSync(self, instance)
    if disk_abort:
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance, please cleanup manually")

  def _ConvertDrbdToPlain(self, feedback_fn):
    """Converts an instance from drbd to plain.

    """
    instance = self.instance
    assert len(instance.secondary_nodes) == 1
    pnode = instance.primary_node
    snode = instance.secondary_nodes[0]
    feedback_fn("Converting template to plain")

    old_disks = instance.disks
    new_disks = [d.children[0] for d in old_disks]

    # copy over size and mode
    for parent, child in zip(old_disks, new_disks):
      child.size = parent.size
      child.mode = parent.mode

    # update instance structure
    instance.disks = new_disks
    instance.disk_template = constants.DT_PLAIN
    self.cfg.Update(instance, feedback_fn)

    feedback_fn("Removing volumes on the secondary node...")
    for disk in old_disks:
      self.cfg.SetDiskID(disk, snode)
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
      if msg:
        self.LogWarning("Could not remove block device %s on node %s,"
                        " continuing anyway: %s", disk.iv_name, snode, msg)

    feedback_fn("Removing unneeded volumes on the primary node...")
    for idx, disk in enumerate(old_disks):
      meta = disk.children[1]
      self.cfg.SetDiskID(meta, pnode)
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
      if msg:
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
                        " continuing anyway: %s", idx, pnode, msg)

  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    result = []
    instance = self.instance
    # disk changes
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        # remove the last disk
        device = instance.disks.pop()
        device_idx = len(instance.disks)
        for node, disk in device.ComputeNodeTree(instance.primary_node):
          self.cfg.SetDiskID(disk, node)
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
          if msg:
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
                            " continuing anyway", device_idx, node, msg)
        result.append(("disk/%d" % device_idx, "remove"))
      elif disk_op == constants.DDM_ADD:
        # add a new disk
        if instance.disk_template == constants.DT_FILE:
          file_driver, file_path = instance.disks[0].logical_id
          file_path = os.path.dirname(file_path)
        else:
          file_driver = file_path = None
        disk_idx_base = len(instance.disks)
        new_disk = _GenerateDiskTemplate(self,
                                         instance.disk_template,
                                         instance.name, instance.primary_node,
                                         instance.secondary_nodes,
                                         [disk_dict],
                                         file_path,
                                         file_driver,
                                         disk_idx_base)[0]
        instance.disks.append(new_disk)
        info = _GetInstanceInfoText(instance)

        logging.info("Creating volume %s for instance %s",
                     new_disk.iv_name, instance.name)
        # Note: this needs to be kept in sync with _CreateDisks
        #HARDCODE
        for node in instance.all_nodes:
          f_create = node == instance.primary_node
          try:
            _CreateBlockDev(self, node, instance, new_disk,
                            f_create, info, f_create)
          except errors.OpExecError, err:
            self.LogWarning("Failed to create volume %s (%s) on"
                            " node %s: %s",
                            new_disk.iv_name, new_disk, node, err)
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
                       (new_disk.size, new_disk.mode)))
      else:
        # change a given disk
        instance.disks[disk_op].mode = disk_dict['mode']
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))

    if self.op.disk_template:
      r_shut = _ShutdownInstanceDisks(self, instance)
      if not r_shut:
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
                                 " proceed with disk template conversion")
      mode = (instance.disk_template, self.op.disk_template)
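      # run the conversion handler; on any failure, give back the DRBD minors
      # that may have been reserved for the new disks before re-raising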
      try:
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
      except:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise
      result.append(("disk_template", self.op.disk_template))

    # NIC changes
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        # remove the last nic
        del instance.nics[-1]
        result.append(("nic.%d" % len(instance.nics), "remove"))
      elif nic_op == constants.DDM_ADD:
        # mac and bridge should be set, by now
        mac = nic_dict['mac']
        ip = nic_dict.get('ip', None)
        nicparams = self.nic_pinst[constants.DDM_ADD]
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
        instance.nics.append(new_nic)
        result.append(("nic.%d" % (len(instance.nics) - 1),
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
                       (new_nic.mac, new_nic.ip,
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
                       )))
      else:
        for key in 'mac', 'ip':
          if key in nic_dict:
            setattr(instance.nics[nic_op], key, nic_dict[key])
        if nic_op in self.nic_pinst:
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
        for key, val in nic_dict.iteritems():
          result.append(("nic.%s/%d" % (key, nic_op), val))

    # hvparams changes
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    # OS change
    if self.op.os_name:
      instance.os = self.op.os_name

    # osparams changes
    if self.op.osparams:
      instance.osparams = self.os_inst
      for key, val in self.op.osparams.iteritems():
        result.append(("os/%s" % key, val))

    self.cfg.Update(instance, feedback_fn)

    return result

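  # Dispatch table used by CheckPrereq and Exec above: maps a
  # (current template, requested template) pair to the method implementing
  # that conversion.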
  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }


class LUQueryExports(NoHooksLU):
  """Query the exports list

  """
  _OP_REQP = ['nodes']
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node.

    """
    rpcresult = self.rpc.call_export_list(self.nodes)
    result = {}
    for node in rpcresult:
      if rpcresult[node].fail_msg:
        result[node] = False
      else:
        result[node] = rpcresult[node].payload

    return result


class LUPrepareExport(NoHooksLU):
  """Prepares an instance for an export and returns useful information.

  """
  _OP_REQP = ["instance_name", "mode"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    if self.op.mode not in constants.EXPORT_MODES:
      raise errors.OpPrereqError("Invalid export mode %r" % self.op.mode,
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self._cds = _GetClusterDomainSecret()

  def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    """
    instance = self.instance

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      salt = utils.GenerateSecret(8)

      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
      result = self.rpc.call_x509_cert_create(instance.primary_node,
                                              constants.RIE_CERT_VALIDITY)
      result.Raise("Can't create X509 key and certificate on %s" % result.node)

      (name, cert_pem) = result.payload

      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                             cert_pem)

      return {
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
                          salt),
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
        }

    return None


class LUExportInstance(LogicalUnit):
  """Export an instance to an image in the cluster.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "target_node", "shutdown"]
  _OP_DEFS = [
    ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT),
    ("remove_instance", False),
    ("ignore_remove_failures", False),
    ("mode", constants.EXPORT_MODE_LOCAL),
    ("x509_key_name", None),
    ("destination_x509_ca", None),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.x509_key_name = self.op.x509_key_name
    self.dest_x509_ca_pem = self.op.destination_x509_ca

    if self.op.remove_instance and not self.op.shutdown:
      raise errors.OpPrereqError("Cannot remove instance without shutting it"
                                 " down first")

    if self.op.mode not in constants.EXPORT_MODES:
      raise errors.OpPrereqError("Invalid export mode %r" % self.op.mode,
                                 errors.ECODE_INVAL)

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      if not self.x509_key_name:
        raise errors.OpPrereqError("Missing X509 key name for encryption",
                                   errors.ECODE_INVAL)

      if not self.dest_x509_ca_pem:
        raise errors.OpPrereqError("Missing destination X509 CA",
                                   errors.ECODE_INVAL)

  def ExpandNames(self):
9247
    self._ExpandAndLockInstance()
9248

    
9249
    # Lock all nodes for local exports
9250
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9251
      # FIXME: lock only instance primary and destination node
9252
      #
9253
      # Sad but true, for now we have do lock all nodes, as we don't know where
9254
      # the previous export might be, and in this LU we search for it and
9255
      # remove it from its current node. In the future we could fix this by:
9256
      #  - making a tasklet to search (share-lock all), then create the
9257
      #    new one, then one to remove, after
9258
      #  - removing the removal operation altogether
9259
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9260

    
9261
  def DeclareLocks(self, level):
9262
    """Last minute lock declaration."""
9263
    # All nodes are locked anyway, so nothing to do here.
9264

    
9265
  def BuildHooksEnv(self):
9266
    """Build hooks env.
9267

9268
    This will run on the master, primary node and target node.
9269

9270
    """
9271
    env = {
9272
      "EXPORT_MODE": self.op.mode,
9273
      "EXPORT_NODE": self.op.target_node,
9274
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9275
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9276
      # TODO: Generic function for boolean env variables
9277
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
9278
      }
9279

    
9280
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9281

    
9282
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9283

    
9284
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9285
      nl.append(self.op.target_node)
9286

    
9287
    return env, nl, nl
9288

    
9289
  def CheckPrereq(self):
9290
    """Check prerequisites.
9291

9292
    This checks that the instance and node names are valid.
9293

9294
    """
9295
    instance_name = self.op.instance_name
9296

    
9297
    self.instance = self.cfg.GetInstanceInfo(instance_name)
9298
    assert self.instance is not None, \
9299
          "Cannot retrieve locked instance %s" % self.op.instance_name
9300
    _CheckNodeOnline(self, self.instance.primary_node)
9301

    
9302
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9303
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9304
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9305
      assert self.dst_node is not None
9306

    
9307
      _CheckNodeOnline(self, self.dst_node.name)
9308
      _CheckNodeNotDrained(self, self.dst_node.name)
9309

    
9310
      self._cds = None
9311
      self.dest_disk_info = None
9312
      self.dest_x509_ca = None
9313

    
9314
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9315
      self.dst_node = None
9316

    
9317
      if len(self.op.target_node) != len(self.instance.disks):
9318
        raise errors.OpPrereqError(("Received destination information for %s"
9319
                                    " disks, but instance %s has %s disks") %
9320
                                   (len(self.op.target_node), instance_name,
9321
                                    len(self.instance.disks)),
9322
                                   errors.ECODE_INVAL)
9323

    
9324
      cds = _GetClusterDomainSecret()
9325

    
9326
      # Check X509 key name
9327
      try:
9328
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9329
      except (TypeError, ValueError), err:
9330
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9331

    
9332
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9333
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9334
                                   errors.ECODE_INVAL)
9335

    
9336
      # Load and verify CA
9337
      try:
9338
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9339
      except OpenSSL.crypto.Error, err:
9340
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9341
                                   (err, ), errors.ECODE_INVAL)
9342

    
9343
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9344
      if errcode is not None:
9345
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
9346
                                   (msg, ), errors.ECODE_INVAL)
9347

    
9348
      self.dest_x509_ca = cert
9349

    
9350
      # Verify target information
9351
      disk_info = []
9352
      for idx, disk_data in enumerate(self.op.target_node):
9353
        try:
9354
          (host, port, magic) = \
9355
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9356
        except errors.GenericError, err:
9357
          raise errors.OpPrereqError("Target info for disk %s: %s" %
9358
                                     (idx, err), errors.ECODE_INVAL)
9359

    
9360
        disk_info.append((host, port, magic))
9361

    
9362
      assert len(disk_info) == len(self.op.target_node)
9363
      self.dest_disk_info = disk_info
9364

    
9365
    else:
9366
      raise errors.ProgrammerError("Unhandled export mode %r" %
9367
                                   self.op.mode)
9368

    
9369
    # instance disk type verification
9370
    # TODO: Implement export support for file-based disks
9371
    for disk in self.instance.disks:
9372
      if disk.dev_type == constants.LD_FILE:
9373
        raise errors.OpPrereqError("Export not supported for instances with"
9374
                                   " file-based disks", errors.ECODE_INVAL)
9375

    
9376
  def _CleanupExports(self, feedback_fn):
9377
    """Removes exports of current instance from all other nodes.
9378

9379
    If an instance in a cluster with nodes A..D was exported to node C, its
9380
    exports will be removed from the nodes A, B and D.
9381

9382
    """
9383
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
9384

    
9385
    nodelist = self.cfg.GetNodeList()
9386
    nodelist.remove(self.dst_node.name)
9387

    
9388
    # on one-node clusters nodelist will be empty after the removal
9389
    # if we proceed the backup would be removed because OpQueryExports
9390
    # substitutes an empty list with the full cluster node list.
9391
    iname = self.instance.name
9392
    if nodelist:
9393
      feedback_fn("Removing old exports for instance %s" % iname)
9394
      exportlist = self.rpc.call_export_list(nodelist)
9395
      for node in exportlist:
9396
        if exportlist[node].fail_msg:
9397
          continue
9398
        if iname in exportlist[node].payload:
9399
          msg = self.rpc.call_export_remove(node, iname).fail_msg
9400
          if msg:
9401
            self.LogWarning("Could not remove older export for instance %s"
9402
                            " on node %s: %s", iname, node, msg)
9403

    
9404
  def Exec(self, feedback_fn):
9405
    """Export an instance to an image in the cluster.
9406

9407
    """
9408
    assert self.op.mode in constants.EXPORT_MODES
9409

    
9410
    instance = self.instance
9411
    src_node = instance.primary_node
9412

    
9413
    if self.op.shutdown:
9414
      # shutdown the instance, but not the disks
9415
      feedback_fn("Shutting down instance %s" % instance.name)
9416
      result = self.rpc.call_instance_shutdown(src_node, instance,
9417
                                               self.op.shutdown_timeout)
9418
      # TODO: Maybe ignore failures if ignore_remove_failures is set
9419
      result.Raise("Could not shutdown instance %s on"
9420
                   " node %s" % (instance.name, src_node))
9421

    
9422
    # set the disks ID correctly since call_instance_start needs the
9423
    # correct drbd minor to create the symlinks
9424
    for disk in instance.disks:
9425
      self.cfg.SetDiskID(disk, src_node)
9426

    
9427
    activate_disks = (not instance.admin_up)
9428

    
9429
    if activate_disks:
9430
      # Activate the instance disks if we'exporting a stopped instance
9431
      feedback_fn("Activating disks for %s" % instance.name)
9432
      _StartInstanceDisks(self, instance, None)
9433

    
9434
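    # The export helper works from disk snapshots, which is why the instance
    # can be restarted below while the actual copy is still in progress.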
    try:
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
                                                     instance)

      helper.CreateSnapshots()
      try:
        if (self.op.shutdown and instance.admin_up and
            not self.op.remove_instance):
          assert not activate_disks
          feedback_fn("Starting instance %s" % instance.name)
          result = self.rpc.call_instance_start(src_node, instance, None, None)
          msg = result.fail_msg
          if msg:
            feedback_fn("Failed to start instance: %s" % msg)
            _ShutdownInstanceDisks(self, instance)
            raise errors.OpExecError("Could not start instance: %s" % msg)

        if self.op.mode == constants.EXPORT_MODE_LOCAL:
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

          (key_name, _, _) = self.x509_key_name

          dest_ca_pem = \
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                            self.dest_x509_ca)

          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
                                                     key_name, dest_ca_pem,
                                                     timeouts)
      finally:
        helper.Cleanup()

      # Check for backwards compatibility
      assert len(dresults) == len(instance.disks)
      assert compat.all(isinstance(i, bool) for i in dresults), \
             "Not all results are boolean: %r" % dresults

    finally:
      if activate_disks:
        feedback_fn("Deactivating disks for %s" % instance.name)
        _ShutdownInstanceDisks(self, instance)

    # Remove instance if requested
    if self.op.remove_instance:
      if not (compat.all(dresults) and fin_resu):
        feedback_fn("Not removing instance %s as parts of the export failed" %
                    instance.name)
      else:
        feedback_fn("Removing instance %s" % instance.name)
        _RemoveInstance(self, feedback_fn, instance,
                        self.op.ignore_remove_failures)

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self._CleanupExports(feedback_fn)

    return fin_resu, dresults


class LURemoveExport(NoHooksLU):
  """Remove exports related to the named instance.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.
    """
    pass

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name

    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")


class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """

  def ExpandNames(self):
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)


class LUGetTags(TagsLU):
  """Returns the tags of a given object.

  """
  _OP_REQP = ["kind", "name"]
  REQ_BGL = False

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())


class LUSearchTags(NoHooksLU):
9594
  """Searches the tags for a given pattern.
9595

9596
  """
9597
  _OP_REQP = ["pattern"]
9598
  REQ_BGL = False
9599

    
9600
  def ExpandNames(self):
9601
    self.needed_locks = {}
9602

    
9603
  def CheckPrereq(self):
9604
    """Check prerequisites.
9605

9606
    This checks the pattern passed for validity by compiling it.
9607

9608
    """
9609
    try:
9610
      self.re = re.compile(self.op.pattern)
9611
    except re.error, err:
9612
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
9613
                                 (self.op.pattern, err), errors.ECODE_INVAL)
9614

    
9615
  def Exec(self, feedback_fn):
9616
    """Returns the tag list.
9617

9618
    """
9619
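    # The result is a list of (path, tag) tuples, where path is one of
    # "/cluster", "/instances/<name>" or "/nodes/<name>", e.g. (illustrative,
    # names made up):
    #   [("/cluster", "production"), ("/nodes/node1.example.com", "rack1")]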
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results


class LUAddTags(TagsLU):
  """Sets one or more tags on a given object.

  """
  _OP_REQP = ["kind", "name", "tags"]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tags.

    """
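    # Tag errors raised by AddTag are converted to OpExecError; the updated
    # tag set is only persisted by the cfg.Update call below.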
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)


class LUDelTags(TagsLU):
  """Delete a list of tags from a given object.

  """
  _OP_REQP = ["kind", "name", "tags"]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tags.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()
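    # The subset check (del_tags <= cur_tags) succeeds only if every requested
    # tag is currently present; any missing tags are reported together below.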
    if not del_tags <= cur_tags:
      diff_tags = del_tags - cur_tags
      diff_names = ["'%s'" % tag for tag in diff_tags]
      diff_names.sort()
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (",".join(diff_names)), errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tags from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)


class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  _OP_REQP = ["duration", "on_master", "on_nodes"]
  REQ_BGL = False

  def CheckArguments(self):
    # TODO: convert to the type system
    self.op.repeat = getattr(self.op, "repeat", 0)
    if self.op.repeat < 0:
      raise errors.OpPrereqError("Repetition count cannot be negative")

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but it is not always appropriate to
      # use it this way in ExpandNames. Check the LogicalUnit.ExpandNames
      # docstring for more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    """

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
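    # A repeat count of 0 still performs a single delay; a count of N performs
    # N delays, logged as iterations 0..N-1 out of N-1.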
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()


class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has the following sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the relevant _*_KEYS class attribute are
      required)
    - four buffer attributes (in_data, in_text, out_data, out_text), that
      represent the input (to the external script) in text and data
      structure format, and the output from it, again in two formats
    - the result variables from the script (success, info, result) for
      easy usage

  """
  # pylint: disable-msg=R0902
  # lots of instance attributes
  _ALLO_KEYS = [
    "name", "mem_size", "disks", "disk_template",
    "os", "tags", "nics", "vcpus", "hypervisor",
    ]
  _RELO_KEYS = [
    "name", "relocate_from",
    ]
  _EVAC_KEYS = [
    "evac_nodes",
    ]

  def __init__(self, cfg, rpc, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.mem_size = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.evac_nodes = None
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      keyset = self._ALLO_KEYS
      fn = self._AddNewInstance
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      keyset = self._RELO_KEYS
      fn = self._AddRelocateInstance
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      keyset = self._EVAC_KEYS
      fn = self._AddEvacuateNodes
    else:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)
    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(fn)

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_results = {}
    node_list = cfg.GetNodeList()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)
    for nname, nresult in node_data.items():
      # first fill in static (config-based) values
      ninfo = cfg.GetNodeInfo(nname)
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        }

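      # Runtime data (memory, disk, cpu) is collected only for nodes that are
      # neither offline nor drained; other nodes keep just the static values.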
      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # fill in the dynamic (runtime) values for this node
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr.update(pnr_dyn)

      node_results[nname] = pnr
    data["nodes"] = node_results

    # instance data
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                   }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    data["instances"] = instance_data
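    # At this point the dict contains the operation-independent keys (version,
    # cluster_name, cluster_tags, enabled_hypervisors, nodes, instances); the
    # mode-specific "request" key is added later by _BuildInputData.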
    self.in_data = data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {
      "evac_nodes": self.evac_nodes
      }
    return request

  def _BuildInputData(self, fn):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    self.in_data["request"] = request
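    # The serialized text therefore describes the whole cluster plus a
    # "request" entry; for an allocation this roughly looks like (illustrative
    # sketch, field values made up):
    #   {"version": ..., "nodes": {...}, "instances": {...},
    #    "request": {"type": <mode>, "name": "inst1", "required_nodes": 2, ...}}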
    self.in_text = serializer.Dump(self.in_data)

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
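    # The named allocator script is executed via RPC on the master node; its
    # reply text ends up in result.payload and, unless validate is False, is
    # checked by _ValidateResult below.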
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and, if successful, save the result in
    self.out_data and the other result attributes.

    """
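    # A well-formed reply is a dict with at least the keys "success", "info"
    # and "result" (a list), e.g. (illustrative):
    #   {"success": True, "info": "allocation successful", "result": ["node2"]}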
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["result"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
                               " is not a list")
    self.out_data = rdict


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  _OP_REQP = ["direction", "mode", "name"]
  _OP_DEFS = [
    ("hypervisor", None),
    ("allocator", None),
    ]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    of the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["name", "mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      for row in self.op.nics:
        if (not isinstance(row, dict) or
            "mac" not in row or
            "ip" not in row or
            "bridge" not in row):
          raise errors.OpPrereqError("Invalid contents of the 'nics'"
                                     " parameter", errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      if not hasattr(self.op, "name"):
        raise errors.OpPrereqError("Missing attribute 'name' on opcode input",
                                   errors.ECODE_INVAL)
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
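    # With IALLOCATOR_DIR_IN only the generated input text is returned; with
    # IALLOCATOR_DIR_OUT the named allocator is actually run (unvalidated) and
    # its raw output text is returned.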
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      raise errors.ProgrammerError("Uncaught mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result