#
#

# Copyright (C) 2006, 2007, 2008 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable-msg=W0201,C0302

# W0201 since most LU attributes are defined in CheckPrereq or similar
# functions

# C0302: since we have waaaay too many lines in this module

import os
import os.path
import time
import re
import platform
import logging
import copy
import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd

import ganeti.masterd.instance # pylint: disable-msg=W0611


# need to define these here before the actual LUs

def _EmptyList():
  """Returns an empty list.

  """
  return []


def _EmptyDict():
  """Returns an empty dict.

  """
  return {}
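
# The two helpers above exist mainly to be used as *callable* defaults in an
# LU's _OP_DEFS list, so that every opcode gets its own fresh list/dict rather
# than sharing a single mutable object.  A rough, hypothetical sketch (not
# taken from the original code; LUHypotheticalExample is illustrative only):
#
#   class LUHypotheticalExample(NoHooksLU):
#     _OP_REQP = ["instance_name"]
#     _OP_DEFS = [
#       ("ignore_failures", False),   # plain immutable default
#       ("disks", _EmptyList),        # callable: evaluated once per opcode
#       ("hvparams", _EmptyDict),
#     ]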


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)
  @cvar _OP_DEFS: a list of opcode attributes and the default values
      they should get if not already existing

  """
  HPATH = None
  HTYPE = None
  _OP_REQP = []
  _OP_DEFS = []
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    self.__ssh = None
    # logging
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    for aname, aval in self._OP_DEFS:
      if not hasattr(self.op, aname):
        if callable(aval):
          dval = aval()
        else:
          dval = aval
        setattr(self.op, aname, dval)

    for attr_name in self._OP_REQP:
      attr_val = getattr(op, attr_name, None)
      if attr_val is None:
        raise errors.OpPrereqError("Required parameter '%s' missing" %
                                   attr_name, errors.ECODE_INVAL)

    self.CheckArguments()

  def __GetSSH(self):
    """Returns the SshRunner object.

    """
    if not self.__ssh:
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
    return self.__ssh

  ssh = property(fget=__GetSSH)

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods no longer need to worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which will then be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.tld'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None
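      # Acquire all node locks in shared mode (an illustrative addition, not
      # one of the original examples): combine needed_locks with share_locks
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
      self.share_locks[locking.LEVEL_NODE] = 1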

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level.

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    This method should return a three-element tuple consisting of: a dict
    containing the environment that will be used for running the
    specific hook for this LU, a list of node names on which the hook
    should run before the execution, and a list of node names on which
    the hook should run after the execution.

    The keys of the dict must not be prefixed with 'GANETI_', as this
    will be handled in the hooks runner. Also note additional keys will
    be added by the hooks runner. If the LU doesn't define any
    environment, an empty dict (and not None) should be returned.

    If no nodes are to be returned, use an empty list (and not None).

    Note that if the HPATH for a LU class is None, this function will
    not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks.  By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the 'unused argument' and 'could
    # be a function' warnings
    # pylint: disable-msg=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]


class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.
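
  For illustration only (a hypothetical sketch, not present in the original
  code), a concurrent subclass would typically look like::

    class LUHypotheticalQuery(NoHooksLU):
      _OP_REQP = []
      REQ_BGL = False

      def ExpandNames(self):
        self.needed_locks = {}

      def CheckPrereq(self):
        pass

      def Exec(self, feedback_fn):
        return self.cfg.GetNodeList()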

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This just raises an error.

    """
    assert False, "BuildHooksEnv called for NoHooksLUs"


class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec
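
  For illustration only (a rough sketch, not part of the original code), a
  minimal tasklet and its wiring into an LU could look like::

    class _HypotheticalNoopTasklet(Tasklet):
      def CheckPrereq(self):
        pass  # nothing to verify

      def Exec(self, feedback_fn):
        feedback_fn("noop tasklet executed")

    # inside some LU's ExpandNames:
    #   self.tasklets = [_HypotheticalNoopTasklet(self)]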

  """
  def __init__(self, lu):
    self.lu = lu

    # Shortcuts
    self.cfg = lu.cfg
    self.rpc = lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is of a wrong type

  """
  if not isinstance(nodes, list):
    raise errors.OpPrereqError("Invalid argument type 'nodes'",
                               errors.ECODE_INVAL)

  if not nodes:
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
      " non-empty list of nodes whose name is to be expanded.")

  wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
  return utils.NiceSort(wanted)


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is of a wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if not isinstance(instances, list):
    raise errors.OpPrereqError("Invalid argument type 'instances'",
                               errors.ECODE_INVAL)

  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy
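# For illustration (a sketch, not a doctest from the original code), the
# intended behaviour with the usual VALUE_DEFAULT marker is roughly:
#
#   _GetUpdatedParams({"kernel_path": "/boot/vmlinuz", "root_path": "/dev/sda"},
#                     {"kernel_path": constants.VALUE_DEFAULT,
#                      "serial_console": True})
#   -> {"root_path": "/dev/sda", "serial_console": True}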


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckBooleanOpField(op, name):
  """Validates boolean opcode parameters.

  This will ensure that an opcode parameter is either a boolean value,
  or None (but that it always exists).

  """
  val = getattr(op, name, None)
  if not (val is None or isinstance(val, bool)):
    raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
                               (name, str(val)), errors.ECODE_INVAL)
  setattr(op, name, val)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is offline

  """
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("Can't use offline node %s" % node,
                               errors.ECODE_INVAL)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_INVAL)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node does not support the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _RequireFileStorage():
  """Checks that file storage is enabled.

  @raise errors.OpPrereqError: when file storage is disabled

  """
  if not constants.ENABLE_FILE_STORAGE:
    raise errors.OpPrereqError("File storage disabled at configure time",
                               errors.ECODE_INVAL)


def _CheckDiskTemplate(template):
  """Ensure a given disk template is valid.

  """
  if template not in constants.DISK_TEMPLATES:
    msg = ("Invalid disk template name '%s', valid templates are: %s" %
           (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
  if template == constants.DT_FILE:
    _RequireFileStorage()


def _CheckStorageType(storage_type):
  """Ensure a given storage type is valid.

  """
  if storage_type not in constants.VALID_STORAGE_TYPES:
    raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
                               errors.ECODE_INVAL)
  if storage_type == constants.ST_FILE:
    _RequireFileStorage()


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceDown(lu, instance, reason):
  """Ensure that an instance is not running."""
  if instance.admin_up:
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
                               (instance.name, reason), errors.ECODE_STATE)

  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
              prereq=True, ecode=errors.ECODE_ENVIRON)

  if instance.name in ins_l.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, reason), errors.ECODE_STATE)


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instances."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name):
  """Builds instance related env variables for hooks.

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env
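# For illustration (a sketch, not from the original code): a single-NIC,
# single-disk instance ends up with environment keys roughly like
#
#   OP_TARGET, INSTANCE_NAME, INSTANCE_PRIMARY, INSTANCE_SECONDARIES,
#   INSTANCE_OS_TYPE, INSTANCE_STATUS, INSTANCE_MEMORY, INSTANCE_VCPUS,
#   INSTANCE_DISK_TEMPLATE, INSTANCE_HYPERVISOR,
#   INSTANCE_NIC_COUNT=1, INSTANCE_NIC0_IP, INSTANCE_NIC0_MAC,
#   INSTANCE_NIC0_MODE, INSTANCE_NIC0_LINK,
#   INSTANCE_DISK_COUNT=1, INSTANCE_DISK0_SIZE, INSTANCE_DISK0_MODE,
#
# plus one INSTANCE_BE_*/INSTANCE_HV_* entry per backend/hypervisor parameter;
# the hooks runner later adds the GANETI_ prefix to all of these.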


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUQueryInstanceData.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.admin_up,
    'memory': bep[constants.BE_MEMORY],
    'vcpus': bep[constants.BE_VCPUS],
    'nics': _NICListToTuple(lu, instance.nics),
    'disk_template': instance.disk_template,
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
    'bep': bep,
    'hvp': hvp,
    'hypervisor_name': instance.hypervisor,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max by one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should
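# Worked example (illustrative numbers, not from the original code): with
# candidate_pool_size=10, mc_now=3 current candidates and mc_should=3, the
# new node gives mc_should = min(3 + 1, 10) = 4 > 3, so it promotes itself.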


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  if not os_obj.supported_variants:
    return
  try:
    variant = name.split("+", 1)[1]
  except IndexError:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """

  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


class LUPostInitCluster(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    mn = self.cfg.GetMasterNode()
    return env, [], [mn]

  def CheckPrereq(self):
    """No prerequisites to check.

    """
    return True

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUDestroyCluster(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    return env, [], []

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Run post hooks on master node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % master)

    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    if modify_ssh_setup:
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
      utils.CreateBackup(priv_key)
      utils.CreateBackup(pub_key)

    return master


def _VerifyCertificate(filename):
  """Verifies a certificate for LUVerifyCluster.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable-msg=W0703
    return (LUVerifyCluster.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)


class LUVerifyCluster(LogicalUnit):
  """Verifies the cluster status.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["skip_checks", "verbose", "error_codes", "debug_simulate_errors"]
  REQ_BGL = False

  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEOS = (TNODE, "ENODEOS")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @type name: string
    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dict of {secondary-node: list of instances} of all peers
        of this node (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @type oslist: list
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS

    """
    def __init__(self, offline=False, name=None):
      self.name = name
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes:
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % msg)
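    # For illustration (not from the original code), with error_codes set the
    # parseable form above looks like "ERROR:ENODELVM:node:node1.example.com:
    # unable to check volume groups", while the plain form is
    # "ERROR: node node1.example.com: unable to check volume groups".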
1237

    
1238
  def _ErrorIf(self, cond, *args, **kwargs):
1239
    """Log an error message if the passed condition is True.
1240

1241
    """
1242
    cond = bool(cond) or self.op.debug_simulate_errors
1243
    if cond:
1244
      self._Error(*args, **kwargs)
1245
    # do not mark the operation as failed for WARN cases only
1246
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1247
      self.bad = self.bad or cond
1248

    
1249
  def _VerifyNode(self, ninfo, nresult):
1250
    """Run multiple tests against a node.
1251

1252
    Test list:
1253

1254
      - compares ganeti version
1255
      - checks vg existence and size > 20G
1256
      - checks config file checksum
1257
      - checks ssh to other nodes
1258

1259
    @type ninfo: L{objects.Node}
1260
    @param ninfo: the node to check
1261
    @param nresult: the results from the node
1262
    @rtype: boolean
1263
    @return: whether overall this call was successful (and we can expect
1264
         reasonable values in the respose)
1265

1266
    """
1267
    node = ninfo.name
1268
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1269

    
1270
    # main result, nresult should be a non-empty dict
1271
    test = not nresult or not isinstance(nresult, dict)
1272
    _ErrorIf(test, self.ENODERPC, node,
1273
                  "unable to verify node: no data returned")
1274
    if test:
1275
      return False
1276

    
1277
    # compares ganeti version
1278
    local_version = constants.PROTOCOL_VERSION
1279
    remote_version = nresult.get("version", None)
1280
    test = not (remote_version and
1281
                isinstance(remote_version, (list, tuple)) and
1282
                len(remote_version) == 2)
1283
    _ErrorIf(test, self.ENODERPC, node,
1284
             "connection to node returned invalid data")
1285
    if test:
1286
      return False
1287

    
1288
    test = local_version != remote_version[0]
1289
    _ErrorIf(test, self.ENODEVERSION, node,
1290
             "incompatible protocol versions: master %s,"
1291
             " node %s", local_version, remote_version[0])
1292
    if test:
1293
      return False
1294

    
1295
    # node seems compatible, we can actually try to look into its results
1296

    
1297
    # full package version
1298
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1299
                  self.ENODEVERSION, node,
1300
                  "software version mismatch: master %s, node %s",
1301
                  constants.RELEASE_VERSION, remote_version[1],
1302
                  code=self.ETYPE_WARNING)
1303

    
1304
    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1305
    if isinstance(hyp_result, dict):
1306
      for hv_name, hv_result in hyp_result.iteritems():
1307
        test = hv_result is not None
1308
        _ErrorIf(test, self.ENODEHV, node,
1309
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1310

    
1311

    
1312
    test = nresult.get(constants.NV_NODESETUP,
1313
                           ["Missing NODESETUP results"])
1314
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1315
             "; ".join(test))
1316

    
1317
    return True
1318

    
1319
  def _VerifyNodeTime(self, ninfo, nresult,
1320
                      nvinfo_starttime, nvinfo_endtime):
1321
    """Check the node time.
1322

1323
    @type ninfo: L{objects.Node}
1324
    @param ninfo: the node to check
1325
    @param nresult: the remote results for the node
1326
    @param nvinfo_starttime: the start time of the RPC call
1327
    @param nvinfo_endtime: the end time of the RPC call
1328

1329
    """
1330
    node = ninfo.name
1331
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1332

    
1333
    ntime = nresult.get(constants.NV_TIME, None)
1334
    try:
1335
      ntime_merged = utils.MergeTime(ntime)
1336
    except (ValueError, TypeError):
1337
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1338
      return
1339

    
1340
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1341
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1342
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1343
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1344
    else:
1345
      ntime_diff = None
1346

    
1347
    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1348
             "Node time diverges by at least %s from master node time",
1349
             ntime_diff)
1350

    
1351
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1352
    """Check the node time.
1353

1354
    @type ninfo: L{objects.Node}
1355
    @param ninfo: the node to check
1356
    @param nresult: the remote results for the node
1357
    @param vg_name: the configured VG name
1358

1359
    """
1360
    if vg_name is None:
1361
      return
1362

    
1363
    node = ninfo.name
1364
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1365

    
1366
    # checks vg existence and size > 20G
1367
    vglist = nresult.get(constants.NV_VGLIST, None)
1368
    test = not vglist
1369
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1370
    if not test:
1371
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1372
                                            constants.MIN_VG_SIZE)
1373
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1374

    
1375
    # check pv names
1376
    pvlist = nresult.get(constants.NV_PVLIST, None)
1377
    test = pvlist is None
1378
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1379
    if not test:
1380
      # check that ':' is not present in PV names, since it's a
1381
      # special character for lvcreate (denotes the range of PEs to
1382
      # use on the PV)
1383
      for _, pvname, owner_vg in pvlist:
1384
        test = ":" in pvname
1385
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1386
                 " '%s' of VG '%s'", pvname, owner_vg)
1387

    
1388
  def _VerifyNodeNetwork(self, ninfo, nresult):
1389
    """Check the node time.
1390

1391
    @type ninfo: L{objects.Node}
1392
    @param ninfo: the node to check
1393
    @param nresult: the remote results for the node
1394

1395
    """
1396
    node = ninfo.name
1397
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1398

    
1399
    test = constants.NV_NODELIST not in nresult
1400
    _ErrorIf(test, self.ENODESSH, node,
1401
             "node hasn't returned node ssh connectivity data")
1402
    if not test:
1403
      if nresult[constants.NV_NODELIST]:
1404
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1405
          _ErrorIf(True, self.ENODESSH, node,
1406
                   "ssh communication with node '%s': %s", a_node, a_msg)
1407

    
1408
    test = constants.NV_NODENETTEST not in nresult
1409
    _ErrorIf(test, self.ENODENET, node,
1410
             "node hasn't returned node tcp connectivity data")
1411
    if not test:
1412
      if nresult[constants.NV_NODENETTEST]:
1413
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1414
        for anode in nlist:
1415
          _ErrorIf(True, self.ENODENET, node,
1416
                   "tcp communication with node '%s': %s",
1417
                   anode, nresult[constants.NV_NODENETTEST][anode])
1418

    
1419
    test = constants.NV_MASTERIP not in nresult
1420
    _ErrorIf(test, self.ENODENET, node,
1421
             "node hasn't returned node master IP reachability data")
1422
    if not test:
1423
      if not nresult[constants.NV_MASTERIP]:
1424
        if node == self.master_node:
1425
          msg = "the master node cannot reach the master IP (not configured?)"
1426
        else:
1427
          msg = "cannot reach the master IP"
1428
        _ErrorIf(True, self.ENODENET, node, msg)
1429

    
1430

    
1431
  def _VerifyInstance(self, instance, instanceconfig, node_image):
1432
    """Verify an instance.
1433

1434
    This function checks to see if the required block devices are
1435
    available on the instance's node.
1436

1437
    """
1438
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1439
    node_current = instanceconfig.primary_node
1440

    
1441
    node_vol_should = {}
1442
    instanceconfig.MapLVsByNode(node_vol_should)
1443

    
1444
    for node in node_vol_should:
1445
      n_img = node_image[node]
1446
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1447
        # ignore missing volumes on offline or broken nodes
1448
        continue
1449
      for volume in node_vol_should[node]:
1450
        test = volume not in n_img.volumes
1451
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1452
                 "volume %s missing on node %s", volume, node)
1453

    
1454
    if instanceconfig.admin_up:
1455
      pri_img = node_image[node_current]
1456
      test = instance not in pri_img.instances and not pri_img.offline
1457
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
1458
               "instance not running on its primary node %s",
1459
               node_current)
1460

    
1461
    for node, n_img in node_image.items():
1462
      if (not node == node_current):
1463
        test = instance in n_img.instances
1464
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1465
                 "instance should not run on node %s", node)
1466

    
1467
  def _VerifyOrphanVolumes(self, node_vol_should, node_image):
1468
    """Verify if there are any unknown volumes in the cluster.
1469

1470
    The .os, .swap and backup volumes are ignored. All other volumes are
1471
    reported as unknown.
1472

1473
    """
1474
    for node, n_img in node_image.items():
1475
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1476
        # skip non-healthy nodes
1477
        continue
1478
      for volume in n_img.volumes:
1479
        test = (node not in node_vol_should or
1480
                volume not in node_vol_should[node])
1481
        self._ErrorIf(test, self.ENODEORPHANLV, node,
1482
                      "volume %s is unknown", volume)
1483

    
1484
  def _VerifyOrphanInstances(self, instancelist, node_image):
1485
    """Verify the list of running instances.
1486

1487
    This checks what instances are running but unknown to the cluster.
1488

1489
    """
1490
    for node, n_img in node_image.items():
1491
      for o_inst in n_img.instances:
1492
        test = o_inst not in instancelist
1493
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1494
                      "instance %s on node %s should not exist", o_inst, node)

  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    """
    for node, n_img in node_image.items():
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to, should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
      for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate"
                      " failovers should peer node %s fail", prinode)

  def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
                       master_files):
    """Verifies and computes the node required file checksums.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param file_list: required list of files
    @param local_cksum: dictionary of local files and their checksums
    @param master_files: list of files that only masters should have

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_cksum = nresult.get(constants.NV_FILELIST, None)
    test = not isinstance(remote_cksum, dict)
    _ErrorIf(test, self.ENODEFILECHECK, node,
             "node hasn't returned file checksum data")
    if test:
      return

    for file_name in file_list:
      node_is_mc = ninfo.master_candidate
      must_have = (file_name not in master_files) or node_is_mc
      # missing
      test1 = file_name not in remote_cksum
      # invalid checksum
      test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
      # existing and good
      test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
      _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
               "file '%s' missing", file_name)
      _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
               "file '%s' has wrong checksum", file_name)
      # not candidate and this is not a must-have file
      _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist on non master"
               " candidates (and the file is outdated)", file_name)
      # all good, except non-master/non-must have combination
      _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist"
               " on non master candidates", file_name)

  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_map):
    """Verifies the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # compute the DRBD minors
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, self.ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
      # ghost instance should not be running, but otherwise we
      # don't give double warnings (both ghost instance and
      # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name, instance.admin_up)

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, self.ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return

    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, self.ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, self.ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)

  def _UpdateNodeOS(self, ninfo, nresult, nimg):
    """Builds the node OS structures.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_os = nresult.get(constants.NV_OSLIST, None)
    test = (not isinstance(remote_os, list) or
            not compat.all(remote_os,
                           lambda v: isinstance(v, list) and len(v) == 7))

    _ErrorIf(test, self.ENODEOS, node,
             "node hasn't returned valid OS data")

    nimg.os_fail = test

    if test:
      return

    os_dict = {}

    for (name, os_path, status, diagnose,
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:

      if name not in os_dict:
        os_dict[name] = []

      # parameters is a list of lists instead of list of tuples due to
      # JSON lacking a real tuple type, fix it:
      parameters = [tuple(v) for v in parameters]
      os_dict[name].append((os_path, status, diagnose,
                            set(variants), set(parameters), set(api_ver)))

    nimg.oslist = os_dict
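
  # Illustrative sketch (hypothetical OS names and paths): after this call,
  # nimg.oslist maps each OS name to the list of installations found on the
  # node, e.g.:
  #
  #   nimg.oslist = {
  #     "debian-image": [("/srv/ganeti/os/debian-image", True, "",
  #                       set(["default"]), set(), set([15]))],
  #   }
  #
  # Multiple entries under one name mean the first path shadows the others,
  # which _VerifyNodeOS below reports as an ENODEOS warning.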

  def _VerifyNodeOS(self, ninfo, nimg, base):
    """Verifies the node OS list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nimg: the node image object
    @param base: the 'template' node we match against (e.g. from the master)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"

    for os_name, os_data in nimg.oslist.items():
      assert os_data, "Empty OS status for OS %s?!" % os_name
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
      _ErrorIf(not f_status, self.ENODEOS, node,
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
               "OS '%s' has multiple entries (first one shadows the rest): %s",
               os_name, utils.CommaJoin([v[0] for v in os_data]))
      # this will be caught in the backend too
      _ErrorIf(compat.any(f_api, lambda v: v >= constants.OS_API_V15)
               and not f_var, self.ENODEOS, node,
               "OS %s with API at least %d does not declare any variant",
               os_name, constants.OS_API_V15)
      # comparisons with the 'base' image
      test = os_name not in base.oslist
      _ErrorIf(test, self.ENODEOS, node,
               "Extra OS %s not present on reference node (%s)",
               os_name, base.name)
      if test:
        continue
      assert base.oslist[os_name], "Base node has empty OS status?"
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
      if not b_status:
        # base OS is invalid, skipping
        continue
      for kind, a, b in [("API version", f_api, b_api),
                         ("variants list", f_var, b_var),
                         ("parameters", f_param, b_param)]:
        _ErrorIf(a != b, self.ENODEOS, node,
                 "OS %s %s differs from reference node %s: %s vs. %s",
                 kind, os_name, base.name,
                 utils.CommaJoin(a), utils.CommaJoin(b))

    # check any missing OSes
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
    _ErrorIf(missing, self.ENODEOS, node,
             "OSes present on reference node %s but missing on this node: %s",
             base.name, utils.CommaJoin(missing))

  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
    """Verifies and updates the node volume data.

    This function will update a L{NodeImage}'s internal structures
    with data from the remote call.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    if vg_name is None:
      pass
    elif isinstance(lvdata, basestring):
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
    else:
      nimg.volumes = lvdata
      nimg.lvm_fail = False

  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
    """Verifies and updates the node instance list.

    If the listing was successful, then updates this node's instance
    list. Otherwise, it marks the RPC call as failed for the instance
    list key.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    idata = nresult.get(constants.NV_INSTANCELIST, None)
    test = not isinstance(idata, list)
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
    if test:
      nimg.hyp_fail = True
    else:
      nimg.instances = idata

  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
    """Verifies and computes a node information map

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # try to read free memory (from the hypervisor)
    hv_info = nresult.get(constants.NV_HVINFO, None)
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
    if not test:
      try:
        nimg.mfree = int(hv_info["memory_free"])
      except (ValueError, TypeError):
        _ErrorIf(True, self.ENODERPC, node,
                 "node returned invalid nodeinfo, check hypervisor")

    # FIXME: devise a free space model for file based instances as well
    if vg_name is not None:
      test = (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST])
      _ErrorIf(test, self.ENODELVM, node,
               "node didn't return data for the volume group '%s'"
               " - it is either missing or broken", vg_name)
      if not test:
        try:
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
        except (ValueError, TypeError):
          _ErrorIf(True, self.ENODERPC, node,
                   "node returned invalid LVM info, check LVM status")

  def CheckPrereq(self):
    """Check prerequisites.

    Transform the list of checks we're going to skip into a set and check that
    all its members are valid.

    """
    self.skip_set = frozenset(self.op.skip_checks)
    if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
      raise errors.OpPrereqError("Invalid checks to be skipped specified",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks run just in the post phase; if they fail, their
    output is logged in the verify output and the verification fails.

    """
    all_nodes = self.cfg.GetNodeList()
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
      }
    for node in self.cfg.GetAllNodesInfo().values():
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())

    return env, [], all_nodes

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    self.bad = False
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn
    feedback_fn("* Verifying global settings")
    for msg in self.cfg.VerifyConfig():
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)

    # Check the cluster certificates
    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)

    vg_name = self.cfg.GetVGName()
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
    cluster = self.cfg.GetClusterInfo()
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
                        for iname in instancelist)
    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = 0 # Count of offline nodes
    n_drained = 0 # Count of nodes being drained
    node_vol_should = {}

    # FIXME: verify OS list
    # do local checksums
    master_files = [constants.CLUSTER_CONF_FILE]
    master_node = self.master_node = self.cfg.GetMasterNode()
    master_ip = self.cfg.GetMasterIP()

    file_names = ssconf.SimpleStore().GetFileList()
    file_names.extend(constants.ALL_CERT_FILES)
    file_names.extend(master_files)
    if cluster.modify_etc_hosts:
      file_names.append(constants.ETC_HOSTS)

    local_checksums = utils.FingerprintFiles(file_names)

    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
    node_verify_param = {
      constants.NV_FILELIST: file_names,
      constants.NV_NODELIST: [node.name for node in nodeinfo
                              if not node.offline],
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
                                  node.secondary_ip) for node in nodeinfo
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
      constants.NV_MASTERIP: (master_node, master_ip),
      constants.NV_OSLIST: None,
      }

    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]
      node_verify_param[constants.NV_DRBDLIST] = None

    # Build our expected cluster state
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
                                                 name=node.name))
                      for node in nodeinfo)

    for instance in instancelist:
      inst_config = instanceinfo[instance]

      for nname in inst_config.all_nodes:
        if nname not in node_image:
          # ghost node
          gnode = self.NodeImage(name=nname)
          gnode.ghost = True
          node_image[nname] = gnode

      inst_config.MapLVsByNode(node_vol_should)

      pnode = inst_config.primary_node
      node_image[pnode].pinst.append(instance)

      for snode in inst_config.secondary_nodes:
        nimg = node_image[snode]
        nimg.sinst.append(instance)
        if pnode not in nimg.sbp:
          nimg.sbp[pnode] = []
        nimg.sbp[pnode].append(instance)

    # At this point, we have the in-memory data structures complete,
    # except for the runtime information, which we'll gather next

    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()

    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Verifying node status")

    refos_img = None

    for node_i in nodeinfo:
      node = node_i.name
      nimg = node_image[node]

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline += 1
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained += 1
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
      if msg:
        nimg.rpc_fail = True
        continue

      nresult = all_nvinfo[node].payload

      nimg.call_ok = self._VerifyNode(node_i, nresult)
      self._VerifyNodeNetwork(node_i, nresult)
      self._VerifyNodeLVM(node_i, nresult, vg_name)
      self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
                            master_files)
      self._VerifyNodeDrbd(node_i, nresult, instanceinfo, all_drbd_map)
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)

      self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
      self._UpdateNodeInstances(node_i, nresult, nimg)
      self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
      self._UpdateNodeOS(node_i, nresult, nimg)
      if not nimg.os_fail:
        if refos_img is None:
          refos_img = nimg
        self._VerifyNodeOS(node_i, nimg, refos_img)

    feedback_fn("* Verifying instance status")
    for instance in instancelist:
      if verbose:
        feedback_fn("* Verifying instance %s" % instance)
      inst_config = instanceinfo[instance]
      self._VerifyInstance(instance, inst_config, node_image)
      inst_nodes_offline = []

      pnode = inst_config.primary_node
      pnode_img = node_image[pnode]
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
               self.ENODERPC, pnode, "instance %s, connection to"
               " primary node failed", instance)

      if pnode_img.offline:
        inst_nodes_offline.append(pnode)

      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
      if not inst_config.secondary_nodes:
        i_non_redundant.append(instance)
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
               instance, "instance has multiple secondary nodes: %s",
               utils.CommaJoin(inst_config.secondary_nodes),
               code=self.ETYPE_WARNING)

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        s_img = node_image[snode]
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
                 "instance %s, connection to secondary node failed", instance)

        if s_img.offline:
          inst_nodes_offline.append(snode)

      # warn that the instance lives on offline nodes
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
               "instance lives on offline node(s) %s",
               utils.CommaJoin(inst_nodes_offline))
      # ... or ghost nodes
      for node in inst_config.all_nodes:
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
                 "instance lives on ghost node %s", node)

    feedback_fn("* Verifying orphan volumes")
    self._VerifyOrphanVolumes(node_vol_should, node_image)

    feedback_fn("* Verifying orphan instances")
    self._VerifyOrphanInstances(instancelist, node_image)

    if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
      feedback_fn("* Verifying N+1 Memory redundancy")
      self._VerifyNPlusOneMemory(node_image, instanceinfo)

    feedback_fn("* Other Notes")
    if i_non_redundant:
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))

    if i_non_a_balanced:
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
                  % len(i_non_a_balanced))

    if n_offline:
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)

    if n_drained:
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)

    return not self.bad

  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result.

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, and are only interested in
    # their results
    if phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      indent_re = re.compile('^', re.M)
      feedback_fn("* Hooks Results")
      assert hooks_results, "invalid result from hooks"

      for node_name in hooks_results:
        res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
                      "Communication failure in hooks execution: %s", msg)
        if res.offline or msg:
          # No need to investigate payload if node is offline or gave an error.
          # override lu_result manually here, as _ErrorIf only
          # overrides self.bad
          lu_result = 1
          continue
        for script, hkr, output in res.payload:
          test = hkr == constants.HKR_FAIL
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
                        "Script %s failed, output:", script)
          if test:
            output = indent_re.sub('      ', output)
            feedback_fn("%s" % output)
            lu_result = 0

      return lu_result


class LUVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def CheckPrereq(self):
    """Check prerequisites.

    This has no prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    result = res_nodes, res_instances, res_missing = {}, [], {}

    vg_name = self.cfg.GetVGName()
    nodes = utils.NiceSort(self.cfg.GetNodeList())
    instances = [self.cfg.GetInstanceInfo(name)
                 for name in self.cfg.GetInstanceList()]

    nv_dict = {}
    for inst in instances:
      inst_lvs = {}
      if (not inst.admin_up or
          inst.disk_template not in constants.DTS_NET_MIRROR):
        continue
      inst.MapLVsByNode(inst_lvs)
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
      for node, vol_list in inst_lvs.iteritems():
        for vol in vol_list:
          nv_dict[(node, vol)] = inst

    if not nv_dict:
      return result

    node_lvs = self.rpc.call_lv_list(nodes, vg_name)

    for node in nodes:
      # node_volume
      node_res = node_lvs[node]
      if node_res.offline:
        continue
      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg
        continue

      lvs = node_res.payload
      for lv_name, (_, _, lv_online) in lvs.items():
        inst = nv_dict.pop((node, lv_name), None)
        if (not lv_online and inst is not None
            and inst.name not in res_instances):
          res_instances.append(inst.name)

    # any leftover items in nv_dict are missing LVs, let's arrange the
    # data better
    for key, inst in nv_dict.iteritems():
      if inst.name not in res_missing:
        res_missing[inst.name] = []
      res_missing[inst.name].append(key)

    return result
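
  # Illustrative sketch (hypothetical names): the returned triple could look
  # like:
  #
  #   ({"node3": "Error while running lvs: ..."},      # per-node errors
  #    ["instance2.example.com"],                      # need activate-disks
  #    {"instance5.example.com": [("node1", "xenvg/...")]})  # missing LVs
  #
  # i.e. node errors keyed by node name, instances whose LVs are offline,
  # and (node, volume) pairs that were expected but not reported at all.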


class LURepairDiskSizes(NoHooksLU):
  """Verifies the cluster disk sizes.

  """
  _OP_REQP = ["instances"]
  REQ_BGL = False

  def CheckArguments(self):
    if not isinstance(self.op.instances, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = _ExpandInstanceName(self.cfg, name)
        self.wanted_names.append(full_name)
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False
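
  # Illustrative sketch (hypothetical sizes, in MiB): for a DRBD8 disk of
  # size 10240 whose data child (children[0]) was recorded as 10112, the
  # call chain is:
  #
  #   _EnsureChildSizes(drbd_disk)
  #     -> mismatch = 10112 < 10240  (True, child is grown to 10240)
  #     -> recurses into the data child (an LV, so it returns False)
  #     -> returns True, prompting the caller to write the config update
  #
  # The second child (the DRBD metadata device) is deliberately left alone.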

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))

    changed = []
    for node, dskl in per_node_disks.items():
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsizes(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsizes call to node"
                        " %s, ignoring", node)
        continue
      if len(result.data) != len(dskl):
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, result.data):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
        size = size >> 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))
    return changed


class LURenameCluster(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }
    mn = self.cfg.GetMasterNode()
    all_nodes = self.cfg.GetNodeList()
    return env, [mn], all_nodes

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    hostname = utils.GetHostInfo(self.op.name)

    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)
    if new_ip != old_ip:
      if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network. Aborting." %
                                   new_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = new_name

    
2355
  def Exec(self, feedback_fn):
2356
    """Rename the cluster.
2357

2358
    """
2359
    clustername = self.op.name
2360
    ip = self.ip
2361

    
2362
    # shutdown the master IP
2363
    master = self.cfg.GetMasterNode()
2364
    result = self.rpc.call_node_stop_master(master, False)
2365
    result.Raise("Could not disable the master role")
2366

    
2367
    try:
2368
      cluster = self.cfg.GetClusterInfo()
2369
      cluster.cluster_name = clustername
2370
      cluster.master_ip = ip
2371
      self.cfg.Update(cluster, feedback_fn)
2372

    
2373
      # update the known hosts file
2374
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2375
      node_list = self.cfg.GetNodeList()
2376
      try:
2377
        node_list.remove(master)
2378
      except ValueError:
2379
        pass
2380
      result = self.rpc.call_upload_file(node_list,
2381
                                         constants.SSH_KNOWN_HOSTS_FILE)
2382
      for to_node, to_result in result.iteritems():
2383
        msg = to_result.fail_msg
2384
        if msg:
2385
          msg = ("Copy of file %s to node %s failed: %s" %
2386
                 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
2387
          self.proc.LogWarning(msg)
2388

    
2389
    finally:
2390
      result = self.rpc.call_node_start_master(master, False, False)
2391
      msg = result.fail_msg
2392
      if msg:
2393
        self.LogWarning("Could not re-enable the master role on"
2394
                        " the master, please restart manually: %s", msg)


def _RecursiveCheckIfLVMBased(disk):
  """Check if the given disk or its children are lvm-based.

  @type disk: L{objects.Disk}
  @param disk: the disk to check
  @rtype: boolean
  @return: boolean indicating whether a LD_LV dev_type was found or not

  """
  if disk.children:
    for chdisk in disk.children:
      if _RecursiveCheckIfLVMBased(chdisk):
        return True
  return disk.dev_type == constants.LD_LV
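
# Illustrative sketch (hypothetical layout): for a DRBD8 disk whose children
# are two LVs (data and metadata), the recursion finds an LD_LV child and
# returns True; for a plain file-based disk with no children it falls
# through to the final check and returns False.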


class LUSetClusterParams(LogicalUnit):
  """Change the parameters of the cluster.

  """
  HPATH = "cluster-modify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []
  _OP_DEFS = [
    ("candidate_pool_size", None),
    ("uid_pool", None),
    ("add_uids", None),
    ("remove_uids", None),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    """Check parameters.

    """
    if self.op.candidate_pool_size is not None:
      try:
        self.op.candidate_pool_size = int(self.op.candidate_pool_size)
      except (ValueError, TypeError), err:
        raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
                                   str(err), errors.ECODE_INVAL)
      if self.op.candidate_pool_size < 1:
        raise errors.OpPrereqError("At least one master candidate needed",
                                   errors.ECODE_INVAL)

    _CheckBooleanOpField(self.op, "maintain_node_health")

    if self.op.uid_pool:
      uidpool.CheckUidPool(self.op.uid_pool)

    if self.op.add_uids:
      uidpool.CheckUidPool(self.op.add_uids)

    if self.op.remove_uids:
      uidpool.CheckUidPool(self.op.remove_uids)

  def ExpandNames(self):
    # FIXME: in the future maybe other cluster params won't require checking on
    # all nodes to be modified.
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
      }
    mn = self.cfg.GetMasterNode()
    return env, [mn], [mn]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the given params don't conflict and
    if the given volume group is valid.

    """
    if self.op.vg_name is not None and not self.op.vg_name:
      instances = self.cfg.GetAllInstancesInfo().values()
      for inst in instances:
        for disk in inst.disks:
          if _RecursiveCheckIfLVMBased(disk):
            raise errors.OpPrereqError("Cannot disable lvm storage while"
                                       " lvm-based instances exist",
                                       errors.ECODE_INVAL)

    node_list = self.acquired_locks[locking.LEVEL_NODE]

    # if vg_name not None, checks given volume group on all nodes
    if self.op.vg_name:
      vglist = self.rpc.call_vg_list(node_list)
      for node in node_list:
        msg = vglist[node].fail_msg
        if msg:
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
          continue
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              self.op.vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus), errors.ECODE_ENVIRON)

    self.cluster = cluster = self.cfg.GetClusterInfo()
    # validate params changes
    if self.op.beparams:
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)

    if self.op.nicparams:
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
      nic_errors = []

      # check all instances for consistency
      for instance in self.cfg.GetAllInstancesInfo().values():
        for nic_idx, nic in enumerate(instance.nics):
          params_copy = copy.deepcopy(nic.nicparams)
          params_filled = objects.FillDict(self.new_nicparams, params_copy)

          # check parameter syntax
          try:
            objects.NIC.CheckParameterSyntax(params_filled)
          except errors.ConfigurationError, err:
            nic_errors.append("Instance %s, nic/%d: %s" %
                              (instance.name, nic_idx, err))

          # if we're moving instances to routed, check that they have an ip
          target_mode = params_filled[constants.NIC_MODE]
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
            nic_errors.append("Instance %s, nic/%d: routed nic with no ip" %
                              (instance.name, nic_idx))
      if nic_errors:
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
                                   "\n".join(nic_errors))

    # hypervisor list/parameters
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
    if self.op.hvparams:
      if not isinstance(self.op.hvparams, dict):
        raise errors.OpPrereqError("Invalid 'hvparams' parameter on input",
                                   errors.ECODE_INVAL)
      for hv_name, hv_dict in self.op.hvparams.items():
        if hv_name not in self.new_hvparams:
          self.new_hvparams[hv_name] = hv_dict
        else:
          self.new_hvparams[hv_name].update(hv_dict)

    # os hypervisor parameters
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
    if self.op.os_hvp:
      if not isinstance(self.op.os_hvp, dict):
        raise errors.OpPrereqError("Invalid 'os_hvp' parameter on input",
                                   errors.ECODE_INVAL)
      for os_name, hvs in self.op.os_hvp.items():
        if not isinstance(hvs, dict):
          raise errors.OpPrereqError(("Invalid 'os_hvp' parameter on"
                                      " input"), errors.ECODE_INVAL)
        if os_name not in self.new_os_hvp:
          self.new_os_hvp[os_name] = hvs
        else:
          for hv_name, hv_dict in hvs.items():
            if hv_name not in self.new_os_hvp[os_name]:
              self.new_os_hvp[os_name][hv_name] = hv_dict
            else:
              self.new_os_hvp[os_name][hv_name].update(hv_dict)

    # os parameters
    self.new_osp = objects.FillDict(cluster.osparams, {})
    if self.op.osparams:
      if not isinstance(self.op.osparams, dict):
        raise errors.OpPrereqError("Invalid 'osparams' parameter on input",
                                   errors.ECODE_INVAL)
      for os_name, osp in self.op.osparams.items():
        if not isinstance(osp, dict):
          raise errors.OpPrereqError(("Invalid 'osparams' parameter on"
                                      " input"), errors.ECODE_INVAL)
        if os_name not in self.new_osp:
          self.new_osp[os_name] = {}

        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
                                                  use_none=True)

        if not self.new_osp[os_name]:
          # we removed all parameters
          del self.new_osp[os_name]
        else:
          # check the parameter validity (remote check)
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
                         os_name, self.new_osp[os_name])

    # changes to the hypervisor list
    if self.op.enabled_hypervisors is not None:
      self.hv_list = self.op.enabled_hypervisors
      if not self.hv_list:
        raise errors.OpPrereqError("Enabled hypervisors list must contain at"
                                   " least one member",
                                   errors.ECODE_INVAL)
      invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
      if invalid_hvs:
        raise errors.OpPrereqError("Enabled hypervisors contains invalid"
                                   " entries: %s" %
                                   utils.CommaJoin(invalid_hvs),
                                   errors.ECODE_INVAL)
      for hv in self.hv_list:
        # if the hypervisor doesn't already exist in the cluster
        # hvparams, we initialize it to empty, and then (in both
        # cases) we make sure to fill the defaults, as we might not
        # have a complete defaults list if the hypervisor wasn't
        # enabled before
        if hv not in new_hvp:
          new_hvp[hv] = {}
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
    else:
      self.hv_list = cluster.enabled_hypervisors

    if self.op.hvparams or self.op.enabled_hypervisors is not None:
      # either the enabled list has changed, or the parameters have, validate
      for hv_name, hv_params in self.new_hvparams.items():
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
            (self.op.enabled_hypervisors and
             hv_name in self.op.enabled_hypervisors)):
          # either this is a new hypervisor, or its parameters have changed
          hv_class = hypervisor.GetHypervisor(hv_name)
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          hv_class.CheckParameterSyntax(hv_params)
          _CheckHVParams(self, node_list, hv_name, hv_params)

    if self.op.os_hvp:
      # no need to check any newly-enabled hypervisors, since the
      # defaults have already been checked in the above code-block
      for os_name, os_hvp in self.new_os_hvp.items():
        for hv_name, hv_params in os_hvp.items():
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          # we need to fill in the new os_hvp on top of the actual hv_p
          cluster_defaults = self.new_hvparams.get(hv_name, {})
          new_osp = objects.FillDict(cluster_defaults, hv_params)
          hv_class = hypervisor.GetHypervisor(hv_name)
          hv_class.CheckParameterSyntax(new_osp)
          _CheckHVParams(self, node_list, hv_name, new_osp)
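
  # Illustrative sketch (hypothetical values): op.os_hvp is a two-level
  # mapping of OS name -> hypervisor -> parameter overrides, e.g.:
  #
  #   self.op.os_hvp = {"debian-image": {"xen-pvm": {"kernel_path":
  #                                                  "/boot/vmlinuz-custom"}}}
  #
  # while self.op.osparams is one level shallower, OS name -> parameters.
  # Both are merged on top of the cluster-wide defaults before validation.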

  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    """
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
    if self.op.enabled_hypervisors is not None:
      self.cluster.hvparams = self.new_hvparams
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
    if self.op.beparams:
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
    if self.op.nicparams:
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
    if self.op.osparams:
      self.cluster.osparams = self.new_osp

    if self.op.candidate_pool_size is not None:
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
      # we need to update the pool size here, otherwise the save will fail
      _AdjustCandidatePool(self, [])

    if self.op.maintain_node_health is not None:
      self.cluster.maintain_node_health = self.op.maintain_node_health

    if self.op.add_uids is not None:
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)

    if self.op.remove_uids is not None:
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)

    if self.op.uid_pool is not None:
      self.cluster.uid_pool = self.op.uid_pool

    self.cfg.Update(self.cluster, feedback_fn)


def _RedistributeAncillaryFiles(lu, additional_nodes=None):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to

  """
  # 1. Gather target nodes
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
  dist_nodes = lu.cfg.GetOnlineNodeList()
  if additional_nodes is not None:
    dist_nodes.extend(additional_nodes)
  if myself.name in dist_nodes:
    dist_nodes.remove(myself.name)

  # 2. Gather files to distribute
  dist_files = set([constants.ETC_HOSTS,
                    constants.SSH_KNOWN_HOSTS_FILE,
                    constants.RAPI_CERT_FILE,
                    constants.RAPI_USERS_FILE,
                    constants.CONFD_HMAC_KEY,
                    constants.CLUSTER_DOMAIN_SECRET_FILE,
                   ])

  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
  for hv_name in enabled_hypervisors:
    hv_class = hypervisor.GetHypervisor(hv_name)
    dist_files.update(hv_class.GetAncillaryFiles())

  # 3. Perform the files upload
  for fname in dist_files:
    if os.path.exists(fname):
      result = lu.rpc.call_upload_file(dist_nodes, fname)
      for to_node, to_result in result.items():
        msg = to_result.fail_msg
        if msg:
          msg = ("Copy of file %s to node %s failed: %s" %
                 (fname, to_node, msg))
          lu.proc.LogWarning(msg)
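
# Illustrative usage sketch (hypothetical caller): an LU that just added a
# node could call
#
#   _RedistributeAncillaryFiles(self, additional_nodes=[new_node.name])
#
# so the new node (not yet in the online node list) also receives /etc/hosts,
# the SSH known_hosts file, the RAPI certificate and the other ancillary
# files, in addition to what ConfigWriter distributes on its own.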


class LURedistributeConfig(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def CheckPrereq(self):
    """Check prerequisites.

    """

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)


def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disks to sync.

  """
  if not instance.disks or (disks is not None and not disks):
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                      node, disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded
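
# Illustrative usage sketch (hypothetical variable names): a caller that has
# just created a mirrored instance typically does something like
#
#   disk_abort = not _WaitForSync(self, iobj)
#
# and treats a False return value as "still degraded". The loop polls the
# primary node, logs per-device progress (sync percentage plus the
# remaining-time estimate) and sleeps up to 60 seconds between polls,
# based on the reported estimate.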


def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result
2875

    
2876

    
2877
class LUDiagnoseOS(NoHooksLU):
2878
  """Logical unit for OS diagnose/query.
2879

2880
  """
2881
  _OP_REQP = ["output_fields", "names"]
2882
  REQ_BGL = False
2883
  _FIELDS_STATIC = utils.FieldSet()
2884
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants",
2885
                                   "parameters", "api_versions")
2886

    
2887
  def CheckArguments(self):
2888
    if self.op.names:
2889
      raise errors.OpPrereqError("Selective OS query not supported",
2890
                                 errors.ECODE_INVAL)
2891

    
2892
    _CheckOutputFields(static=self._FIELDS_STATIC,
2893
                       dynamic=self._FIELDS_DYNAMIC,
2894
                       selected=self.op.output_fields)
2895

    
2896
  def ExpandNames(self):
2897
    # Lock all nodes, in shared mode
2898
    # Temporary removal of locks, should be reverted later
2899
    # TODO: reintroduce locks when they are lighter-weight
2900
    self.needed_locks = {}
2901
    #self.share_locks[locking.LEVEL_NODE] = 1
2902
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2903

    
2904
  def CheckPrereq(self):
2905
    """Check prerequisites.
2906

2907
    """
2908

    
2909
  @staticmethod
2910
  def _DiagnoseByOS(rlist):
2911
    """Remaps a per-node return list into an a per-os per-node dictionary
2912

2913
    @param rlist: a map with node names as keys and OS objects as values
2914

2915
    @rtype: dict
2916
    @return: a dictionary with osnames as keys and as value another
2917
        map, with nodes as keys and tuples of (path, status, diagnose,
2918
        variants, parameters, api_versions) as values, eg::
2919

2920
          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
2921
                                     (/srv/..., False, "invalid api")],
2922
                           "node2": [(/srv/..., True, "", [], [])]}
2923
          }
2924

2925
    """
2926
    all_os = {}
2927
    # we build here the list of nodes that didn't fail the RPC (at RPC
2928
    # level), so that nodes with a non-responding node daemon don't
2929
    # make all OSes invalid
2930
    good_nodes = [node_name for node_name in rlist
2931
                  if not rlist[node_name].fail_msg]
2932
    for node_name, nr in rlist.items():
2933
      if nr.fail_msg or not nr.payload:
2934
        continue
2935
      for (name, path, status, diagnose, variants,
2936
           params, api_versions) in nr.payload:
2937
        if name not in all_os:
2938
          # build a list of nodes for this os containing empty lists
2939
          # for each node in node_list
2940
          all_os[name] = {}
2941
          for nname in good_nodes:
2942
            all_os[name][nname] = []
2943
        # convert params from [name, help] to (name, help)
2944
        params = [tuple(v) for v in params]
2945
        all_os[name][node_name].append((path, status, diagnose,
2946
                                        variants, params, api_versions))
2947
    return all_os
2948

    
2949
  def Exec(self, feedback_fn):
2950
    """Compute the list of OSes.
2951

2952
    """
2953
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
2954
    node_data = self.rpc.call_os_diagnose(valid_nodes)
2955
    pol = self._DiagnoseByOS(node_data)
2956
    output = []
2957

    
2958
    for os_name, os_data in pol.items():
2959
      row = []
2960
      valid = True
2961
      (variants, params, api_versions) = null_state = (set(), set(), set())
2962
      for idx, osl in enumerate(os_data.values()):
2963
        valid = bool(valid and osl and osl[0][1])
2964
        if not valid:
2965
          (variants, params, api_versions) = null_state
2966
          break
2967
        node_variants, node_params, node_api = osl[0][3:6]
2968
        if idx == 0: # first entry
2969
          variants = set(node_variants)
2970
          params = set(node_params)
2971
          api_versions = set(node_api)
2972
        else: # keep consistency
2973
          variants.intersection_update(node_variants)
2974
          params.intersection_update(node_params)
2975
          api_versions.intersection_update(node_api)
2976

    
2977
      for field in self.op.output_fields:
2978
        if field == "name":
2979
          val = os_name
2980
        elif field == "valid":
2981
          val = valid
2982
        elif field == "node_status":
2983
          # this is just a copy of the dict
2984
          val = {}
2985
          for node_name, nos_list in os_data.items():
2986
            val[node_name] = nos_list
2987
        elif field == "variants":
2988
          val = list(variants)
2989
        elif field == "parameters":
2990
          val = list(params)
2991
        elif field == "api_versions":
2992
          val = list(api_versions)
2993
        else:
2994
          raise errors.ParameterError(field)
2995
        row.append(val)
2996
      output.append(row)
2997

    
2998
    return output
2999

    
3000

    
3001
class LURemoveNode(LogicalUnit):
3002
  """Logical unit for removing a node.
3003

3004
  """
3005
  HPATH = "node-remove"
3006
  HTYPE = constants.HTYPE_NODE
3007
  _OP_REQP = ["node_name"]
3008

    
3009
  def BuildHooksEnv(self):
3010
    """Build hooks env.
3011

3012
    This doesn't run on the target node in the pre phase as a failed
3013
    node would then be impossible to remove.
3014

3015
    """
3016
    env = {
3017
      "OP_TARGET": self.op.node_name,
3018
      "NODE_NAME": self.op.node_name,
3019
      }
3020
    all_nodes = self.cfg.GetNodeList()
3021
    try:
3022
      all_nodes.remove(self.op.node_name)
3023
    except ValueError:
3024
      logging.warning("Node %s which is about to be removed not found"
3025
                      " in the all nodes list", self.op.node_name)
3026
    return env, all_nodes, all_nodes
3027

    
3028
  def CheckPrereq(self):
3029
    """Check prerequisites.
3030

3031
    This checks:
3032
     - the node exists in the configuration
3033
     - it does not have primary or secondary instances
3034
     - it's not the master
3035

3036
    Any errors are signaled by raising errors.OpPrereqError.
3037

3038
    """
3039
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3040
    node = self.cfg.GetNodeInfo(self.op.node_name)
3041
    assert node is not None
3042

    
3043
    instance_list = self.cfg.GetInstanceList()
3044

    
3045
    masternode = self.cfg.GetMasterNode()
3046
    if node.name == masternode:
3047
      raise errors.OpPrereqError("Node is the master node,"
3048
                                 " you need to failover first.",
3049
                                 errors.ECODE_INVAL)
3050

    
3051
    for instance_name in instance_list:
3052
      instance = self.cfg.GetInstanceInfo(instance_name)
3053
      if node.name in instance.all_nodes:
3054
        raise errors.OpPrereqError("Instance %s is still running on the node,"
3055
                                   " please remove first." % instance_name,
3056
                                   errors.ECODE_INVAL)
3057
    self.op.node_name = node.name
3058
    self.node = node
3059

    
3060
  def Exec(self, feedback_fn):
3061
    """Removes the node from the cluster.
3062

3063
    """
3064
    node = self.node
3065
    logging.info("Stopping the node daemon and removing configs from node %s",
3066
                 node.name)
3067

    
3068
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3069

    
3070
    # Promote nodes to master candidate as needed
3071
    _AdjustCandidatePool(self, exceptions=[node.name])
3072
    self.context.RemoveNode(node.name)
3073

    
3074
    # Run post hooks on the node before it's removed
3075
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
3076
    try:
3077
      hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
3078
    except:
3079
      # pylint: disable-msg=W0702
3080
      self.LogWarning("Errors occurred running hooks on %s" % node.name)
3081

    
3082
    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3083
    msg = result.fail_msg
3084
    if msg:
3085
      self.LogWarning("Errors encountered on the remote node while leaving"
3086
                      " the cluster: %s", msg)
3087

    
3088
    # Remove node from our /etc/hosts
3089
    if self.cfg.GetClusterInfo().modify_etc_hosts:
3090
      # FIXME: this should be done via an rpc call to node daemon
3091
      utils.RemoveHostFromEtcHosts(node.name)
3092
      _RedistributeAncillaryFiles(self)
3093

    
3094

    
3095
class LUQueryNodes(NoHooksLU):
3096
  """Logical unit for querying nodes.
3097

3098
  """
3099
  # pylint: disable-msg=W0142
3100
  _OP_REQP = ["output_fields", "names", "use_locking"]
3101
  REQ_BGL = False
3102

    
3103
  _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
3104
                    "master_candidate", "offline", "drained"]
3105

    
3106
  _FIELDS_DYNAMIC = utils.FieldSet(
3107
    "dtotal", "dfree",
3108
    "mtotal", "mnode", "mfree",
3109
    "bootid",
3110
    "ctotal", "cnodes", "csockets",
3111
    )
3112

    
3113
  _FIELDS_STATIC = utils.FieldSet(*[
3114
    "pinst_cnt", "sinst_cnt",
3115
    "pinst_list", "sinst_list",
3116
    "pip", "sip", "tags",
3117
    "master",
3118
    "role"] + _SIMPLE_FIELDS
3119
    )
3120

    
3121
  def CheckArguments(self):
3122
    _CheckOutputFields(static=self._FIELDS_STATIC,
3123
                       dynamic=self._FIELDS_DYNAMIC,
3124
                       selected=self.op.output_fields)
3125

    
3126
  def ExpandNames(self):
3127
    self.needed_locks = {}
3128
    self.share_locks[locking.LEVEL_NODE] = 1
3129

    
3130
    if self.op.names:
3131
      self.wanted = _GetWantedNodes(self, self.op.names)
3132
    else:
3133
      self.wanted = locking.ALL_SET
3134

    
3135
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
3136
    self.do_locking = self.do_node_query and self.op.use_locking
3137
    if self.do_locking:
3138
      # if we don't request only static fields, we need to lock the nodes
3139
      self.needed_locks[locking.LEVEL_NODE] = self.wanted
3140

    
3141
  def CheckPrereq(self):
3142
    """Check prerequisites.
3143

3144
    """
3145
    # The validation of the node list is done in the _GetWantedNodes,
3146
    # if non empty, and if empty, there's no validation to do
3147
    pass
3148

    
3149
  def Exec(self, feedback_fn):
3150
    """Computes the list of nodes and their attributes.
3151

3152
    """
3153
    all_info = self.cfg.GetAllNodesInfo()
3154
    if self.do_locking:
3155
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
3156
    elif self.wanted != locking.ALL_SET:
3157
      nodenames = self.wanted
3158
      missing = set(nodenames).difference(all_info.keys())
3159
      if missing:
3160
        raise errors.OpExecError(
3161
          "Some nodes were removed before retrieving their data: %s" % missing)
3162
    else:
3163
      nodenames = all_info.keys()
3164

    
3165
    nodenames = utils.NiceSort(nodenames)
3166
    nodelist = [all_info[name] for name in nodenames]
3167

    
3168
    # begin data gathering
3169

    
3170
    if self.do_node_query:
3171
      live_data = {}
3172
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
3173
                                          self.cfg.GetHypervisorType())
3174
      for name in nodenames:
3175
        nodeinfo = node_data[name]
3176
        if not nodeinfo.fail_msg and nodeinfo.payload:
3177
          nodeinfo = nodeinfo.payload
3178
          fn = utils.TryConvert
3179
          live_data[name] = {
3180
            "mtotal": fn(int, nodeinfo.get('memory_total', None)),
3181
            "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
3182
            "mfree": fn(int, nodeinfo.get('memory_free', None)),
3183
            "dtotal": fn(int, nodeinfo.get('vg_size', None)),
3184
            "dfree": fn(int, nodeinfo.get('vg_free', None)),
3185
            "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
3186
            "bootid": nodeinfo.get('bootid', None),
3187
            "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
3188
            "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
3189
            }
3190
        else:
3191
          live_data[name] = {}
3192
    else:
3193
      live_data = dict.fromkeys(nodenames, {})
3194

    
3195
    node_to_primary = dict([(name, set()) for name in nodenames])
3196
    node_to_secondary = dict([(name, set()) for name in nodenames])
3197

    
3198
    inst_fields = frozenset(("pinst_cnt", "pinst_list",
3199
                             "sinst_cnt", "sinst_list"))
3200
    if inst_fields & frozenset(self.op.output_fields):
3201
      inst_data = self.cfg.GetAllInstancesInfo()
3202

    
3203
      for inst in inst_data.values():
3204
        if inst.primary_node in node_to_primary:
3205
          node_to_primary[inst.primary_node].add(inst.name)
3206
        for secnode in inst.secondary_nodes:
3207
          if secnode in node_to_secondary:
3208
            node_to_secondary[secnode].add(inst.name)
3209

    
3210
    master_node = self.cfg.GetMasterNode()
3211

    
3212
    # end data gathering
3213

    
3214
    output = []
3215
    for node in nodelist:
3216
      node_output = []
3217
      for field in self.op.output_fields:
3218
        if field in self._SIMPLE_FIELDS:
3219
          val = getattr(node, field)
3220
        elif field == "pinst_list":
3221
          val = list(node_to_primary[node.name])
3222
        elif field == "sinst_list":
3223
          val = list(node_to_secondary[node.name])
3224
        elif field == "pinst_cnt":
3225
          val = len(node_to_primary[node.name])
3226
        elif field == "sinst_cnt":
3227
          val = len(node_to_secondary[node.name])
3228
        elif field == "pip":
3229
          val = node.primary_ip
3230
        elif field == "sip":
3231
          val = node.secondary_ip
3232
        elif field == "tags":
3233
          val = list(node.GetTags())
3234
        elif field == "master":
3235
          val = node.name == master_node
3236
        elif self._FIELDS_DYNAMIC.Matches(field):
3237
          val = live_data[node.name].get(field, None)
3238
        elif field == "role":
3239
          if node.name == master_node:
3240
            val = "M"
3241
          elif node.master_candidate:
3242
            val = "C"
3243
          elif node.drained:
3244
            val = "D"
3245
          elif node.offline:
3246
            val = "O"
3247
          else:
3248
            val = "R"
3249
        else:
3250
          raise errors.ParameterError(field)
3251
        node_output.append(val)
3252
      output.append(node_output)
3253

    
3254
    return output
3255

    
3256

    
3257
class LUQueryNodeVolumes(NoHooksLU):
3258
  """Logical unit for getting volumes on node(s).
3259

3260
  """
3261
  _OP_REQP = ["nodes", "output_fields"]
3262
  REQ_BGL = False
3263
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3264
  _FIELDS_STATIC = utils.FieldSet("node")
3265

    
3266
  def CheckArguments(self):
3267
    _CheckOutputFields(static=self._FIELDS_STATIC,
3268
                       dynamic=self._FIELDS_DYNAMIC,
3269
                       selected=self.op.output_fields)
3270

    
3271
  def ExpandNames(self):
3272
    self.needed_locks = {}
3273
    self.share_locks[locking.LEVEL_NODE] = 1
3274
    if not self.op.nodes:
3275
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3276
    else:
3277
      self.needed_locks[locking.LEVEL_NODE] = \
3278
        _GetWantedNodes(self, self.op.nodes)
3279

    
3280
  def CheckPrereq(self):
3281
    """Check prerequisites.
3282

3283
    This checks that the fields required are valid output fields.
3284

3285
    """
3286
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3287

    
3288
  def Exec(self, feedback_fn):
3289
    """Computes the list of nodes and their attributes.
3290

3291
    """
3292
    nodenames = self.nodes
3293
    volumes = self.rpc.call_node_volumes(nodenames)
3294

    
3295
    ilist = [self.cfg.GetInstanceInfo(iname) for iname
3296
             in self.cfg.GetInstanceList()]
3297

    
3298
    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3299

    
3300
    output = []
3301
    for node in nodenames:
3302
      nresult = volumes[node]
3303
      if nresult.offline:
3304
        continue
3305
      msg = nresult.fail_msg
3306
      if msg:
3307
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3308
        continue
3309

    
3310
      node_vols = nresult.payload[:]
3311
      node_vols.sort(key=lambda vol: vol['dev'])
3312

    
3313
      for vol in node_vols:
3314
        node_output = []
3315
        for field in self.op.output_fields:
3316
          if field == "node":
3317
            val = node
3318
          elif field == "phys":
3319
            val = vol['dev']
3320
          elif field == "vg":
3321
            val = vol['vg']
3322
          elif field == "name":
3323
            val = vol['name']
3324
          elif field == "size":
3325
            val = int(float(vol['size']))
3326
          elif field == "instance":
3327
            for inst in ilist:
3328
              if node not in lv_by_node[inst]:
3329
                continue
3330
              if vol['name'] in lv_by_node[inst][node]:
3331
                val = inst.name
3332
                break
3333
            else:
3334
              val = '-'
3335
          else:
3336
            raise errors.ParameterError(field)
3337
          node_output.append(str(val))
3338

    
3339
        output.append(node_output)
3340

    
3341
    return output
3342

    
3343

    
3344
class LUQueryNodeStorage(NoHooksLU):
3345
  """Logical unit for getting information on storage units on node(s).
3346

3347
  """
3348
  _OP_REQP = ["nodes", "storage_type", "output_fields"]
3349
  _OP_DEFS = [("name", None)]
3350
  REQ_BGL = False
3351
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3352

    
3353
  def CheckArguments(self):
3354
    _CheckStorageType(self.op.storage_type)
3355

    
3356
    _CheckOutputFields(static=self._FIELDS_STATIC,
3357
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3358
                       selected=self.op.output_fields)
3359

    
3360
  def ExpandNames(self):
3361
    self.needed_locks = {}
3362
    self.share_locks[locking.LEVEL_NODE] = 1
3363

    
3364
    if self.op.nodes:
3365
      self.needed_locks[locking.LEVEL_NODE] = \
3366
        _GetWantedNodes(self, self.op.nodes)
3367
    else:
3368
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3369

    
3370
  def CheckPrereq(self):
3371
    """Check prerequisites.
3372

3373
    This checks that the fields required are valid output fields.
3374

3375
    """
3376
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3377

    
3378
  def Exec(self, feedback_fn):
3379
    """Computes the list of nodes and their attributes.
3380

3381
    """
3382
    # Always get name to sort by
3383
    if constants.SF_NAME in self.op.output_fields:
3384
      fields = self.op.output_fields[:]
3385
    else:
3386
      fields = [constants.SF_NAME] + self.op.output_fields
3387

    
3388
    # Never ask for node or type as it's only known to the LU
3389
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
3390
      while extra in fields:
3391
        fields.remove(extra)
3392

    
3393
    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3394
    name_idx = field_idx[constants.SF_NAME]
3395

    
3396
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3397
    data = self.rpc.call_storage_list(self.nodes,
3398
                                      self.op.storage_type, st_args,
3399
                                      self.op.name, fields)
3400

    
3401
    result = []
3402

    
3403
    for node in utils.NiceSort(self.nodes):
3404
      nresult = data[node]
3405
      if nresult.offline:
3406
        continue
3407

    
3408
      msg = nresult.fail_msg
3409
      if msg:
3410
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3411
        continue
3412

    
3413
      rows = dict([(row[name_idx], row) for row in nresult.payload])
3414

    
3415
      for name in utils.NiceSort(rows.keys()):
3416
        row = rows[name]
3417

    
3418
        out = []
3419

    
3420
        for field in self.op.output_fields:
3421
          if field == constants.SF_NODE:
3422
            val = node
3423
          elif field == constants.SF_TYPE:
3424
            val = self.op.storage_type
3425
          elif field in field_idx:
3426
            val = row[field_idx[field]]
3427
          else:
3428
            raise errors.ParameterError(field)
3429

    
3430
          out.append(val)
3431

    
3432
        result.append(out)
3433

    
3434
    return result
3435

    
3436

    
3437
class LUModifyNodeStorage(NoHooksLU):
3438
  """Logical unit for modifying a storage volume on a node.
3439

3440
  """
3441
  _OP_REQP = ["node_name", "storage_type", "name", "changes"]
3442
  REQ_BGL = False
3443

    
3444
  def CheckArguments(self):
3445
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3446

    
3447
    _CheckStorageType(self.op.storage_type)
3448

    
3449
  def ExpandNames(self):
3450
    self.needed_locks = {
3451
      locking.LEVEL_NODE: self.op.node_name,
3452
      }
3453

    
3454
  def CheckPrereq(self):
3455
    """Check prerequisites.
3456

3457
    """
3458
    storage_type = self.op.storage_type
3459

    
3460
    try:
3461
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
3462
    except KeyError:
3463
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
3464
                                 " modified" % storage_type,
3465
                                 errors.ECODE_INVAL)
3466

    
3467
    diff = set(self.op.changes.keys()) - modifiable
3468
    if diff:
3469
      raise errors.OpPrereqError("The following fields can not be modified for"
3470
                                 " storage units of type '%s': %r" %
3471
                                 (storage_type, list(diff)),
3472
                                 errors.ECODE_INVAL)
3473

    
3474
  def Exec(self, feedback_fn):
3475
    """Computes the list of nodes and their attributes.
3476

3477
    """
3478
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3479
    result = self.rpc.call_storage_modify(self.op.node_name,
3480
                                          self.op.storage_type, st_args,
3481
                                          self.op.name, self.op.changes)
3482
    result.Raise("Failed to modify storage unit '%s' on %s" %
3483
                 (self.op.name, self.op.node_name))
3484

    
3485

    
3486
class LUAddNode(LogicalUnit):
3487
  """Logical unit for adding node to the cluster.
3488

3489
  """
3490
  HPATH = "node-add"
3491
  HTYPE = constants.HTYPE_NODE
3492
  _OP_REQP = ["node_name"]
3493
  _OP_DEFS = [("secondary_ip", None)]
3494

    
3495
  def CheckArguments(self):
3496
    # validate/normalize the node name
3497
    self.op.node_name = utils.HostInfo.NormalizeName(self.op.node_name)
3498

    
3499
  def BuildHooksEnv(self):
3500
    """Build hooks env.
3501

3502
    This will run on all nodes before, and on all nodes + the new node after.
3503

3504
    """
3505
    env = {
3506
      "OP_TARGET": self.op.node_name,
3507
      "NODE_NAME": self.op.node_name,
3508
      "NODE_PIP": self.op.primary_ip,
3509
      "NODE_SIP": self.op.secondary_ip,
3510
      }
3511
    nodes_0 = self.cfg.GetNodeList()
3512
    nodes_1 = nodes_0 + [self.op.node_name, ]
3513
    return env, nodes_0, nodes_1
3514

    
3515
  def CheckPrereq(self):
3516
    """Check prerequisites.
3517

3518
    This checks:
3519
     - the new node is not already in the config
3520
     - it is resolvable
3521
     - its parameters (single/dual homed) matches the cluster
3522

3523
    Any errors are signaled by raising errors.OpPrereqError.
3524

3525
    """
3526
    node_name = self.op.node_name
3527
    cfg = self.cfg
3528

    
3529
    dns_data = utils.GetHostInfo(node_name)
3530

    
3531
    node = dns_data.name
3532
    primary_ip = self.op.primary_ip = dns_data.ip
3533
    if self.op.secondary_ip is None:
3534
      self.op.secondary_ip = primary_ip
3535
    if not utils.IsValidIP(self.op.secondary_ip):
3536
      raise errors.OpPrereqError("Invalid secondary IP given",
3537
                                 errors.ECODE_INVAL)
3538
    secondary_ip = self.op.secondary_ip
3539

    
3540
    node_list = cfg.GetNodeList()
3541
    if not self.op.readd and node in node_list:
3542
      raise errors.OpPrereqError("Node %s is already in the configuration" %
3543
                                 node, errors.ECODE_EXISTS)
3544
    elif self.op.readd and node not in node_list:
3545
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3546
                                 errors.ECODE_NOENT)
3547

    
3548
    self.changed_primary_ip = False
3549

    
3550
    for existing_node_name in node_list:
3551
      existing_node = cfg.GetNodeInfo(existing_node_name)
3552

    
3553
      if self.op.readd and node == existing_node_name:
3554
        if existing_node.secondary_ip != secondary_ip:
3555
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
3556
                                     " address configuration as before",
3557
                                     errors.ECODE_INVAL)
3558
        if existing_node.primary_ip != primary_ip:
3559
          self.changed_primary_ip = True
3560

    
3561
        continue
3562

    
3563
      if (existing_node.primary_ip == primary_ip or
3564
          existing_node.secondary_ip == primary_ip or
3565
          existing_node.primary_ip == secondary_ip or
3566
          existing_node.secondary_ip == secondary_ip):
3567
        raise errors.OpPrereqError("New node ip address(es) conflict with"
3568
                                   " existing node %s" % existing_node.name,
3569
                                   errors.ECODE_NOTUNIQUE)
3570

    
3571
    # check that the type of the node (single versus dual homed) is the
3572
    # same as for the master
3573
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3574
    master_singlehomed = myself.secondary_ip == myself.primary_ip
3575
    newbie_singlehomed = secondary_ip == primary_ip
3576
    if master_singlehomed != newbie_singlehomed:
3577
      if master_singlehomed:
3578
        raise errors.OpPrereqError("The master has no private ip but the"
3579
                                   " new node has one",
3580
                                   errors.ECODE_INVAL)
3581
      else:
3582
        raise errors.OpPrereqError("The master has a private ip but the"
3583
                                   " new node doesn't have one",
3584
                                   errors.ECODE_INVAL)
3585

    
3586
    # checks reachability
3587
    if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3588
      raise errors.OpPrereqError("Node not reachable by ping",
3589
                                 errors.ECODE_ENVIRON)
3590

    
3591
    if not newbie_singlehomed:
3592
      # check reachability from my secondary ip to newbie's secondary ip
3593
      if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3594
                           source=myself.secondary_ip):
3595
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3596
                                   " based ping to noded port",
3597
                                   errors.ECODE_ENVIRON)
3598

    
3599
    if self.op.readd:
3600
      exceptions = [node]
3601
    else:
3602
      exceptions = []
3603

    
3604
    self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
3605

    
3606
    if self.op.readd:
3607
      self.new_node = self.cfg.GetNodeInfo(node)
3608
      assert self.new_node is not None, "Can't retrieve locked node %s" % node
3609
    else:
3610
      self.new_node = objects.Node(name=node,
3611
                                   primary_ip=primary_ip,
3612
                                   secondary_ip=secondary_ip,
3613
                                   master_candidate=self.master_candidate,
3614
                                   offline=False, drained=False)
3615

    
3616
  def Exec(self, feedback_fn):
3617
    """Adds the new node to the cluster.
3618

3619
    """
3620
    new_node = self.new_node
3621
    node = new_node.name
3622

    
3623
    # for re-adds, reset the offline/drained/master-candidate flags;
3624
    # we need to reset here, otherwise offline would prevent RPC calls
3625
    # later in the procedure; this also means that if the re-add
3626
    # fails, we are left with a non-offlined, broken node
3627
    if self.op.readd:
3628
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
3629
      self.LogInfo("Readding a node, the offline/drained flags were reset")
3630
      # if we demote the node, we do cleanup later in the procedure
3631
      new_node.master_candidate = self.master_candidate
3632
      if self.changed_primary_ip:
3633
        new_node.primary_ip = self.op.primary_ip
3634

    
3635
    # notify the user about any possible mc promotion
3636
    if new_node.master_candidate:
3637
      self.LogInfo("Node will be a master candidate")
3638

    
3639
    # check connectivity
3640
    result = self.rpc.call_version([node])[node]
3641
    result.Raise("Can't get version information from node %s" % node)
3642
    if constants.PROTOCOL_VERSION == result.payload:
3643
      logging.info("Communication to node %s fine, sw version %s match",
3644
                   node, result.payload)
3645
    else:
3646
      raise errors.OpExecError("Version mismatch master version %s,"
3647
                               " node version %s" %
3648
                               (constants.PROTOCOL_VERSION, result.payload))
3649

    
3650
    # setup ssh on node
3651
    if self.cfg.GetClusterInfo().modify_ssh_setup:
3652
      logging.info("Copy ssh key to node %s", node)
3653
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
3654
      keyarray = []
3655
      keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
3656
                  constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
3657
                  priv_key, pub_key]
3658

    
3659
      for i in keyfiles:
3660
        keyarray.append(utils.ReadFile(i))
3661

    
3662
      result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
3663
                                      keyarray[2], keyarray[3], keyarray[4],
3664
                                      keyarray[5])
3665
      result.Raise("Cannot transfer ssh keys to the new node")
3666

    
3667
    # Add node to our /etc/hosts, and add key to known_hosts
3668
    if self.cfg.GetClusterInfo().modify_etc_hosts:
3669
      # FIXME: this should be done via an rpc call to node daemon
3670
      utils.AddHostToEtcHosts(new_node.name)
3671

    
3672
    if new_node.secondary_ip != new_node.primary_ip:
3673
      result = self.rpc.call_node_has_ip_address(new_node.name,
3674
                                                 new_node.secondary_ip)
3675
      result.Raise("Failure checking secondary ip on node %s" % new_node.name,
3676
                   prereq=True, ecode=errors.ECODE_ENVIRON)
3677
      if not result.payload:
3678
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
3679
                                 " you gave (%s). Please fix and re-run this"
3680
                                 " command." % new_node.secondary_ip)
3681

    
3682
    node_verify_list = [self.cfg.GetMasterNode()]
3683
    node_verify_param = {
3684
      constants.NV_NODELIST: [node],
3685
      # TODO: do a node-net-test as well?
3686
    }
3687

    
3688
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3689
                                       self.cfg.GetClusterName())
3690
    for verifier in node_verify_list:
3691
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
3692
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
3693
      if nl_payload:
3694
        for failed in nl_payload:
3695
          feedback_fn("ssh/hostname verification failed"
3696
                      " (checking from %s): %s" %
3697
                      (verifier, nl_payload[failed]))
3698
        raise errors.OpExecError("ssh/hostname verification failed.")
3699

    
3700
    if self.op.readd:
3701
      _RedistributeAncillaryFiles(self)
3702
      self.context.ReaddNode(new_node)
3703
      # make sure we redistribute the config
3704
      self.cfg.Update(new_node, feedback_fn)
3705
      # and make sure the new node will not have old files around
3706
      if not new_node.master_candidate:
3707
        result = self.rpc.call_node_demote_from_mc(new_node.name)
3708
        msg = result.fail_msg
3709
        if msg:
3710
          self.LogWarning("Node failed to demote itself from master"
3711
                          " candidate status: %s" % msg)
3712
    else:
3713
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
3714
      self.context.AddNode(new_node, self.proc.GetECId())
3715

    
3716

    
3717
class LUSetNodeParams(LogicalUnit):
3718
  """Modifies the parameters of a node.
3719

3720
  """
3721
  HPATH = "node-modify"
3722
  HTYPE = constants.HTYPE_NODE
3723
  _OP_REQP = ["node_name"]
3724
  REQ_BGL = False
3725

    
3726
  def CheckArguments(self):
3727
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3728
    _CheckBooleanOpField(self.op, 'master_candidate')
3729
    _CheckBooleanOpField(self.op, 'offline')
3730
    _CheckBooleanOpField(self.op, 'drained')
3731
    _CheckBooleanOpField(self.op, 'auto_promote')
3732
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
3733
    if all_mods.count(None) == 3:
3734
      raise errors.OpPrereqError("Please pass at least one modification",
3735
                                 errors.ECODE_INVAL)
3736
    if all_mods.count(True) > 1:
3737
      raise errors.OpPrereqError("Can't set the node into more than one"
3738
                                 " state at the same time",
3739
                                 errors.ECODE_INVAL)
3740

    
3741
    # Boolean value that tells us whether we're offlining or draining the node
3742
    self.offline_or_drain = (self.op.offline == True or
3743
                             self.op.drained == True)
3744
    self.deoffline_or_drain = (self.op.offline == False or
3745
                               self.op.drained == False)
3746
    self.might_demote = (self.op.master_candidate == False or
3747
                         self.offline_or_drain)
3748

    
3749
    self.lock_all = self.op.auto_promote and self.might_demote
3750

    
3751

    
3752
  def ExpandNames(self):
3753
    if self.lock_all:
3754
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
3755
    else:
3756
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
3757

    
3758
  def BuildHooksEnv(self):
3759
    """Build hooks env.
3760

3761
    This runs on the master node.
3762

3763
    """
3764
    env = {
3765
      "OP_TARGET": self.op.node_name,
3766
      "MASTER_CANDIDATE": str(self.op.master_candidate),
3767
      "OFFLINE": str(self.op.offline),
3768
      "DRAINED": str(self.op.drained),
3769
      }
3770
    nl = [self.cfg.GetMasterNode(),
3771
          self.op.node_name]
3772
    return env, nl, nl
3773

    
3774
  def CheckPrereq(self):
3775
    """Check prerequisites.
3776

3777
    This only checks the instance list against the existing names.
3778

3779
    """
3780
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
3781

    
3782
    if (self.op.master_candidate is not None or
3783
        self.op.drained is not None or
3784
        self.op.offline is not None):
3785
      # we can't change the master's node flags
3786
      if self.op.node_name == self.cfg.GetMasterNode():
3787
        raise errors.OpPrereqError("The master role can be changed"
3788
                                   " only via masterfailover",
3789
                                   errors.ECODE_INVAL)
3790

    
3791

    
3792
    if node.master_candidate and self.might_demote and not self.lock_all:
3793
      assert not self.op.auto_promote, "auto-promote set but lock_all not"
3794
      # check if after removing the current node, we're missing master
3795
      # candidates
3796
      (mc_remaining, mc_should, _) = \
3797
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
3798
      if mc_remaining < mc_should:
3799
        raise errors.OpPrereqError("Not enough master candidates, please"
3800
                                   " pass auto_promote to allow promotion",
3801
                                   errors.ECODE_INVAL)
3802

    
3803
    if (self.op.master_candidate == True and
3804
        ((node.offline and not self.op.offline == False) or
3805
         (node.drained and not self.op.drained == False))):
3806
      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
3807
                                 " to master_candidate" % node.name,
3808
                                 errors.ECODE_INVAL)
3809

    
3810
    # If we're being deofflined/drained, we'll MC ourself if needed
3811
    if (self.deoffline_or_drain and not self.offline_or_drain and not
3812
        self.op.master_candidate == True and not node.master_candidate):
3813
      self.op.master_candidate = _DecideSelfPromotion(self)
3814
      if self.op.master_candidate:
3815
        self.LogInfo("Autopromoting node to master candidate")
3816

    
3817
    return
3818

    
3819
  def Exec(self, feedback_fn):
3820
    """Modifies a node.
3821

3822
    """
3823
    node = self.node
3824

    
3825
    result = []
3826
    changed_mc = False
3827

    
3828
    if self.op.offline is not None:
3829
      node.offline = self.op.offline
3830
      result.append(("offline", str(self.op.offline)))
3831
      if self.op.offline == True:
3832
        if node.master_candidate:
3833
          node.master_candidate = False
3834
          changed_mc = True
3835
          result.append(("master_candidate", "auto-demotion due to offline"))
3836
        if node.drained:
3837
          node.drained = False
3838
          result.append(("drained", "clear drained status due to offline"))
3839

    
3840
    if self.op.master_candidate is not None:
3841
      node.master_candidate = self.op.master_candidate
3842
      changed_mc = True
3843
      result.append(("master_candidate", str(self.op.master_candidate)))
3844
      if self.op.master_candidate == False:
3845
        rrc = self.rpc.call_node_demote_from_mc(node.name)
3846
        msg = rrc.fail_msg
3847
        if msg:
3848
          self.LogWarning("Node failed to demote itself: %s" % msg)
3849

    
3850
    if self.op.drained is not None:
3851
      node.drained = self.op.drained
3852
      result.append(("drained", str(self.op.drained)))
3853
      if self.op.drained == True:
3854
        if node.master_candidate:
3855
          node.master_candidate = False
3856
          changed_mc = True
3857
          result.append(("master_candidate", "auto-demotion due to drain"))
3858
          rrc = self.rpc.call_node_demote_from_mc(node.name)
3859
          msg = rrc.fail_msg
3860
          if msg:
3861
            self.LogWarning("Node failed to demote itself: %s" % msg)
3862
        if node.offline:
3863
          node.offline = False
3864
          result.append(("offline", "clear offline status due to drain"))
3865

    
3866
    # we locked all nodes, we adjust the CP before updating this node
3867
    if self.lock_all:
3868
      _AdjustCandidatePool(self, [node.name])
3869

    
3870
    # this will trigger configuration file update, if needed
3871
    self.cfg.Update(node, feedback_fn)
3872

    
3873
    # this will trigger job queue propagation or cleanup
3874
    if changed_mc:
3875
      self.context.ReaddNode(node)
3876

    
3877
    return result
3878

    
3879

    
3880
class LUPowercycleNode(NoHooksLU):
3881
  """Powercycles a node.
3882

3883
  """
3884
  _OP_REQP = ["node_name", "force"]
3885
  REQ_BGL = False
3886

    
3887
  def CheckArguments(self):
3888
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3889
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
3890
      raise errors.OpPrereqError("The node is the master and the force"
3891
                                 " parameter was not set",
3892
                                 errors.ECODE_INVAL)
3893

    
3894
  def ExpandNames(self):
3895
    """Locking for PowercycleNode.
3896

3897
    This is a last-resort option and shouldn't block on other
3898
    jobs. Therefore, we grab no locks.
3899

3900
    """
3901
    self.needed_locks = {}
3902

    
3903
  def CheckPrereq(self):
3904
    """Check prerequisites.
3905

3906
    This LU has no prereqs.
3907

3908
    """
3909
    pass
3910

    
3911
  def Exec(self, feedback_fn):
3912
    """Reboots a node.
3913

3914
    """
3915
    result = self.rpc.call_node_powercycle(self.op.node_name,
3916
                                           self.cfg.GetHypervisorType())
3917
    result.Raise("Failed to schedule the reboot")
3918
    return result.payload
3919

    
3920

    
3921
class LUQueryClusterInfo(NoHooksLU):
3922
  """Query cluster configuration.
3923

3924
  """
3925
  _OP_REQP = []
3926
  REQ_BGL = False
3927

    
3928
  def ExpandNames(self):
3929
    self.needed_locks = {}
3930

    
3931
  def CheckPrereq(self):
3932
    """No prerequsites needed for this LU.
3933

3934
    """
3935
    pass
3936

    
3937
  def Exec(self, feedback_fn):
3938
    """Return cluster config.
3939

3940
    """
3941
    cluster = self.cfg.GetClusterInfo()
3942
    os_hvp = {}
3943

    
3944
    # Filter just for enabled hypervisors
3945
    for os_name, hv_dict in cluster.os_hvp.items():
3946
      os_hvp[os_name] = {}
3947
      for hv_name, hv_params in hv_dict.items():
3948
        if hv_name in cluster.enabled_hypervisors:
3949
          os_hvp[os_name][hv_name] = hv_params
3950

    
3951
    result = {
3952
      "software_version": constants.RELEASE_VERSION,
3953
      "protocol_version": constants.PROTOCOL_VERSION,
3954
      "config_version": constants.CONFIG_VERSION,
3955
      "os_api_version": max(constants.OS_API_VERSIONS),
3956
      "export_version": constants.EXPORT_VERSION,
3957
      "architecture": (platform.architecture()[0], platform.machine()),
3958
      "name": cluster.cluster_name,
3959
      "master": cluster.master_node,
3960
      "default_hypervisor": cluster.enabled_hypervisors[0],
3961
      "enabled_hypervisors": cluster.enabled_hypervisors,
3962
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
3963
                        for hypervisor_name in cluster.enabled_hypervisors]),
3964
      "os_hvp": os_hvp,
3965
      "beparams": cluster.beparams,
3966
      "osparams": cluster.osparams,
3967
      "nicparams": cluster.nicparams,
3968
      "candidate_pool_size": cluster.candidate_pool_size,
3969
      "master_netdev": cluster.master_netdev,
3970
      "volume_group_name": cluster.volume_group_name,
3971
      "file_storage_dir": cluster.file_storage_dir,
3972
      "maintain_node_health": cluster.maintain_node_health,
3973
      "ctime": cluster.ctime,
3974
      "mtime": cluster.mtime,
3975
      "uuid": cluster.uuid,
3976
      "tags": list(cluster.GetTags()),
3977
      "uid_pool": cluster.uid_pool,
3978
      }
3979

    
3980
    return result
3981

    
3982

    
3983
class LUQueryConfigValues(NoHooksLU):
3984
  """Return configuration values.
3985

3986
  """
3987
  _OP_REQP = []
3988
  REQ_BGL = False
3989
  _FIELDS_DYNAMIC = utils.FieldSet()
3990
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
3991
                                  "watcher_pause")
3992

    
3993
  def CheckArguments(self):
3994
    _CheckOutputFields(static=self._FIELDS_STATIC,
3995
                       dynamic=self._FIELDS_DYNAMIC,
3996
                       selected=self.op.output_fields)
3997

    
3998
  def ExpandNames(self):
3999
    self.needed_locks = {}
4000

    
4001
  def CheckPrereq(self):
4002
    """No prerequisites.
4003

4004
    """
4005
    pass
4006

    
4007
  def Exec(self, feedback_fn):
4008
    """Dump a representation of the cluster config to the standard output.
4009

4010
    """
4011
    values = []
4012
    for field in self.op.output_fields:
4013
      if field == "cluster_name":
4014
        entry = self.cfg.GetClusterName()
4015
      elif field == "master_node":
4016
        entry = self.cfg.GetMasterNode()
4017
      elif field == "drain_flag":
4018
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4019
      elif field == "watcher_pause":
4020
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4021
      else:
4022
        raise errors.ParameterError(field)
4023
      values.append(entry)
4024
    return values
4025

    
4026

    
4027
class LUActivateInstanceDisks(NoHooksLU):
4028
  """Bring up an instance's disks.
4029

4030
  """
4031
  _OP_REQP = ["instance_name"]
4032
  _OP_DEFS = [("ignore_size", False)]
4033
  REQ_BGL = False
4034

    
4035
  def ExpandNames(self):
4036
    self._ExpandAndLockInstance()
4037
    self.needed_locks[locking.LEVEL_NODE] = []
4038
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4039

    
4040
  def DeclareLocks(self, level):
4041
    if level == locking.LEVEL_NODE:
4042
      self._LockInstancesNodes()
4043

    
4044
  def CheckPrereq(self):
4045
    """Check prerequisites.
4046

4047
    This checks that the instance is in the cluster.
4048

4049
    """
4050
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4051
    assert self.instance is not None, \
4052
      "Cannot retrieve locked instance %s" % self.op.instance_name
4053
    _CheckNodeOnline(self, self.instance.primary_node)
4054

    
4055
  def Exec(self, feedback_fn):
4056
    """Activate the disks.
4057

4058
    """
4059
    disks_ok, disks_info = \
4060
              _AssembleInstanceDisks(self, self.instance,
4061
                                     ignore_size=self.op.ignore_size)
4062
    if not disks_ok:
4063
      raise errors.OpExecError("Cannot activate block devices")
4064

    
4065
    return disks_info
4066

    
4067

    
4068
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
4069
                           ignore_size=False):
4070
  """Prepare the block devices for an instance.
4071

4072
  This sets up the block devices on all nodes.
4073

4074
  @type lu: L{LogicalUnit}
4075
  @param lu: the logical unit on whose behalf we execute
4076
  @type instance: L{objects.Instance}
4077
  @param instance: the instance for whose disks we assemble
4078
  @type disks: list of L{objects.Disk} or None
4079
  @param disks: which disks to assemble (or all, if None)
4080
  @type ignore_secondaries: boolean
4081
  @param ignore_secondaries: if true, errors on secondary nodes
4082
      won't result in an error return from the function
4083
  @type ignore_size: boolean
4084
  @param ignore_size: if true, the current known size of the disk
4085
      will not be used during the disk activation, useful for cases
4086
      when the size is wrong
4087
  @return: False if the operation failed, otherwise a list of
4088
      (host, instance_visible_name, node_visible_name)
4089
      with the mapping from node devices to instance devices
4090

4091
  """
4092
  device_info = []
4093
  disks_ok = True
4094
  iname = instance.name
4095
  disks = _ExpandCheckDisks(instance, disks)
4096

    
4097
  # With the two passes mechanism we try to reduce the window of
4098
  # opportunity for the race condition of switching DRBD to primary
4099
  # before handshaking occured, but we do not eliminate it
4100

    
4101
  # The proper fix would be to wait (with some limits) until the
4102
  # connection has been made and drbd transitions from WFConnection
4103
  # into any other network-connected state (Connected, SyncTarget,
4104
  # SyncSource, etc.)
4105

    
4106
  # 1st pass, assemble on all nodes in secondary mode
4107
  for inst_disk in disks:
4108
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4109
      if ignore_size:
4110
        node_disk = node_disk.Copy()
4111
        node_disk.UnsetSize()
4112
      lu.cfg.SetDiskID(node_disk, node)
4113
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
4114
      msg = result.fail_msg
4115
      if msg:
4116
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
4117
                           " (is_primary=False, pass=1): %s",
4118
                           inst_disk.iv_name, node, msg)
4119
        if not ignore_secondaries:
4120
          disks_ok = False
4121

    
4122
  # FIXME: race condition on drbd migration to primary
4123

    
4124
  # 2nd pass, do only the primary node
4125
  for inst_disk in disks:
4126
    dev_path = None
4127

    
4128
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4129
      if node != instance.primary_node:
4130
        continue
4131
      if ignore_size:
4132
        node_disk = node_disk.Copy()
4133
        node_disk.UnsetSize()
4134
      lu.cfg.SetDiskID(node_disk, node)
4135
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
4136
      msg = result.fail_msg
4137
      if msg:
4138
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
4139
                           " (is_primary=True, pass=2): %s",
4140
                           inst_disk.iv_name, node, msg)
4141
        disks_ok = False
4142
      else:
4143
        dev_path = result.payload
4144

    
4145
    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
4146

    
4147
  # leave the disks configured for the primary node
4148
  # this is a workaround that would be fixed better by
4149
  # improving the logical/physical id handling
4150
  for disk in disks:
4151
    lu.cfg.SetDiskID(disk, instance.primary_node)
4152

    
4153
  return disks_ok, device_info
4154

    
4155

    
4156
def _StartInstanceDisks(lu, instance, force):
4157
  """Start the disks of an instance.
4158

4159
  """
4160
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
4161
                                           ignore_secondaries=force)
4162
  if not disks_ok:
4163
    _ShutdownInstanceDisks(lu, instance)
4164
    if force is not None and not force:
4165
      lu.proc.LogWarning("", hint="If the message above refers to a"
4166
                         " secondary node,"
4167
                         " you can retry the operation using '--force'.")
4168
    raise errors.OpExecError("Disk consistency error")
4169

    
4170

    
4171
class LUDeactivateInstanceDisks(NoHooksLU):
4172
  """Shutdown an instance's disks.
4173

4174
  """
4175
  _OP_REQP = ["instance_name"]
4176
  REQ_BGL = False
4177

    
4178
  def ExpandNames(self):
4179
    self._ExpandAndLockInstance()
4180
    self.needed_locks[locking.LEVEL_NODE] = []
4181
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4182

    
4183
  def DeclareLocks(self, level):
4184
    if level == locking.LEVEL_NODE:
4185
      self._LockInstancesNodes()
4186

    
4187
  def CheckPrereq(self):
4188
    """Check prerequisites.
4189

4190
    This checks that the instance is in the cluster.
4191

4192
    """
4193
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4194
    assert self.instance is not None, \
4195
      "Cannot retrieve locked instance %s" % self.op.instance_name
4196

    
4197
  def Exec(self, feedback_fn):
4198
    """Deactivate the disks
4199

4200
    """
4201
    instance = self.instance
4202
    _SafeShutdownInstanceDisks(self, instance)
4203

    
4204

    
4205
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4206
  """Shutdown block devices of an instance.
4207

4208
  This function checks if an instance is running, before calling
4209
  _ShutdownInstanceDisks.
4210

4211
  """
4212
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4213
  _ShutdownInstanceDisks(lu, instance, disks=disks)
4214

    
4215

    
4216
def _ExpandCheckDisks(instance, disks):
4217
  """Return the instance disks selected by the disks list
4218

4219
  @type disks: list of L{objects.Disk} or None
4220
  @param disks: selected disks
4221
  @rtype: list of L{objects.Disk}
4222
  @return: selected instance disks to act on
4223

4224
  """
4225
  if disks is None:
4226
    return instance.disks
4227
  else:
4228
    if not set(disks).issubset(instance.disks):
4229
      raise errors.ProgrammerError("Can only act on disks belonging to the"
4230
                                   " target instance")
4231
    return disks
4232

    
4233

    
4234
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4235
  """Shutdown block devices of an instance.
4236

4237
  This does the shutdown on all nodes of the instance.
4238

4239
  If the ignore_primary is false, errors on the primary node are
4240
  ignored.
4241

4242
  """
4243
  all_result = True
4244
  disks = _ExpandCheckDisks(instance, disks)
4245

    
4246
  for disk in disks:
4247
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4248
      lu.cfg.SetDiskID(top_disk, node)
4249
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4250
      msg = result.fail_msg
4251
      if msg:
4252
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4253
                      disk.iv_name, node, msg)
4254
        if not ignore_primary or node != instance.primary_node:
4255
          all_result = False
4256
  return all_result
4257

    
4258

    
4259
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4260
  """Checks if a node has enough free memory.
4261

4262
  This function check if a given node has the needed amount of free
4263
  memory. In case the node has less memory or we cannot get the
4264
  information from the node, this function raise an OpPrereqError
4265
  exception.
4266

4267
  @type lu: C{LogicalUnit}
4268
  @param lu: a logical unit from which we get configuration data
4269
  @type node: C{str}
4270
  @param node: the node to check
4271
  @type reason: C{str}
4272
  @param reason: string to use in the error message
4273
  @type requested: C{int}
4274
  @param requested: the amount of memory in MiB to check for
4275
  @type hypervisor_name: C{str}
4276
  @param hypervisor_name: the hypervisor to ask for memory stats
4277
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4278
      we cannot check the node
4279

4280
  """
4281
  nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
4282
  nodeinfo[node].Raise("Can't get data from node %s" % node,
4283
                       prereq=True, ecode=errors.ECODE_ENVIRON)
4284
  free_mem = nodeinfo[node].payload.get('memory_free', None)
4285
  if not isinstance(free_mem, int):
4286
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4287
                               " was '%s'" % (node, free_mem),
4288
                               errors.ECODE_ENVIRON)
4289
  if requested > free_mem:
4290
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4291
                               " needed %s MiB, available %s MiB" %
4292
                               (node, reason, requested, free_mem),
4293
                               errors.ECODE_NORES)
4294

    
4295

    
4296
def _CheckNodesFreeDisk(lu, nodenames, requested):
  """Checks if nodes have enough free disk space in the default VG.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
                                   lu.cfg.GetHypervisorType())
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    vg_free = info.payload.get("vg_free", None)
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node %s,"
                                 " result was '%s'" % (node, vg_free),
                                 errors.ECODE_ENVIRON)
    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s:"
                                 " required %d MiB, available %d MiB" %
                                 (node, requested, vg_free),
                                 errors.ECODE_NORES)


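# The hvparams/beparams accepted by LUStartupInstance below are one-off
# overrides: they are validated in CheckPrereq and passed directly to the
# start RPC, but never written back to the instance's configuration.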
class LUStartupInstance(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "force"]
  _OP_DEFS = [
    ("beparams", _EmptyDict),
    ("hvparams", _EmptyDict),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra beparams
    if self.op.beparams:
      if not isinstance(self.op.beparams, dict):
        raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
                                   " dict" % (type(self.op.beparams), ),
                                   errors.ECODE_INVAL)
      # fill the beparams dict
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)

    # extra hvparams
    if self.op.hvparams:
      if not isinstance(self.op.hvparams, dict):
        raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
                                   " dict" % (type(self.op.hvparams), ),
                                   errors.ECODE_INVAL)

      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = cluster.FillHV(instance)
      filled_hvp.update(self.op.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)

    _CheckNodeOnline(self, instance.primary_node)

    bep = self.cfg.GetClusterInfo().FillBE(instance)
    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node,
                      prereq=True, ecode=errors.ECODE_ENVIRON)
    if not remote_info.payload: # not running already
      _CheckNodeFreeMemory(self, instance.primary_node,
                           "starting instance %s" % instance.name,
                           bep[constants.BE_MEMORY], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    """
    instance = self.instance
    force = self.op.force

    self.cfg.MarkInstanceUp(instance.name)

    node_current = instance.primary_node

    _StartInstanceDisks(self, instance, force)

    result = self.rpc.call_instance_start(node_current, instance,
                                          self.op.hvparams, self.op.beparams)
    msg = result.fail_msg
    if msg:
      _ShutdownInstanceDisks(self, instance)
      raise errors.OpExecError("Could not start instance: %s" % msg)


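# LURebootInstance below performs soft and hard reboots with a single
# instance_reboot RPC, while a full reboot is emulated as shutdown, disk
# restart and start on the same node.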
class LURebootInstance(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
  _OP_DEFS = [("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]
  REQ_BGL = False

  def CheckArguments(self):
    if self.op.reboot_type not in constants.REBOOT_TYPES:
      raise errors.OpPrereqError("Invalid reboot type '%s', not one of %s" %
                                  (self.op.reboot_type,
                                   utils.CommaJoin(constants.REBOOT_TYPES)),
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    node_current = instance.primary_node

    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                       constants.INSTANCE_REBOOT_HARD]:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, node_current)
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             reboot_type,
                                             self.op.shutdown_timeout)
      result.Raise("Could not reboot instance")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance,
                                               self.op.shutdown_timeout)
      result.Raise("Could not shutdown instance for full reboot")
      _ShutdownInstanceDisks(self, instance)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(node_current, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)


class LUShutdownInstance(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  _OP_DEFS = [("timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.op.timeout
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    node_current = instance.primary_node
    timeout = self.op.timeout
    self.cfg.MarkInstanceDown(instance.name)
    result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
    msg = result.fail_msg
    if msg:
      self.proc.LogWarning("Could not shutdown instance: %s" % msg)

    _ShutdownInstanceDisks(self, instance)


class LUReinstallInstance(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  _OP_DEFS = [
    ("os_type", None),
    ("force_variant", False),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot reinstall")

    if self.op.os_type is not None:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
                                             self.op.debug_level)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)


class LURecreateInstanceDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disks"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    if not isinstance(self.op.disks, list):
      raise errors.OpPrereqError("Invalid disks parameter", errors.ECODE_INVAL)
    for item in self.op.disks:
      if (not isinstance(item, int) or
          item < 0):
        raise errors.OpPrereqError("Invalid disk specification '%s'" %
                                   str(item), errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot recreate disks")

    if not self.op.disks:
      self.op.disks = range(len(instance.disks))
    else:
      for idx in self.op.disks:
        if idx >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
                                     errors.ECODE_INVAL)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Recreate the disks.

    """
    to_skip = []
    for idx, _ in enumerate(self.instance.disks):
      if idx not in self.op.disks: # disk idx has not been passed in
        to_skip.append(idx)
        continue

    _CreateDisks(self, self.instance, to_skip=to_skip)


class LURenameInstance(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "new_name"]
  _OP_DEFS = [("ignore_ip", False)]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None
    _CheckNodeOnline(self, instance.primary_node)
    _CheckInstanceDown(self, instance, "cannot rename")
    self.instance = instance

    # new name verification
    name_info = utils.GetHostInfo(self.op.new_name)

    self.op.new_name = new_name = name_info.name
    instance_list = self.cfg.GetInstanceList()
    if new_name in instance_list:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name, errors.ECODE_EXISTS)

    if not self.op.ignore_ip:
      if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (name_info.ip, new_name),
                                   errors.ECODE_NOTUNIQUE)


  def Exec(self, feedback_fn):
    """Rename the instance.

    """
    inst = self.instance
    old_name = inst.name

    if inst.disk_template == constants.DT_FILE:
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if inst.disk_template == constants.DT_FILE:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (inst.primary_node, old_file_storage_dir,
                    new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    try:
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name, self.op.debug_level)
      msg = result.fail_msg
      if msg:
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.proc.LogWarning(msg)
    finally:
      _ShutdownInstanceDisks(self, inst)


class LURemoveInstance(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_failures"]
  _OP_DEFS = [("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
    nl = [self.cfg.GetMasterNode()]
    nl_post = list(self.instance.all_nodes) + nl
    return env, nl, nl_post

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    """
    instance = self.instance
    logging.info("Shutting down instance %s on node %s",
                 instance.name, instance.primary_node)

    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_failures:
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, instance.primary_node, msg))

    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)


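# The helper below does not drop the instance lock itself; it registers the
# name in lu.remove_locks so that the LU framework releases and forgets the
# lock once the operation has finished.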
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
  """Utility function to remove an instance.

  """
  logging.info("Removing block devices for instance %s", instance.name)

  if not _RemoveDisks(lu, instance):
    if not ignore_failures:
      raise errors.OpExecError("Can't remove instance's disks")
    feedback_fn("Warning: can't remove instance's disks")

  logging.info("Removing instance %s out of cluster config", instance.name)

  lu.cfg.RemoveInstance(instance.name)

  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
    "Instance lock removal conflict"

  # Remove lock for the instance
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name


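# In LUQueryInstances below, requesting only static fields keeps do_node_query
# False, so no node RPC is made and no locks are acquired; the dynamic fields
# ("oper_state", "oper_ram", "status") trigger the call_all_instances_info
# query and, if use_locking is set, shared instance/node locks.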
class LUQueryInstances(NoHooksLU):
  """Logical unit for querying instances.

  """
  # pylint: disable-msg=W0142
  _OP_REQP = ["output_fields", "names", "use_locking"]
  REQ_BGL = False
  _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
                    "serial_no", "ctime", "mtime", "uuid"]
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
                                    "admin_state",
                                    "disk_template", "ip", "mac", "bridge",
                                    "nic_mode", "nic_link",
                                    "sda_size", "sdb_size", "vcpus", "tags",
                                    "network_port", "beparams",
                                    r"(disk)\.(size)/([0-9]+)",
                                    r"(disk)\.(sizes)", "disk_usage",
                                    r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
                                    r"(nic)\.(bridge)/([0-9]+)",
                                    r"(nic)\.(macs|ips|modes|links|bridges)",
                                    r"(disk|nic)\.(count)",
                                    "hvparams",
                                    ] + _SIMPLE_FIELDS +
                                  ["hv/%s" % name
                                   for name in constants.HVS_PARAMETERS
                                   if name not in constants.HVC_GLOBALS] +
                                  ["be/%s" % name
                                   for name in constants.BES_PARAMETERS])
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")


  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_INSTANCE] = 1
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.names:
      self.wanted = _GetWantedInstances(self, self.op.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.do_locking:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Computes the list of instances and their attributes.

    """
    # pylint: disable-msg=R0912
    # way too many branches here
    all_info = self.cfg.GetAllInstancesInfo()
    if self.wanted == locking.ALL_SET:
      # caller didn't specify instance names, so ordering is not important
      if self.do_locking:
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
      else:
        instance_names = all_info.keys()
      instance_names = utils.NiceSort(instance_names)
    else:
      # caller did specify names, so we must keep the ordering
      if self.do_locking:
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
      else:
        tgt_set = all_info.keys()
      missing = set(self.wanted).difference(tgt_set)
      if missing:
        raise errors.OpExecError("Some instances were removed before"
                                 " retrieving their data: %s" % missing)
      instance_names = self.wanted

    instance_list = [all_info[iname] for iname in instance_names]

    # begin data gathering

    nodes = frozenset([inst.primary_node for inst in instance_list])
    hv_list = list(set([inst.hypervisor for inst in instance_list]))

    bad_nodes = []
    off_nodes = []
    if self.do_node_query:
      live_data = {}
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          off_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        else:
          if result.payload:
            live_data.update(result.payload)
          # else no instance is alive
    else:
      live_data = dict([(name, {}) for name in instance_names])

    # end data gathering

    HVPREFIX = "hv/"
    BEPREFIX = "be/"
    output = []
    cluster = self.cfg.GetClusterInfo()
    for instance in instance_list:
      iout = []
      i_hv = cluster.FillHV(instance, skip_globals=True)
      i_be = cluster.FillBE(instance)
      i_nicp = [cluster.SimpleFillNIC(nic.nicparams) for nic in instance.nics]
      for field in self.op.output_fields:
        st_match = self._FIELDS_STATIC.Matches(field)
        if field in self._SIMPLE_FIELDS:
          val = getattr(instance, field)
        elif field == "pnode":
          val = instance.primary_node
        elif field == "snodes":
          val = list(instance.secondary_nodes)
        elif field == "admin_state":
          val = instance.admin_up
        elif field == "oper_state":
          if instance.primary_node in bad_nodes:
            val = None
          else:
            val = bool(live_data.get(instance.name))
        elif field == "status":
          if instance.primary_node in off_nodes:
            val = "ERROR_nodeoffline"
          elif instance.primary_node in bad_nodes:
            val = "ERROR_nodedown"
          else:
            running = bool(live_data.get(instance.name))
            if running:
              if instance.admin_up:
                val = "running"
              else:
                val = "ERROR_up"
            else:
              if instance.admin_up:
                val = "ERROR_down"
              else:
                val = "ADMIN_down"
        elif field == "oper_ram":
          if instance.primary_node in bad_nodes:
            val = None
          elif instance.name in live_data:
            val = live_data[instance.name].get("memory", "?")
          else:
            val = "-"
        elif field == "vcpus":
          val = i_be[constants.BE_VCPUS]
        elif field == "disk_template":
          val = instance.disk_template
        elif field == "ip":
          if instance.nics:
            val = instance.nics[0].ip
          else:
            val = None
        elif field == "nic_mode":
          if instance.nics:
            val = i_nicp[0][constants.NIC_MODE]
          else:
            val = None
        elif field == "nic_link":
          if instance.nics:
            val = i_nicp[0][constants.NIC_LINK]
          else:
            val = None
        elif field == "bridge":
          if (instance.nics and
              i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
            val = i_nicp[0][constants.NIC_LINK]
          else:
            val = None
        elif field == "mac":
          if instance.nics:
            val = instance.nics[0].mac
          else:
            val = None
        elif field == "sda_size" or field == "sdb_size":
          idx = ord(field[2]) - ord('a')
          try:
            val = instance.FindDisk(idx).size
          except errors.OpPrereqError:
            val = None
        elif field == "disk_usage": # total disk usage per node
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
        elif field == "tags":
          val = list(instance.GetTags())
        elif field == "hvparams":
          val = i_hv
        elif (field.startswith(HVPREFIX) and
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
              field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
          val = i_hv.get(field[len(HVPREFIX):], None)
        elif field == "beparams":
          val = i_be
        elif (field.startswith(BEPREFIX) and
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
          val = i_be.get(field[len(BEPREFIX):], None)
        elif st_match and st_match.groups():
          # matches a variable list
          st_groups = st_match.groups()
          if st_groups and st_groups[0] == "disk":
            if st_groups[1] == "count":
              val = len(instance.disks)
            elif st_groups[1] == "sizes":
              val = [disk.size for disk in instance.disks]
            elif st_groups[1] == "size":
              try:
                val = instance.FindDisk(st_groups[2]).size
              except errors.OpPrereqError:
                val = None
            else:
              assert False, "Unhandled disk parameter"
          elif st_groups[0] == "nic":
            if st_groups[1] == "count":
              val = len(instance.nics)
            elif st_groups[1] == "macs":
              val = [nic.mac for nic in instance.nics]
            elif st_groups[1] == "ips":
              val = [nic.ip for nic in instance.nics]
            elif st_groups[1] == "modes":
              val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
            elif st_groups[1] == "links":
              val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
            elif st_groups[1] == "bridges":
              val = []
              for nicp in i_nicp:
                if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
                  val.append(nicp[constants.NIC_LINK])
                else:
                  val.append(None)
            else:
              # index-based item
              nic_idx = int(st_groups[2])
              if nic_idx >= len(instance.nics):
                val = None
              else:
                if st_groups[1] == "mac":
                  val = instance.nics[nic_idx].mac
                elif st_groups[1] == "ip":
                  val = instance.nics[nic_idx].ip
                elif st_groups[1] == "mode":
                  val = i_nicp[nic_idx][constants.NIC_MODE]
                elif st_groups[1] == "link":
                  val = i_nicp[nic_idx][constants.NIC_LINK]
                elif st_groups[1] == "bridge":
                  nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
                  if nic_mode == constants.NIC_MODE_BRIDGED:
                    val = i_nicp[nic_idx][constants.NIC_LINK]
                  else:
                    val = None
                else:
                  assert False, "Unhandled NIC parameter"
          else:
            assert False, ("Declared but unhandled variable parameter '%s'" %
                           field)
        else:
          assert False, "Declared but unhandled parameter '%s'" % field
        iout.append(val)
      output.append(iout)

    return output


class LUFailoverInstance(LogicalUnit):
  """Failover an instance.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_consistency"]
  _OP_DEFS = [("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self.instance
    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]
    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      "OLD_PRIMARY": source_node,
      "OLD_SECONDARY": target_node,
      "NEW_PRIMARY": target_node,
      "NEW_SECONDARY": source_node,
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    nl_post = list(nl)
    nl_post.append(source_node)
    return env, nl, nl_post

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    bep = self.cfg.GetClusterInfo().FillBE(instance)
    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " network mirrored, cannot failover.",
                                 errors.ECODE_STATE)

    secondary_nodes = instance.secondary_nodes
    if not secondary_nodes:
      raise errors.ProgrammerError("no secondary node but using "
                                   "a mirrored disk template")

    target_node = secondary_nodes[0]
    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    if instance.admin_up:
      # check memory requirements on the secondary node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]

    if instance.admin_up:
      feedback_fn("* checking disk consistency between source and target")
      for dev in instance.disks:
        # for drbd, these are drbd over lvm
        if not _CheckDiskConsistency(self, dev, target_node, False):
          if not self.op.ignore_consistency:
            raise errors.OpExecError("Disk %s is degraded on target node,"
                                     " aborting failover." % dev.iv_name)
    else:
      feedback_fn("* not checking disk consistency as instance is not running")

    feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    feedback_fn("* deactivating the instance's disks on source node")
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
      raise errors.OpExecError("Can't shut down the instance's disks.")

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, feedback_fn)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      feedback_fn("* activating the instance's disks on target node")
      logging.info("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      feedback_fn("* starting the instance on the target node")
      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))


class LUMigrateInstance(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "live", "cleanup"]

  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       self.op.live, self.op.cleanup)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]
    env = _BuildInstanceHookEnvByObject(self, instance)
    env["MIGRATE_LIVE"] = self.op.live
    env["MIGRATE_CLEANUP"] = self.op.cleanup
    env.update({
        "OLD_PRIMARY": source_node,
        "OLD_SECONDARY": target_node,
        "NEW_PRIMARY": target_node,
        "NEW_SECONDARY": source_node,
        })
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    nl_post = list(nl)
    nl_post.append(source_node)
    return env, nl, nl_post


class LUMoveInstance(LogicalUnit):
  """Move an instance by data-copying.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "target_node"]
  _OP_DEFS = [("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
                                       self.op.target_node]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)

    if instance.admin_up:
      # check memory requirements on the target node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))


class LUMigrateNode(LogicalUnit):
  """Migrate all instances from a node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name", "live"]
  REQ_BGL = False

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    # Create tasklets for migrating all instances on this node
    names = []
    tasklets = []

    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
      logging.debug("Migrating instance %s", inst.name)
      names.append(inst.name)

      tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))

    self.tasklets = tasklets

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = names

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "NODE_NAME": self.op.node_name,
      }

    nl = [self.cfg.GetMasterNode()]

    return (env, nl, nl)


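# TLMigrateInstance below is a tasklet shared by the two logical units above:
# LUMigrateInstance wraps exactly one of them, while LUMigrateNode creates one
# per primary instance of the node being evacuated.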
class TLMigrateInstance(Tasklet):
5602
  def __init__(self, lu, instance_name, live, cleanup):
5603
    """Initializes this class.
5604

5605
    """
5606
    Tasklet.__init__(self, lu)
5607

    
5608
    # Parameters
5609
    self.instance_name = instance_name
5610
    self.live = live
5611
    self.cleanup = cleanup
5612

    
5613
  def CheckPrereq(self):
5614
    """Check prerequisites.
5615

5616
    This checks that the instance is in the cluster.
5617

5618
    """
5619
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5620
    instance = self.cfg.GetInstanceInfo(instance_name)
5621
    assert instance is not None
5622

    
5623
    if instance.disk_template != constants.DT_DRBD8:
5624
      raise errors.OpPrereqError("Instance's disk layout is not"
5625
                                 " drbd8, cannot migrate.", errors.ECODE_STATE)
5626

    
5627
    secondary_nodes = instance.secondary_nodes
5628
    if not secondary_nodes:
5629
      raise errors.ConfigurationError("No secondary node but using"
5630
                                      " drbd8 disk template")
5631

    
5632
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
5633

    
5634
    target_node = secondary_nodes[0]
5635
    # check memory requirements on the secondary node
5636
    _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
5637
                         instance.name, i_be[constants.BE_MEMORY],
5638
                         instance.hypervisor)
5639

    
5640
    # check bridge existance
5641
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
5642

    
5643
    if not self.cleanup:
5644
      _CheckNodeNotDrained(self.lu, target_node)
5645
      result = self.rpc.call_instance_migratable(instance.primary_node,
5646
                                                 instance)
5647
      result.Raise("Can't migrate, please use failover",
5648
                   prereq=True, ecode=errors.ECODE_STATE)
5649

    
5650
    self.instance = instance
5651

    
5652
  def _WaitUntilSync(self):
    """Poll with custom rpc for disk sync.

    This uses our own step-based rpc call.

    """
    self.feedback_fn("* wait until resync is done")
    all_done = False
    while not all_done:
      all_done = True
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
                                            self.nodes_ip,
                                            self.instance.disks)
      min_percent = 100
      for node, nres in result.items():
        nres.Raise("Cannot resync disks on node %s" % node)
        node_done, node_percent = nres.payload
        all_done = all_done and node_done
        if node_percent is not None:
          min_percent = min(min_percent, node_percent)
      if not all_done:
        if min_percent < 100:
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
        time.sleep(2)

  def _EnsureSecondary(self, node):
    """Demote a node to secondary.

    """
    self.feedback_fn("* switching node %s to secondary mode" % node)

    for dev in self.instance.disks:
      self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_close(node, self.instance.name,
                                          self.instance.disks)
    result.Raise("Cannot change disk to secondary on node %s" % node)

  def _GoStandalone(self):
    """Disconnect from the network.

    """
    self.feedback_fn("* changing into standalone mode")
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
                                               self.instance.disks)
    for node, nres in result.items():
      nres.Raise("Cannot disconnect disks node %s" % node)

  def _GoReconnect(self, multimaster):
    """Reconnect to the network.

    """
    if multimaster:
      msg = "dual-master"
    else:
      msg = "single-master"
    self.feedback_fn("* changing disks into %s mode" % msg)
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
                                           self.instance.disks,
                                           self.instance.name, multimaster)
    for node, nres in result.items():
      nres.Raise("Cannot change disks config on node %s" % node)

  def _ExecCleanup(self):
    """Try to cleanup after a failed migration.

    The cleanup is done by:
      - check that the instance is running only on one node
        (and update the config if needed)
      - change disks on its secondary node to secondary
      - wait until disks are fully synchronized
      - disconnect from the network
      - change disks into single-master mode
      - wait again until disks are fully synchronized

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # check running on only one node
    self.feedback_fn("* checking where the instance actually runs"
                     " (if this hangs, the hypervisor might be in"
                     " a bad state)")
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
    for node, result in ins_l.items():
      result.Raise("Can't contact node %s" % node)

    runningon_source = instance.name in ins_l[source_node].payload
    runningon_target = instance.name in ins_l[target_node].payload

    if runningon_source and runningon_target:
      raise errors.OpExecError("Instance seems to be running on two nodes,"
                               " or the hypervisor is confused. You will have"
                               " to ensure manually that it runs only on one"
                               " and restart this operation.")

    if not (runningon_source or runningon_target):
      raise errors.OpExecError("Instance does not seem to be running at all."
                               " In this case, it's safer to repair by"
                               " running 'gnt-instance stop' to ensure disk"
                               " shutdown, and then restarting it.")

    if runningon_target:
      # the migration has actually succeeded, we need to update the config
      self.feedback_fn("* instance running on secondary node (%s),"
                       " updating config" % target_node)
      instance.primary_node = target_node
      self.cfg.Update(instance, self.feedback_fn)
      demoted_node = source_node
    else:
      self.feedback_fn("* instance confirmed to be running on its"
                       " primary node (%s)" % source_node)
      demoted_node = target_node

    self._EnsureSecondary(demoted_node)
    try:
      self._WaitUntilSync()
    except errors.OpExecError:
      # we ignore errors here, since if the device is standalone, it
      # won't be able to sync
      pass
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def _RevertDiskStatus(self):
    """Try to revert the disk status after a failed migration.

    """
    target_node = self.target_node
    try:
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()
    except errors.OpExecError, err:
      self.lu.LogWarning("Migration failed and I can't reconnect the"
                         " drives: error '%s'\n"
                         "Please look and recover the instance status" %
                         str(err))

  def _AbortMigration(self):
    """Call the hypervisor code to abort a started migration.

    """
    instance = self.instance
    target_node = self.target_node
    migration_info = self.migration_info

    abort_result = self.rpc.call_finalize_migration(target_node,
                                                    instance,
                                                    migration_info,
                                                    False)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on target node %s: %s",
                    target_node, abort_msg)
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.

  def _ExecMigration(self):
    """Migrate an instance.

    The migration is done by:
      - change the disks into dual-master mode
      - wait until disks are fully synchronized again
      - migrate the instance
      - change disks on the new secondary node (the old primary) to secondary
      - wait until disks are fully synchronized
      - change disks into single-master mode

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    self.feedback_fn("* checking disk consistency between source and target")
    for dev in instance.disks:
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migrate." % dev.iv_name)

    # First get the migration information from the remote node
    result = self.rpc.call_migration_info(source_node, instance)
    msg = result.fail_msg
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (source_node, msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    self.migration_info = migration_info = result.payload

    # Then switch the disks to master/master mode
    self._EnsureSecondary(target_node)
    self._GoStandalone()
    self._GoReconnect(True)
    self._WaitUntilSync()

    self.feedback_fn("* preparing %s to accept the instance" % target_node)
    result = self.rpc.call_accept_instance(target_node,
                                           instance,
                                           migration_info,
                                           self.nodes_ip[target_node])

    msg = result.fail_msg
    if msg:
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Pre-migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* migrating instance to %s" % target_node)
    time.sleep(10)
    result = self.rpc.call_instance_migrate(source_node, instance,
                                            self.nodes_ip[target_node],
                                            self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))
    time.sleep(10)

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    result = self.rpc.call_finalize_migration(target_node,
                                              instance,
                                              migration_info,
                                              True)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed:"
                    " %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    self._EnsureSecondary(source_node)
    self._WaitUntilSync()
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def Exec(self, feedback_fn):
    """Perform the migration.

    """
    feedback_fn("Migrating instance %s" % self.instance.name)

    self.feedback_fn = feedback_fn

    self.source_node = self.instance.primary_node
    self.target_node = self.instance.secondary_nodes[0]
    self.all_nodes = [self.source_node, self.target_node]
    self.nodes_ip = {
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
      }

    if self.cleanup:
      return self._ExecCleanup()
    else:
      return self._ExecMigration()


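# For illustration: _CreateBlockDev below walks a disk tree depth-first.
# Roughly, for a DRBD8 device with two LV children, the children are
# recursed into first (force_create is inherited, and switched on once a
# device reports CreateOnSecondary()), and only then is the device itself
# created via _CreateSingleBlockDev.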
def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has
      the CreateOnSecondary() attribute
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  if device.CreateOnSecondary():
    force_create = True

  if device.children:
    for child in device.children:
      _CreateBlockDev(lu, node, instance, child, force_create,
                      info, force_open)

  if not force_create:
    return

  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)


def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create a single block device on a given node.

  This will not recurse over children of the device, so they must be
  created in advance.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  lu.cfg.SetDiskID(device, node)
  result = lu.rpc.call_blockdev_create(node, device, device.size,
                                       instance.name, force_open, info)
  result.Raise("Can't create block device %s on"
               " node %s for instance %s" % (device, node, instance.name))
  if device.physical_id is None:
    device.physical_id = result.payload


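# For illustration, a call such as _GenerateUniqueNames(lu, [".disk0",
# ".disk1"]) returns names of the form ["<id0>.disk0", "<id1>.disk1"],
# where each <idN> placeholder stands for a fresh identifier obtained from
# cfg.GenerateUniqueID() and reserved against the current execution context.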
def _GenerateUniqueNames(lu, exts):
  """Generate a suitable LV name.

  This will generate a logical volume name for the given instance.

  """
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
    results.append("%s%s" % (new_id, val))
  return results


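# Hypothetical usage of _GenerateDRBD8Branch (values are made up):
#   _GenerateDRBD8Branch(lu, "node1.example.com", "node2.example.com", 1024,
#                        ["<id>.disk0_data", "<id>.disk0_meta"], "disk/0",
#                        0, 1)
# would return a 1024 MB LD_DRBD8 disk whose children are the data LV and a
# 128 MB metadata LV, wired to DRBD minors 0 and 1 on the two nodes.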
def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
                         p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  """
  port = lu.cfg.AllocatePort()
  vgname = lu.cfg.GetVGName()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgname, names[0]))
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                          logical_id=(vgname, names[1]))
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                                      p_minor, s_minor,
                                      shared_secret),
                          children=[dev_data, dev_meta],
                          iv_name=iv_name)
  return drbd_dev


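# For illustration, disk_info is expected to be a list of dicts such as
#   [{"size": 1024, "mode": constants.DISK_RDWR}]
# _GenerateDiskTemplate then yields one LD_LV disk per entry for DT_PLAIN,
# one LD_DRBD8 disk (with data/meta LV children) per entry for DT_DRBD8,
# one LD_FILE disk per entry for DT_FILE, and an empty list for DT_DISKLESS.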
def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index):
  """Generate the entire disk layout for a given template type.

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []
  if template_name == constants.DT_DISKLESS:
    pass
  elif template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                      for i in range(disk_count)])
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
                              logical_id=(vgname, names[idx]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk["mode"])
      disks.append(disk_dev)
  elif template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    names = []
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk["size"], names[idx*2:idx*2+2],
                                      "disk/%d" % disk_index,
                                      minors[idx*2], minors[idx*2+1])
      disk_dev.mode = disk["mode"]
      disks.append(disk_dev)
  elif template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    _RequireFileStorage()

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk["mode"])
      disks.append(disk_dev)
  else:
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
  return disks


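# For example, an instance named "instance1.example.com" (hypothetical name)
# gets the metadata text "originstname+instance1.example.com".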
def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name


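# Sketch of typical usage: instance creation code builds an objects.Instance
# and then calls something like _CreateDisks(self, iobj), which creates each
# disk in iobj.disks on the primary node and, where the device type requires
# it (see _CreateBlockDev), on the secondary nodes as well.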
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation
  @rtype: boolean
  @return: the success of the creation

  """
  info = _GetInstanceInfoText(instance)
  if target_node is None:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes
  else:
    pnode = target_node
    all_nodes = [pnode]

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUSetInstanceParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
      continue
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    #HARDCODE
    for node in all_nodes:
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)


def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, instance.primary_node, result.fail_msg)
      all_result = False

  return all_result


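# Worked example for _ComputeDiskSize: two DRBD8 disks of 1024 and 2048
# (sizes in MB) need (1024 + 128) + (2048 + 128) = 3328 MB in the volume
# group, while DT_FILE and DT_DISKLESS need no volume group space (None).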
def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d["size"] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
    constants.DT_FILE: None,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]


def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
                                                  hvname,
                                                  hvparams)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)


def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS we should use
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  result = lu.rpc.call_os_validate(required, nodenames, osname,
                                   [constants.OS_VALIDATE_PARAMETERS],
                                   osparams)
  for node, nres in result.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    nres.Raise("OS Parameters validation failed on node %s" % node)
    if not nres.payload:
      lu.LogInfo("OS %s not found on node %s, validation skipped",
                 osname, node)


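# Rough guide to the checks below: a "create" opcode is expected to provide
# the attributes listed in _OP_REQP (instance_name, disks, nics, mode, ...),
# plus a guest OS and a disk template, and either a primary node (pnode) or
# an iallocator name; the class's _OP_DEFS list supplies defaults for the
# optional attributes.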
class LUCreateInstance(LogicalUnit):
  """Create an instance.

  """
  HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disks",
              "mode", "start",
              "wait_for_sync", "ip_check", "nics",
              "hvparams", "beparams", "osparams"]
  _OP_DEFS = [
    ("name_check", True),
    ("no_install", False),
    ("os_type", None),
    ("force_variant", False),
    ("source_handshake", None),
    ("source_x509_ca", None),
    ("source_instance_name", None),
    ("src_node", None),
    ("src_path", None),
    ("pnode", None),
    ("snode", None),
    ("iallocator", None),
    ("hypervisor", None),
    ("disk_template", None),
    ("identify_defaults", None),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    """Check arguments.

    """
    # do not require name_check to ease forward/backward compatibility
    # for tools
    if self.op.no_install and self.op.start:
      self.LogInfo("No-installation mode selected, disabling startup")
      self.op.start = False
    # validate/normalize the instance name
    self.op.instance_name = utils.HostInfo.NormalizeName(self.op.instance_name)
    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do ip checks without a name check",
                                 errors.ECODE_INVAL)

    # check nics' parameter names
    for nic in self.op.nics:
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)

    # check disks' parameter names and consistent adopt/no-adopt strategy
    has_adopt = has_no_adopt = False
    for disk in self.op.disks:
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
      if "adopt" in disk:
        has_adopt = True
      else:
        has_no_adopt = True
    if has_adopt and has_no_adopt:
      raise errors.OpPrereqError("Either all disks are adopted or none is",
                                 errors.ECODE_INVAL)
    if has_adopt:
      if self.op.disk_template != constants.DT_PLAIN:
        raise errors.OpPrereqError("Disk adoption is only supported for the"
                                   " 'plain' disk template",
                                   errors.ECODE_INVAL)
      if self.op.iallocator is not None:
        raise errors.OpPrereqError("Disk adoption not allowed with an"
                                   " iallocator script", errors.ECODE_INVAL)
      if self.op.mode == constants.INSTANCE_IMPORT:
        raise errors.OpPrereqError("Disk adoption not allowed for"
                                   " instance import", errors.ECODE_INVAL)

    self.adopt_disks = has_adopt

    # verify creation mode
    if self.op.mode not in constants.INSTANCE_CREATE_MODES:
      raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    # instance name verification
    if self.op.name_check:
      self.hostname1 = utils.GetHostInfo(self.op.instance_name)
      self.op.instance_name = self.hostname1.name
      # used in CheckPrereq for ip ping check
      self.check_ip = self.hostname1.ip
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      raise errors.OpPrereqError("Remote imports require names to be checked",
                                 errors.ECODE_INVAL)
    else:
      self.check_ip = None

    # file storage checks
    if (self.op.file_driver and
        not self.op.file_driver in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver, errors.ECODE_INVAL)

    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
      raise errors.OpPrereqError("File storage directory path not absolute",
                                 errors.ECODE_INVAL)

    ### Node/iallocator related checks
    if [self.op.iallocator, self.op.pnode].count(None) != 1:
      raise errors.OpPrereqError("One and only one of iallocator and primary"
                                 " node must be given",
                                 errors.ECODE_INVAL)

    self._cds = _GetClusterDomainSecret()

    if self.op.mode == constants.INSTANCE_IMPORT:
      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      # works again!
      self.op.force_variant = True

      if self.op.no_install:
        self.LogInfo("No-installation mode has no effect during import")

    elif self.op.mode == constants.INSTANCE_CREATE:
      if self.op.os_type is None:
        raise errors.OpPrereqError("No guest OS specified",
                                   errors.ECODE_INVAL)
      if self.op.disk_template is None:
        raise errors.OpPrereqError("No disk template specified",
                                   errors.ECODE_INVAL)

    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      # Check handshake to ensure both clusters have the same domain secret
      src_handshake = self.op.source_handshake
      if not src_handshake:
        raise errors.OpPrereqError("Missing source handshake",
                                   errors.ECODE_INVAL)

      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
                                                           src_handshake)
      if errmsg:
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
                                   errors.ECODE_INVAL)

      # Load and check source CA
      self.source_x509_ca_pem = self.op.source_x509_ca
      if not self.source_x509_ca_pem:
        raise errors.OpPrereqError("Missing source X509 CA",
                                   errors.ECODE_INVAL)

      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
                                                    self._cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
                                   errors.ECODE_INVAL)

      self.source_x509_ca = cert

      src_instance_name = self.op.source_instance_name
      if not src_instance_name:
        raise errors.OpPrereqError("Missing source instance name",
                                   errors.ECODE_INVAL)

      self.source_instance_name = \
        utils.GetHostInfo(utils.HostInfo.NormalizeName(src_instance_name)).name

    else:
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
                                 self.op.mode, errors.ECODE_INVAL)

  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    """
    self.needed_locks = {}

    instance_name = self.op.instance_name
    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name, errors.ECODE_EXISTS)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    if self.op.iallocator:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path

      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from an absolute"
                                     " path requires a source node option.",
                                     errors.ECODE_INVAL)
      else:
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          self.op.src_path = src_path = \
            utils.PathJoin(constants.EXPORT_DIR, src_path)

  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    nics = [n.ToDict() for n in self.nics]
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     tags=[],
                     os=self.op.os_type,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     mem_size=self.be_full[constants.BE_MEMORY],
                     disks=self.disks,
                     nics=nics,
                     hypervisor=self.op.hypervisor,
                     )

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.result),
                                  ial.required_nodes), errors.ECODE_FAULT)
    self.op.pnode = ial.result[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.result))
    if ial.required_nodes == 2:
      self.op.snode = ial.result[1]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      memory=self.be_full[constants.BE_MEMORY],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d["size"], d["mode"]) for d in self.disks],
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor_name=self.op.hypervisor,
    ))

    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
          self.secondaries)
    return env, nl, nl

  def _ReadExportInfo(self):
    """Reads the export information from disk.

    It will override the opcode source node and path with the actual
    information, if these two were not specified before.

    @return: the export information

    """
    assert self.op.mode == constants.INSTANCE_IMPORT

    src_node = self.op.src_node
    src_path = self.op.src_path

    if src_node is None:
      locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
      exp_list = self.rpc.call_export_list(locked_nodes)
      found = False
      for node in exp_list:
        if exp_list[node].fail_msg:
          continue
        if src_path in exp_list[node].payload:
          found = True
          self.op.src_node = src_node = node
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
                                                       src_path)
          break
      if not found:
        raise errors.OpPrereqError("No export found for relative path %s" %
                                    src_path, errors.ECODE_INVAL)

    _CheckNodeOnline(self, src_node)
    result = self.rpc.call_export_info(src_node, src_path)
    result.Raise("No export or invalid export found in dir %s" % src_path)

    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
    if not export_info.has_section(constants.INISECT_EXP):
      raise errors.ProgrammerError("Corrupted export config",
                                   errors.ECODE_ENVIRON)

    ei_version = export_info.get(constants.INISECT_EXP, "version")
    if (int(ei_version) != constants.EXPORT_VERSION):
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                 (ei_version, constants.EXPORT_VERSION),
                                 errors.ECODE_ENVIRON)
    return export_info

  def _ReadExportParams(self, einfo):
    """Use export parameters as defaults.

    In case the opcode doesn't specify (as in override) some instance
    parameters, then try to use them from the export information, if
    that declares them.

    """
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")

    if self.op.disk_template is None:
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
        self.op.disk_template = einfo.get(constants.INISECT_INS,
                                          "disk_template")
      else:
        raise errors.OpPrereqError("No disk template specified and the export"
                                   " is missing the disk_template information",
                                   errors.ECODE_INVAL)

    if not self.op.disks:
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
        disks = []
        # TODO: import the disk iv_name too
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
          disks.append({"size": disk_sz})
        self.op.disks = disks
      else:
        raise errors.OpPrereqError("No disk info specified and the export"
                                   " is missing the disk information",
                                   errors.ECODE_INVAL)

    if (not self.op.nics and
        einfo.has_option(constants.INISECT_INS, "nic_count")):
      nics = []
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
        ndict = {}
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
          ndict[name] = v
        nics.append(ndict)
      self.op.nics = nics

    if (self.op.hypervisor is None and
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
    if einfo.has_section(constants.INISECT_HYP):
      # use the export parameters but do not override the ones
      # specified by the user
      for name, value in einfo.items(constants.INISECT_HYP):
        if name not in self.op.hvparams:
          self.op.hvparams[name] = value

    if einfo.has_section(constants.INISECT_BEP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_BEP):
        if name not in self.op.beparams:
          self.op.beparams[name] = value
    else:
      # try to read the parameters old style, from the main section
      for name in constants.BES_PARAMETERS:
        if (name not in self.op.beparams and
            einfo.has_option(constants.INISECT_INS, name)):
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)

    if einfo.has_section(constants.INISECT_OSP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_OSP):
        if name not in self.op.osparams:
          self.op.osparams[name] = value

  def _RevertToDefaults(self, cluster):
    """Revert the instance parameters to the default values.

    """
    # hvparams
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
    for name in self.op.hvparams.keys():
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
        del self.op.hvparams[name]
    # beparams
    be_defs = cluster.SimpleFillBE({})
    for name in self.op.beparams.keys():
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
        del self.op.beparams[name]
    # nic params
    nic_defs = cluster.SimpleFillNIC({})
    for nic in self.op.nics:
      for name in constants.NICS_PARAMETERS:
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
          del nic[name]
    # osparams
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
    for name in self.op.osparams.keys():
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
        del self.op.osparams[name]

  def CheckPrereq(self):
6720
    """Check prerequisites.
6721

6722
    """
6723
    if self.op.mode == constants.INSTANCE_IMPORT:
6724
      export_info = self._ReadExportInfo()
6725
      self._ReadExportParams(export_info)
6726

    
6727
    _CheckDiskTemplate(self.op.disk_template)
6728

    
6729
    if (not self.cfg.GetVGName() and
6730
        self.op.disk_template not in constants.DTS_NOT_LVM):
6731
      raise errors.OpPrereqError("Cluster does not support lvm-based"
6732
                                 " instances", errors.ECODE_STATE)
6733

    
6734
    if self.op.hypervisor is None:
6735
      self.op.hypervisor = self.cfg.GetHypervisorType()
6736

    
6737
    cluster = self.cfg.GetClusterInfo()
6738
    enabled_hvs = cluster.enabled_hypervisors
6739
    if self.op.hypervisor not in enabled_hvs:
6740
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
6741
                                 " cluster (%s)" % (self.op.hypervisor,
6742
                                  ",".join(enabled_hvs)),
6743
                                 errors.ECODE_STATE)
6744

    
6745
    # check hypervisor parameter syntax (locally)
6746
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6747
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
6748
                                      self.op.hvparams)
6749
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
6750
    hv_type.CheckParameterSyntax(filled_hvp)
6751
    self.hv_full = filled_hvp
6752
    # check that we don't specify global parameters on an instance
6753
    _CheckGlobalHvParams(self.op.hvparams)
6754

    
6755
    # fill and remember the beparams dict
6756
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6757
    self.be_full = cluster.SimpleFillBE(self.op.beparams)
6758

    
6759
    # build os parameters
6760
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
6761

    
6762
    # now that hvp/bep are in final format, let's reset to defaults,
6763
    # if told to do so
6764
    if self.op.identify_defaults:
6765
      self._RevertToDefaults(cluster)
6766

    
6767
    # NIC buildup
6768
    self.nics = []
6769
    for idx, nic in enumerate(self.op.nics):
6770
      nic_mode_req = nic.get("mode", None)
6771
      nic_mode = nic_mode_req
6772
      if nic_mode is None:
6773
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
6774

    
6775
      # in routed mode, for the first nic, the default ip is 'auto'
6776
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
6777
        default_ip_mode = constants.VALUE_AUTO
6778
      else:
6779
        default_ip_mode = constants.VALUE_NONE
6780

    
6781
      # ip validity checks
6782
      ip = nic.get("ip", default_ip_mode)
6783
      if ip is None or ip.lower() == constants.VALUE_NONE:
6784
        nic_ip = None
6785
      elif ip.lower() == constants.VALUE_AUTO:
6786
        if not self.op.name_check:
6787
          raise errors.OpPrereqError("IP address set to auto but name checks"
6788
                                     " have been skipped. Aborting.",
6789
                                     errors.ECODE_INVAL)
6790
        nic_ip = self.hostname1.ip
6791
      else:
6792
        if not utils.IsValidIP(ip):
6793
          raise errors.OpPrereqError("Given IP address '%s' doesn't look"
6794
                                     " like a valid IP" % ip,
6795
                                     errors.ECODE_INVAL)
6796
        nic_ip = ip
6797

    
6798
      # TODO: check the ip address for uniqueness
6799
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
6800
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
6801
                                   errors.ECODE_INVAL)
6802

    
6803
      # MAC address verification
6804
      mac = nic.get("mac", constants.VALUE_AUTO)
6805
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6806
        mac = utils.NormalizeAndValidateMac(mac)
6807

    
6808
        try:
6809
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
6810
        except errors.ReservationError:
6811
          raise errors.OpPrereqError("MAC address %s already in use"
6812
                                     " in cluster" % mac,
6813
                                     errors.ECODE_NOTUNIQUE)
6814

    
6815
      # bridge verification
6816
      bridge = nic.get("bridge", None)
6817
      link = nic.get("link", None)
6818
      if bridge and link:
6819
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
6820
                                   " at the same time", errors.ECODE_INVAL)
6821
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
6822
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
6823
                                   errors.ECODE_INVAL)
6824
      elif bridge:
6825
        link = bridge
6826

    
6827
      nicparams = {}
6828
      if nic_mode_req:
6829
        nicparams[constants.NIC_MODE] = nic_mode_req
6830
      if link:
6831
        nicparams[constants.NIC_LINK] = link
6832

    
6833
      check_params = cluster.SimpleFillNIC(nicparams)
6834
      objects.NIC.CheckParameterSyntax(check_params)
6835
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
6836

    
6837
    # disk checks/pre-build
6838
    self.disks = []
6839
    for disk in self.op.disks:
6840
      mode = disk.get("mode", constants.DISK_RDWR)
6841
      if mode not in constants.DISK_ACCESS_SET:
6842
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
6843
                                   mode, errors.ECODE_INVAL)
6844
      size = disk.get("size", None)
6845
      if size is None:
6846
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
6847
      try:
6848
        size = int(size)
6849
      except (TypeError, ValueError):
6850
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
6851
                                   errors.ECODE_INVAL)
6852
      new_disk = {"size": size, "mode": mode}
6853
      if "adopt" in disk:
6854
        new_disk["adopt"] = disk["adopt"]
6855
      self.disks.append(new_disk)
6856

    
6857
    if self.op.mode == constants.INSTANCE_IMPORT:
6858

    
6859
      # Check that the new instance doesn't have less disks than the export
6860
      instance_disks = len(self.disks)
6861
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
6862
      if instance_disks < export_disks:
6863
        raise errors.OpPrereqError("Not enough disks to import."
6864
                                   " (instance: %d, export: %d)" %
6865
                                   (instance_disks, export_disks),
6866
                                   errors.ECODE_INVAL)
6867

    
6868
      disk_images = []
6869
      for idx in range(export_disks):
6870
        option = 'disk%d_dump' % idx
6871
        if export_info.has_option(constants.INISECT_INS, option):
6872
          # FIXME: are the old os-es, disk sizes, etc. useful?
6873
          export_name = export_info.get(constants.INISECT_INS, option)
6874
          image = utils.PathJoin(self.op.src_path, export_name)
6875
          disk_images.append(image)
6876
        else:
6877
          disk_images.append(False)
6878

    
6879
      self.src_images = disk_images
6880

    
6881
      old_name = export_info.get(constants.INISECT_INS, 'name')
6882
      try:
6883
        exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
6884
      except (TypeError, ValueError), err:
6885
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
6886
                                   " an integer: %s" % str(err),
6887
                                   errors.ECODE_STATE)
6888
      if self.op.instance_name == old_name:
6889
        for idx, nic in enumerate(self.nics):
6890
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
6891
            nic_mac_ini = 'nic%d_mac' % idx
6892
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
6893

    
6894
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
6895

    
6896
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
6897
    if self.op.ip_check:
6898
      if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
6899
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
6900
                                   (self.check_ip, self.op.instance_name),
6901
                                   errors.ECODE_NOTUNIQUE)
6902

    
6903
    #### mac address generation
6904
    # By generating here the mac address both the allocator and the hooks get
6905
    # the real final mac address rather than the 'auto' or 'generate' value.
6906
    # There is a race condition between the generation and the instance object
6907
    # creation, which means that we know the mac is valid now, but we're not
    # sure it will be when we actually add the instance. If things go bad
    # adding the instance will abort because of a duplicate mac, and the
    # creation job will fail.
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())

    #### allocator run

    if self.op.iallocator is not None:
      self._RunAllocator()

    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    if pnode.offline:
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if pnode.drained:
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)

    self.secondaries = []

    # mirror node verification
    if self.op.disk_template in constants.DTS_NET_MIRROR:
      if self.op.snode is None:
        raise errors.OpPrereqError("The networked disk templates need"
                                   " a mirror node", errors.ECODE_INVAL)
      if self.op.snode == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be the"
                                   " primary node.", errors.ECODE_INVAL)
      _CheckNodeOnline(self, self.op.snode)
      _CheckNodeNotDrained(self, self.op.snode)
      self.secondaries.append(self.op.snode)

    nodenames = [pnode.name] + self.secondaries

    req_size = _ComputeDiskSize(self.op.disk_template,
                                self.disks)

    # Check lv size requirements, if not adopting
    if req_size is not None and not self.adopt_disks:
      _CheckNodesFreeDisk(self, nodenames, req_size)

    if self.adopt_disks: # instead, we must check the adoption data
      all_lvs = set([i["adopt"] for i in self.disks])
      if len(all_lvs) != len(self.disks):
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
                                   errors.ECODE_INVAL)
      for lv_name in all_lvs:
        try:
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("LV named %s used by another instance" %
                                     lv_name, errors.ECODE_NOTUNIQUE)

      node_lvs = self.rpc.call_lv_list([pnode.name],
                                       self.cfg.GetVGName())[pnode.name]
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
      node_lvs = node_lvs.payload
      delta = all_lvs.difference(node_lvs.keys())
      if delta:
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
      if online_lvs:
        raise errors.OpPrereqError("Online logical volumes found, cannot"
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
                                   errors.ECODE_STATE)
      # update the size of disk based on what is found
      for dsk in self.disks:
        dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
    # check OS parameters (remotely)
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    # memory check on primary node
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MEMORY],
                           self.op.hypervisor)

    self.dry_run_result = list(nodenames)

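  # Summary comment (added): Exec below implements the actual creation flow,
  # namely generate or adopt the block devices, add the instance to the
  # cluster configuration, release the node locks that are no longer needed,
  # optionally wait for the disk mirrors to sync, run the OS create/import
  # scripts and, if requested, start the instance.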
  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    if constants.ENABLE_FILE_STORAGE:
      # this is needed because os.path.join does not accept None arguments
      if self.op.file_storage_dir is None:
        string_file_storage_dir = ""
      else:
        string_file_storage_dir = self.op.file_storage_dir

      # build the full file storage dir path
      file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
                                        string_file_storage_dir, instance)
    else:
      file_storage_dir = ""

    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  file_storage_dir,
                                  self.op.file_driver,
                                  0)

    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_up=False,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            osparams=self.op.osparams,
                            )

    if self.adopt_disks:
      # rename LVs to the newly-generated names; we need to construct
      # 'fake' LV disks with the old data, plus the new unique_id
      tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
      rename_to = []
      for t_dsk, a_dsk in zip(tmp_disks, self.disks):
        rename_to.append(t_dsk.logical_id)
        t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
        self.cfg.SetDiskID(t_dsk, pnode_name)
      result = self.rpc.call_blockdev_rename(pnode_name,
                                             zip(tmp_disks, rename_to))
      result.Raise("Failed to rename adopted LVs")
    else:
      feedback_fn("* creating instance disks...")
      try:
        _CreateDisks(self, iobj)
      except errors.OpExecError:
        self.LogWarning("Device creation failed, reverting...")
        try:
          _RemoveDisks(self, iobj)
        finally:
          self.cfg.ReleaseDRBDMinors(instance)
          raise

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj, self.proc.GetECId())

    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]
    # Unlock all the nodes
    if self.op.mode == constants.INSTANCE_IMPORT:
      nodes_keep = [self.op.src_node]
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
                       if node != self.op.src_node]
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
    else:
      self.context.glm.release(locking.LEVEL_NODE)
      del self.acquired_locks[locking.LEVEL_NODE]

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      time.sleep(15)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
      if self.op.mode == constants.INSTANCE_CREATE:
        if not self.op.no_install:
          feedback_fn("* running the instance OS create scripts...")
          # FIXME: pass debug option from opcode to backend
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
                                                 self.op.debug_level)
          result.Raise("Could not add os for instance %s"
                       " on node %s" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")

        transfers = []

        for idx, image in enumerate(self.src_images):
          if not image:
            continue

          # FIXME: pass debug option from opcode to backend
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
                                             constants.IEIO_FILE, (image, ),
                                             constants.IEIO_SCRIPT,
                                             (iobj.disks[idx], idx),
                                             None)
          transfers.append(dt)

        import_result = \
          masterd.instance.TransferInstanceData(self, feedback_fn,
                                                self.op.src_node, pnode_name,
                                                self.pnode.secondary_ip,
                                                iobj, transfers)
        if not compat.all(import_result):
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
        feedback_fn("* preparing remote import...")
        connect_timeout = constants.RIE_CONNECT_TIMEOUT
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

        disk_results = masterd.instance.RemoteImport(self, feedback_fn, iobj,
                                                     self.source_x509_ca,
                                                     self._cds, timeouts)
        if not compat.all(disk_results):
          # TODO: Should the instance still be started, even if some disks
          # failed to import (valid for local imports, too)?
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

        # Run rename script on newly imported instance
        assert iobj.name == instance
        feedback_fn("Running rename script for %s" % instance)
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
                                                   self.source_instance_name,
                                                   self.op.debug_level)
        if result.fail_msg:
          self.LogWarning("Failed to run rename script for %s on node"
                          " %s: %s" % (instance, pnode_name, result.fail_msg))

      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    if self.op.start:
      iobj.admin_up = True
      self.cfg.Update(iobj, feedback_fn)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)


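# Note (added): LUConnectConsole below does not touch the instance itself; it
# only assembles the hypervisor console command and returns the SSH command
# line that the client is expected to run on the master node.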
class LUConnectConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      raise errors.OpExecError("Instance %s is not running." % instance.name)

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    hyper = hypervisor.GetHypervisor(instance.hypervisor)
    cluster = self.cfg.GetClusterInfo()
    # beparams and hvparams are passed separately, to avoid editing the
    # instance and then saving the defaults in the instance itself.
    hvparams = cluster.FillHV(instance)
    beparams = cluster.FillBE(instance)
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)

    # build ssh cmdline
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)


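# Note (added): the actual disk replacement logic lives in the TLReplaceDisks
# tasklet further down; LUReplaceDisks below and LUEvacuateNode only set up
# locking and parameters and then delegate to one or more tasklet instances.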
class LUReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "mode", "disks"]
  _OP_DEFS = [
    ("remote_node", None),
    ("iallocator", None),
    ("early_release", None),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.iallocator is not None:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    elif self.op.remote_node is not None:
      remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      self.op.remote_node = remote_node

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks, False, self.op.early_release)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    # If we're not already locking all nodes in the set we have to declare the
    # instance's primary/secondary nodes.
    if (level == locking.LEVEL_NODE and
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    nl = [
      self.cfg.GetMasterNode(),
      instance.primary_node,
      ]
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)
    return env, nl, nl


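# Note (added): LUEvacuateNode below creates one TLReplaceDisks tasklet per
# secondary instance of the evacuated node, passing delay_iallocator=True so
# that the allocator decision is taken at Exec time, when the changes made
# for the previously processed instances are already visible (see
# TLReplaceDisks._CheckPrereq2).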
class LUEvacuateNode(LogicalUnit):
  """Relocate the secondary instances from a node.

  """
  HPATH = "node-evacuate"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]
  _OP_DEFS = [
    ("remote_node", None),
    ("iallocator", None),
    ("early_release", False),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
                                  self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    self.needed_locks = {}

    # Declare node locks
    if self.op.iallocator is not None:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    elif self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    else:
      raise errors.OpPrereqError("Either an iallocator or a new secondary"
                                 " node must be given", errors.ECODE_INVAL)

    # Create tasklets for replacing disks for all secondary instances on this
    # node
    names = []
    tasklets = []

    for inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
      logging.debug("Replacing disks for instance %s", inst.name)
      names.append(inst.name)

      replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
                                self.op.iallocator, self.op.remote_node, [],
                                True, self.op.early_release)
      tasklets.append(replacer)

    self.tasklets = tasklets
    self.instance_names = names

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names

  def DeclareLocks(self, level):
    # If we're not already locking all nodes in the set we have to declare the
    # instance's primary/secondary nodes.
    if (level == locking.LEVEL_NODE and
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "NODE_NAME": self.op.node_name,
      }

    nl = [self.cfg.GetMasterNode()]

    if self.op.remote_node is not None:
      env["NEW_SECONDARY"] = self.op.remote_node
      nl.append(self.op.remote_node)

    return (env, nl, nl)


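# Parameter combinations accepted by TLReplaceDisks.CheckArguments below:
#   - REPLACE_DISK_PRI / REPLACE_DISK_SEC / REPLACE_DISK_AUTO: neither
#     remote_node nor iallocator may be given
#   - REPLACE_DISK_CHG (change the secondary): exactly one of remote_node
#     or iallocator must be given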
class TLReplaceDisks(Tasklet):
  """Replaces disks for an instance.

  Note: Locking is not within the scope of this class.

  """
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
               disks, delay_iallocator, early_release):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.mode = mode
    self.iallocator_name = iallocator_name
    self.remote_node = remote_node
    self.disks = disks
    self.delay_iallocator = delay_iallocator
    self.early_release = early_release

    # Runtime data
    self.instance = None
    self.new_node = None
    self.target_node = None
    self.other_node = None
    self.remote_node_info = None
    self.node_secondary_ip = None

  @staticmethod
  def CheckArguments(mode, remote_node, iallocator):
    """Helper function for users of this class.

    """
    # check for valid parameter combination
    if mode == constants.REPLACE_DISK_CHG:
      if remote_node is None and iallocator is None:
        raise errors.OpPrereqError("When changing the secondary either an"
                                   " iallocator script must be used or the"
                                   " new node given", errors.ECODE_INVAL)

      if remote_node is not None and iallocator is not None:
        raise errors.OpPrereqError("Give either the iallocator or the new"
                                   " secondary, not both", errors.ECODE_INVAL)

    elif remote_node is not None or iallocator is not None:
      # Not replacing the secondary
      raise errors.OpPrereqError("The iallocator and new node options can"
                                 " only be used when changing the"
                                 " secondary node", errors.ECODE_INVAL)

  @staticmethod
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
    """Compute a new secondary node using an IAllocator.

    """
    ial = IAllocator(lu.cfg, lu.rpc,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     name=instance_name,
                     relocate_from=relocate_from)

    ial.Run(iallocator_name)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
                                 " %s" % (iallocator_name, ial.info),
                                 errors.ECODE_NORES)

    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (iallocator_name,
                                  len(ial.result), ial.required_nodes),
                                 errors.ECODE_FAULT)

    remote_node_name = ial.result[0]

    lu.LogInfo("Selected new secondary for instance '%s': %s",
               instance_name, remote_node_name)

    return remote_node_name

  def _FindFaultyDisks(self, node_name):
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
                                    node_name, True)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.instance_name

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
                                 " instances", errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("The instance has a strange layout,"
                                 " expected one secondary but found %d" %
                                 len(instance.secondary_nodes),
                                 errors.ECODE_FAULT)

    if not self.delay_iallocator:
      self._CheckPrereq2()

  def _CheckPrereq2(self):
    """Check prerequisites, second part.

    This is conceptually part of CheckPrereq, but it is split out so that
    during node evacuation it can run from Exec, once the changes planned by
    the previously processed tasklets are visible; otherwise the iallocator
    would only ever see the unmodified cluster model.

    """
    instance = self.instance
    secondary_node = instance.secondary_nodes[0]

    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is not None:
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node
    else:
      self.remote_node_info = None

    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance.", errors.ECODE_INVAL)

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance.",
                                 errors.ECODE_INVAL)

    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 errors.ECODE_INVAL)

    if self.mode == constants.REPLACE_DISK_AUTO:
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

      if faulty_primary:
        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)

        old_node_info = self.cfg.GetNodeInfo(secondary_node)
        assert old_node_info is not None
        if old_node_info.offline and not self.early_release:
          # doesn't make sense to delay the release
          self.early_release = True
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
                          " early-release mode", secondary_node)

      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)

      # If not specified all disks should be replaced
      if not self.disks:
        self.disks = range(len(self.instance.disks))

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    # Check whether disks are valid
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    node_2nd_ip = {}

    for node_name in [self.target_node, self.other_node, self.new_node]:
      if node_name is not None:
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip

    self.node_secondary_ip = node_2nd_ip

  def Exec(self, feedback_fn):
    """Execute disk replacement.

    This dispatches the disk replacement to the appropriate handler.

    """
    if self.delay_iallocator:
      self._CheckPrereq2()

    if not self.disks:
      feedback_fn("No disks need replacement")
      return

    feedback_fn("Replacing disk(s) %s for %s" %
                (utils.CommaJoin(self.disks), self.instance.name))

    activate_disks = (not self.instance.admin_up)

    # Activate the instance disks if we're replacing them on a down instance
    if activate_disks:
      _StartInstanceDisks(self.lu, self.instance, True)

    try:
      # Should we replace the secondary node?
      if self.new_node is not None:
        fn = self._ExecDrbd8Secondary
      else:
        fn = self._ExecDrbd8DiskOnly

      return fn(feedback_fn)

    finally:
      # Deactivate the instance disks if we're replacing them on a
      # down instance
      if activate_disks:
        _SafeShutdownInstanceDisks(self.lu, self.instance)

  def _CheckVolumeGroup(self, nodes):
    self.lu.LogInfo("Checking volume groups")

    vgname = self.cfg.GetVGName()

    # Make sure volume group exists on all involved nodes
    results = self.rpc.call_vg_list(nodes)
    if not results:
      raise errors.OpExecError("Can't list volume groups on the nodes")

    for node in nodes:
      res = results[node]
      res.Raise("Error checking node %s" % node)
      if vgname not in res.payload:
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
                                 (vgname, node))

  def _CheckDisksExistence(self, nodes):
    # Check disk existence
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        msg = result.fail_msg
        if msg or not result.payload:
          if not msg:
            msg = "disk not found"
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
                                   (idx, node, msg))

  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
                      (idx, node_name))

      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
                                   ldisk=ldisk):
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
                                 " replace disks for instance %s" %
                                 (node_name, self.instance.name))

  def _CreateNewStorage(self, node_name):
    vgname = self.cfg.GetVGName()
    iv_names = {}

    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))

      self.cfg.SetDiskID(dev, node_name)

      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
      names = _GenerateUniqueNames(self.lu, lv_names)

      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
                             logical_id=(vgname, names[0]))
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                             logical_id=(vgname, names[1]))

      new_lvs = [lv_data, lv_meta]
      old_lvs = dev.children
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)

      # we pass force_create=True to force the LVM creation
      for new_lv in new_lvs:
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    return iv_names

  def _CheckDevices(self, node_name, iv_names):
    for name, (dev, _, _) in iv_names.iteritems():
      self.cfg.SetDiskID(dev, node_name)

      result = self.rpc.call_blockdev_find(node_name, dev)

      msg = result.fail_msg
      if msg or not result.payload:
        if not msg:
          msg = "disk not found"
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
                                 (name, msg))

      if result.payload.is_degraded:
        raise errors.OpExecError("DRBD device %s is degraded!" % name)

  def _RemoveOldStorage(self, node_name, iv_names):
    for name, (_, old_lvs, _) in iv_names.iteritems():
      self.lu.LogInfo("Remove logical volumes for %s" % name)

      for lv in old_lvs:
        self.cfg.SetDiskID(lv, node_name)

        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
        if msg:
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
                             hint="remove unused LVs manually")

  def _ReleaseNodeLock(self, node_name):
    """Releases the lock for the given node(s)."""
    self.lu.context.glm.release(locking.LEVEL_NODE, node_name)

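  # Note (added): both _ExecDrbd8DiskOnly and _ExecDrbd8Secondary honour
  # early_release: when it is set, the old storage is removed and the node
  # locks are released before the final resync, after which only the
  # sync-waiting RPCs to the primary node may still be issued.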
  def _ExecDrbd8DiskOnly(self, feedback_fn):
    """Replace a disk on the primary or secondary for DRBD 8.

    The algorithm for replace is quite complicated:

      1. for each disk to be replaced:

        1. create new LVs on the target node with unique names
        1. detach old LVs from the drbd device
        1. rename old LVs to name_replaced-<time_t>
        1. rename new LVs to old LVs
        1. attach the new LVs (with the old names now) to the drbd device

      1. wait for sync across all devices

      1. for each modified disk:

        1. remove old LVs (which now have the name name_replaced-<time_t>)

    Failures are not very well handled.

    """
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.other_node, self.target_node])
    self._CheckVolumeGroup([self.target_node, self.other_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.other_node,
                                self.other_node == self.instance.primary_node,
                                False)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    iv_names = self._CreateNewStorage(self.target_node)

    # Step: for each lv, detach+rename*2+attach
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    for dev, old_lvs, new_lvs in iv_names.itervalues():
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)

      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
                                                     old_lvs)
      result.Raise("Can't detach drbd from local storage on node"
                   " %s for device %s" % (self.target_node, dev.iv_name))
      #dev.children = []
      #cfg.Update(instance)

      # ok, we created the new LVs, so now we know we have the needed
      # storage; as such, we proceed on the target node to rename
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
      # using the assumption that logical_id == physical_id (which in
      # turn is the unique_id on that node)

      # FIXME(iustin): use a better name for the replaced LVs
      temp_suffix = int(time.time())
      ren_fn = lambda d, suff: (d.physical_id[0],
                                d.physical_id[1] + "_replaced-%s" % suff)

      # Build the rename list based on what LVs exist on the node
      rename_old_to_new = []
      for to_ren in old_lvs:
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
        if not result.fail_msg and result.payload:
          # device exists
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))

      self.lu.LogInfo("Renaming the old LVs on the target node")
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_old_to_new)
      result.Raise("Can't rename old LVs on node %s" % self.target_node)

      # Now we rename the new LVs to the old LVs
      self.lu.LogInfo("Renaming the new LVs on the target node")
      rename_new_to_old = [(new, old.physical_id)
                           for old, new in zip(old_lvs, new_lvs)]
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_new_to_old)
      result.Raise("Can't rename new LVs on node %s" % self.target_node)

      for old, new in zip(old_lvs, new_lvs):
        new.logical_id = old.logical_id
        self.cfg.SetDiskID(new, self.target_node)

      for disk in old_lvs:
        disk.logical_id = ren_fn(disk, temp_suffix)
        self.cfg.SetDiskID(disk, self.target_node)

      # Now that the new lvs have the old name, we can add them to the device
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
                                                  new_lvs)
      msg = result.fail_msg
      if msg:
        for new_lv in new_lvs:
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
                                               new_lv).fail_msg
          if msg2:
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
                                     " volumes"))
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)

      dev.children = new_lvs

      self.cfg.Update(self.instance, feedback_fn)

    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release both node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      self._ReleaseNodeLock([self.target_node, self.other_node])

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    cstep += 1
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)

  def _ExecDrbd8Secondary(self, feedback_fn):
    """Replace the secondary node for DRBD 8.

    The algorithm for replace is quite complicated:
      - for all disks of the instance:
        - create new LVs on the new node with same names
        - shutdown the drbd device on the old secondary
        - disconnect the drbd network on the primary
        - create the drbd device on the new secondary
        - network attach the drbd on the primary, using an artifice:
          the drbd code for Attach() will connect to the network if it
          finds a device which is connected to the good local disks but
          not network enabled
      - wait for sync across all devices
      - remove all disks from the old secondary

    Failures are not very well handled.

    """
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.instance.primary_node])
    self._CheckVolumeGroup([self.instance.primary_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.instance.primary_node, True, True)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
                      (self.new_node, idx))
      # we pass force_create=True to force LVM creation
      for new_lv in dev.children:
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    # Step 4: drbd minors and drbd setups changes
    # after this, we must manually remove the drbd minors on both the
    # error and the success paths
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    minors = self.cfg.AllocateDRBDMinor([self.new_node
                                         for dev in self.instance.disks],
                                        self.instance.name)
    logging.debug("Allocated minors %r", minors)

    iv_names = {}
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
                      (self.new_node, idx))
      # create new devices on new_node; note that we create two IDs:
      # one without port, so the drbd will be activated without
      # networking information on the new node at this stage, and one
      # with network, for the later activation in step 4
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
      if self.instance.primary_node == o_node1:
        p_minor = o_minor1
      else:
        assert self.instance.primary_node == o_node2, "Three-node instance?"
        p_minor = o_minor2

      new_alone_id = (self.instance.primary_node, self.new_node, None,
                      p_minor, new_minor, o_secret)
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
                    p_minor, new_minor, o_secret)

      iv_names[idx] = (dev, dev.children, new_net_id)
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
                    new_net_id)
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                              logical_id=new_alone_id,
                              children=dev.children,
                              size=dev.size)
      try:
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
                              _GetInstanceInfoText(self.instance), False)
      except errors.GenericError:
        self.cfg.ReleaseDRBDMinors(self.instance.name)
        raise

    # We have new devices, shutdown the drbd on the old secondary
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
      self.cfg.SetDiskID(dev, self.target_node)
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
      if msg:
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           " node: %s" % (idx, msg),
                           hint=("Please cleanup this device manually as"
                                 " soon as possible"))

    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
                                               self.node_secondary_ip,
                                               self.instance.disks)\
                                              [self.instance.primary_node]

    msg = result.fail_msg
    if msg:
      # detaches didn't succeed (unlikely)
      self.cfg.ReleaseDRBDMinors(self.instance.name)
      raise errors.OpExecError("Can't detach the disks from the network on"
                               " old node: %s" % (msg,))

    # if we managed to detach at least one, we update all the disks of
    # the instance to point to the new secondary
    self.lu.LogInfo("Updating instance configuration")
    for dev, _, new_logical_id in iv_names.itervalues():
      dev.logical_id = new_logical_id
      self.cfg.SetDiskID(dev, self.instance.primary_node)

    self.cfg.Update(self.instance, feedback_fn)

    # and now perform the drbd attach
    self.lu.LogInfo("Attaching primary drbds to new secondary"
                    " (standalone => connected)")
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
                                            self.new_node],
                                           self.node_secondary_ip,
                                           self.instance.disks,
                                           self.instance.name,
                                           False)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
                           to_node, msg,
                           hint=("please do a gnt-instance info to see the"
                                 " status of disks"))
    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release all node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      self._ReleaseNodeLock([self.instance.primary_node,
                             self.target_node,
                             self.new_node])

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    cstep += 1
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)


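# Note (added): LURepairNodeStorage below only supports storage types that
# advertise the SO_FIX_CONSISTENCY operation and, unless ignore_consistency
# is set, it refuses to run while an affected instance has faulty disks on
# one of its other nodes.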
class LURepairNodeStorage(NoHooksLU):
  """Repairs the volume group on a node.

  """
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    _CheckStorageType(self.op.storage_type)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def _CheckFaultyDisks(self, instance, node_name):
    """Ensure faulty disks abort the opcode or at least warn."""
    try:
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
                                  node_name, True):
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
                                   " node '%s'" % (instance.name, node_name),
                                   errors.ECODE_STATE)
    except errors.OpPrereqError, err:
      if self.op.ignore_consistency:
        self.proc.LogWarning(str(err.args[0]))
      else:
        raise

  def CheckPrereq(self):
    """Check prerequisites.

    """
    storage_type = self.op.storage_type

    if (constants.SO_FIX_CONSISTENCY not in
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " repaired" % storage_type,
                                 errors.ECODE_INVAL)

    # Check whether any instance on this node has faulty disks
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
      if not inst.admin_up:
        continue
      check_nodes = set(inst.all_nodes)
      check_nodes.discard(self.op.node_name)
      for inst_node_name in check_nodes:
        self._CheckFaultyDisks(inst, inst_node_name)

  def Exec(self, feedback_fn):
    feedback_fn("Repairing storage unit '%s' on %s ..." %
                (self.op.name, self.op.node_name))

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_execute(self.op.node_name,
                                           self.op.storage_type, st_args,
                                           self.op.name,
                                           constants.SO_FIX_CONSISTENCY)
    result.Raise("Failed to repair storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


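# Note (added): LUNodeEvacuationStrategy below does not move anything itself;
# it only computes an evacuation plan.  With remote_node it returns
# [instance, remote_node] pairs; otherwise it returns whatever the
# iallocator produces in MEVAC mode.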
class LUNodeEvacuationStrategy(NoHooksLU):
  """Computes the node evacuation strategy.

  """
  _OP_REQP = ["nodes"]
  _OP_DEFS = [
    ("remote_node", None),
    ("iallocator", None),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    if self.op.remote_node is not None and self.op.iallocator is not None:
      raise errors.OpPrereqError("Give either the iallocator or the new"
                                 " secondary, not both", errors.ECODE_INVAL)

  def ExpandNames(self):
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
    self.needed_locks = locks = {}
    if self.op.remote_node is None:
      locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]

  def CheckPrereq(self):
    pass

  def Exec(self, feedback_fn):
    if self.op.remote_node is not None:
      instances = []
      for node in self.op.nodes:
        instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
      result = []
      for i in instances:
        if i.primary_node == self.op.remote_node:
          raise errors.OpPrereqError("Node %s is the primary node of"
                                     " instance %s, cannot use it as"
                                     " secondary" %
                                     (self.op.remote_node, i.name),
                                     errors.ECODE_INVAL)
        result.append([i.name, self.op.remote_node])
    else:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=constants.IALLOCATOR_MODE_MEVAC,
                       evac_nodes=self.op.nodes)
      ial.Run(self.op.iallocator, validate=True)
      if not ial.success:
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
                                 errors.ECODE_NORES)
      result = ial.result
    return result


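# Note (added): LUGrowDisk below issues the grow request on every node of the
# instance and only then records the new size in the configuration; with
# wait_for_sync it additionally waits for the mirror to resync before
# returning.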
class LUGrowDisk(LogicalUnit):
8189
  """Grow a disk of an instance.
8190

8191
  """
8192
  HPATH = "disk-grow"
8193
  HTYPE = constants.HTYPE_INSTANCE
8194
  _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    nodenames = list(instance.all_nodes)
    for node in nodenames:
      _CheckNodeOnline(self, node)

    self.instance = instance

    if instance.disk_template not in constants.DTS_GROWABLE:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing.", errors.ECODE_INVAL)

    self.disk = instance.FindDisk(self.op.disk)

    if instance.disk_template != constants.DT_FILE:
      # TODO: check the free disk space for file, when that feature will be
      # supported
      _CheckNodesFreeDisk(self, nodenames, self.op.amount)

  def Exec(self, feedback_fn):
    """Execute disk grow.

    """
    instance = self.instance
    disk = self.disk

    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block device to grow")

    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
      result.Raise("Grow request failed to node %s" % node)

      # TODO: Rewrite code to work properly
      # DRBD goes into sync mode for a short amount of time after executing the
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
      # calling "resize" in sync mode fails. Sleeping for a short amount of
      # time is a work-around.
      time.sleep(5)

    disk.RecordGrow(self.op.amount)
    self.cfg.Update(instance, feedback_fn)
    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
      if disk_abort:
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
                             " status.\nPlease check the instance.")
      if not instance.admin_up:
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
    elif not instance.admin_up:
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
                           " not supposed to be running because no wait for"
                           " sync mode was requested.")
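
  # Illustrative sketch (not part of the original code): the LU above is
  # driven by a grow-disk opcode whose parameters mirror _OP_REQP, roughly:
  #
  #   op = opcodes.OpGrowDisk(instance_name="inst1.example.com", disk=0,
  #                           amount=1024, wait_for_sync=True)
  #
  # The exact opcode class lives in opcodes.py; the field values here
  # (instance name, disk index, amount in MiB) are hypothetical.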


class LUQueryInstanceData(NoHooksLU):
  """Query runtime instance data.

  """
  _OP_REQP = ["instances", "static"]
  REQ_BGL = False

  def CheckArguments(self):
    if not isinstance(self.op.instances, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = _ExpandInstanceName(self.cfg, name)
        self.wanted_names.append(full_name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
    else:
      self.wanted_names = None
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]
    return

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_find(node, dev)
    if result.offline:
      return None

    result.Raise("Can't compute disk status for %s" % instance_name)

    status = result.payload
    if status is None:
      return None

    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]

    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                              instance.name, dev)
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)

    if dev.children:
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
                      for child in dev.children]
    else:
      dev_children = []

    data = {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
      "mode": dev.mode,
      "size": dev.size,
      }

    return data
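
  # Illustrative sketch (values are hypothetical): for a plain LVM disk the
  # dict built above looks roughly like
  #   {"iv_name": "disk/0", "dev_type": "lvm", "size": 10240, "mode": "rw",
  #    "pstatus": (dev_path, major, minor, sync_percent, estimated_time,
  #                is_degraded, ldisk_status),
  #    "sstatus": None, "children": [], ...}
  # where "pstatus"/"sstatus" are the tuples returned by
  # _ComputeBlockdevStatus above (None in static mode or without a node).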

  def Exec(self, feedback_fn):
    """Gather and return data"""
    result = {}

    cluster = self.cfg.GetClusterInfo()

    for instance in self.wanted_instances:
      if not self.op.static:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        if remote_info and "state" in remote_info:
          remote_state = "up"
        else:
          remote_state = "down"
      else:
        remote_state = None
      if instance.admin_up:
        config_state = "up"
      else:
        config_state = "down"

      disks = [self._ComputeDiskStatus(instance, None, device)
               for device in instance.disks]

      idict = {
        "name": instance.name,
        "config_state": config_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "os_instance": instance.osparams,
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

      result[instance.name] = idict

    return result


class LUSetInstanceParams(LogicalUnit):
  """Modifies an instance's parameters.

  """
  HPATH = "instance-modify"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  _OP_DEFS = [
    ("nics", _EmptyList),
    ("disks", _EmptyList),
    ("beparams", _EmptyDict),
    ("hvparams", _EmptyDict),
    ("disk_template", None),
    ("remote_node", None),
    ("os_name", None),
    ("force_variant", False),
    ("osparams", None),
    ("force", False),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    if not (self.op.nics or self.op.disks or self.op.disk_template or
            self.op.hvparams or self.op.beparams or self.op.os_name):
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)

    if self.op.hvparams:
      _CheckGlobalHvParams(self.op.hvparams)

    # Disk validation
    disk_addremove = 0
    for disk_op, disk_dict in self.op.disks:
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
      if disk_op == constants.DDM_REMOVE:
        disk_addremove += 1
        continue
      elif disk_op == constants.DDM_ADD:
        disk_addremove += 1
      else:
        if not isinstance(disk_op, int):
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
        if not isinstance(disk_dict, dict):
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

      if disk_op == constants.DDM_ADD:
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
        if mode not in constants.DISK_ACCESS_SET:
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
                                     errors.ECODE_INVAL)
        size = disk_dict.get('size', None)
        if size is None:
          raise errors.OpPrereqError("Required disk parameter size missing",
                                     errors.ECODE_INVAL)
        try:
          size = int(size)
        except (TypeError, ValueError), err:
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
                                     str(err), errors.ECODE_INVAL)
        disk_dict['size'] = size
      else:
        # modification of disk
        if 'size' in disk_dict:
          raise errors.OpPrereqError("Disk size change not possible, use"
                                     " grow-disk", errors.ECODE_INVAL)

    if disk_addremove > 1:
      raise errors.OpPrereqError("Only one disk add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)

    if self.op.disks and self.op.disk_template is not None:
      raise errors.OpPrereqError("Disk template conversion and other disk"
                                 " changes not supported at the same time",
                                 errors.ECODE_INVAL)

    if self.op.disk_template:
      _CheckDiskTemplate(self.op.disk_template)
      if (self.op.disk_template in constants.DTS_NET_MIRROR and
          self.op.remote_node is None):
        raise errors.OpPrereqError("Changing the disk template to a mirrored"
                                   " one requires specifying a secondary node",
                                   errors.ECODE_INVAL)

    # NIC validation
    nic_addremove = 0
    for nic_op, nic_dict in self.op.nics:
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
      if nic_op == constants.DDM_REMOVE:
        nic_addremove += 1
        continue
      elif nic_op == constants.DDM_ADD:
        nic_addremove += 1
      else:
        if not isinstance(nic_op, int):
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
        if not isinstance(nic_dict, dict):
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

      # nic_dict should be a dict
      nic_ip = nic_dict.get('ip', None)
      if nic_ip is not None:
        if nic_ip.lower() == constants.VALUE_NONE:
          nic_dict['ip'] = None
        else:
          if not utils.IsValidIP(nic_ip):
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
                                       errors.ECODE_INVAL)

      nic_bridge = nic_dict.get('bridge', None)
      nic_link = nic_dict.get('link', None)
      if nic_bridge and nic_link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time", errors.ECODE_INVAL)
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
        nic_dict['bridge'] = None
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
        nic_dict['link'] = None

      if nic_op == constants.DDM_ADD:
        nic_mac = nic_dict.get('mac', None)
        if nic_mac is None:
          nic_dict['mac'] = constants.VALUE_AUTO

      if 'mac' in nic_dict:
        nic_mac = nic_dict['mac']
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)

        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
                                     " modifying an existing nic",
                                     errors.ECODE_INVAL)

    if nic_addremove > 1:
      raise errors.OpPrereqError("Only one NIC add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)
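
  # Illustrative sketch of the formats validated above (values hypothetical):
  #   self.op.disks = [(constants.DDM_ADD, {"size": 1024, "mode": "rw"})]
  #   self.op.nics  = [(0, {"ip": "none", "link": "xen-br1"}),
  #                    (constants.DDM_REMOVE, {})]
  # i.e. lists of (operation, parameters) pairs, where the operation is
  # DDM_ADD, DDM_REMOVE or the integer index of an existing disk/NIC.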

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
      if self.op.disk_template and self.op.remote_node:
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, primary and secondaries.

    """
    args = dict()
    if constants.BE_MEMORY in self.be_new:
      args['memory'] = self.be_new[constants.BE_MEMORY]
    if constants.BE_VCPUS in self.be_new:
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
    # information at all.
    if self.op.nics:
      args['nics'] = []
      nic_override = dict(self.op.nics)
      for idx, nic in enumerate(self.instance.nics):
        if idx in nic_override:
          this_nic_override = nic_override[idx]
        else:
          this_nic_override = {}
        if 'ip' in this_nic_override:
          ip = this_nic_override['ip']
        else:
          ip = nic.ip
        if 'mac' in this_nic_override:
          mac = this_nic_override['mac']
        else:
          mac = nic.mac
        if idx in self.nic_pnew:
          nicparams = self.nic_pnew[idx]
        else:
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args['nics'].append((ip, mac, mode, link))
      if constants.DDM_ADD in nic_override:
        ip = nic_override[constants.DDM_ADD].get('ip', None)
        mac = nic_override[constants.DDM_ADD]['mac']
        nicparams = self.nic_pnew[constants.DDM_ADD]
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args['nics'].append((ip, mac, mode, link))
      elif constants.DDM_REMOVE in nic_override:
        del args['nics'][-1]

    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
    if self.op.disk_template:
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl
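
  # Note: each entry appended to args['nics'] above is an
  # (ip, mac, mode, link) tuple, e.g. ("192.0.2.10", "aa:00:00:12:34:56",
  # "bridged", "xen-br0") -- the example values are hypothetical; the format
  # is what the instance hook environment builder consumes for its NIC
  # variables.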

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the requested parameter changes against the current instance
    and cluster configuration.

    """
    # checking the new params on the primary/secondary nodes

    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    cluster = self.cluster = self.cfg.GetClusterInfo()
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    pnode = instance.primary_node
    nodelist = list(instance.all_nodes)

    # OS change
    if self.op.os_name and not self.op.force:
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
                      self.op.force_variant)
      instance_os = self.op.os_name
    else:
      instance_os = instance.os

    if self.op.disk_template:
      if instance.disk_template == self.op.disk_template:
        raise errors.OpPrereqError("Instance already has disk template %s" %
                                   instance.disk_template, errors.ECODE_INVAL)

      if (instance.disk_template,
          self.op.disk_template) not in self._DISK_CONVERSIONS:
        raise errors.OpPrereqError("Unsupported disk template conversion from"
                                   " %s to %s" % (instance.disk_template,
                                                  self.op.disk_template),
                                   errors.ECODE_INVAL)
      if self.op.disk_template in constants.DTS_NET_MIRROR:
        _CheckNodeOnline(self, self.op.remote_node)
        _CheckNodeNotDrained(self, self.op.remote_node)
        disks = [{"size": d.size} for d in instance.disks]
        required = _ComputeDiskSize(self.op.disk_template, disks)
        _CheckNodesFreeDisk(self, [self.op.remote_node], required)
        _CheckInstanceDown(self, instance, "cannot change disk template")

    # hvparams processing
    if self.op.hvparams:
      hv_type = instance.hypervisor
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)

      # local check
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
      self.hv_new = hv_new # the new actual values
      self.hv_inst = i_hvdict # the new dict (without defaults)
    else:
      self.hv_new = self.hv_inst = {}

    # beparams processing
    if self.op.beparams:
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
                                   use_none=True)
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
      be_new = cluster.SimpleFillBE(i_bedict)
      self.be_new = be_new # the new actual values
      self.be_inst = i_bedict # the new dict (without defaults)
    else:
      self.be_new = self.be_inst = {}

    # osparams processing
    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_new = cluster.SimpleFillOS(instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_new = self.os_inst = {}

    self.warn = []

    if constants.BE_MEMORY in self.op.beparams and not self.op.force:
      mem_check_list = [pnode]
      if be_new[constants.BE_AUTO_BALANCE]:
        # either we changed auto_balance to yes or it was from before
        mem_check_list.extend(instance.secondary_nodes)
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
                                                  instance.hypervisor)
      nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
                                         instance.hypervisor)
      pninfo = nodeinfo[pnode]
      msg = pninfo.fail_msg
      if msg:
        # Assume the primary node is unreachable and go ahead
        self.warn.append("Can't get info from primary node %s: %s" %
                         (pnode, msg))
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
        self.warn.append("Node data from primary node %s doesn't contain"
                         " free memory information" % pnode)
      elif instance_info.fail_msg:
        self.warn.append("Can't get instance runtime information: %s" %
                         instance_info.fail_msg)
      else:
        if instance_info.payload:
          current_mem = int(instance_info.payload['memory'])
        else:
          # Assume instance not running
          # (there is a slight race condition here, but it's not very probable,
          # and we have no other way to check)
          current_mem = 0
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
                    pninfo.payload['memory_free'])
        if miss_mem > 0:
          raise errors.OpPrereqError("This change will prevent the instance"
                                     " from starting, due to %d MB of memory"
                                     " missing on its primary node" % miss_mem,
                                     errors.ECODE_NORES)

      if be_new[constants.BE_AUTO_BALANCE]:
        for node, nres in nodeinfo.items():
          if node not in instance.secondary_nodes:
            continue
          msg = nres.fail_msg
          if msg:
            self.warn.append("Can't get info from secondary node %s: %s" %
                             (node, msg))
          elif not isinstance(nres.payload.get('memory_free', None), int):
            self.warn.append("Secondary node %s didn't return free"
                             " memory information" % node)
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
            self.warn.append("Not enough memory to failover instance to"
                             " secondary node %s" % node)
8779
    # NIC processing
8780
    self.nic_pnew = {}
8781
    self.nic_pinst = {}
8782
    for nic_op, nic_dict in self.op.nics:
8783
      if nic_op == constants.DDM_REMOVE:
8784
        if not instance.nics:
8785
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
8786
                                     errors.ECODE_INVAL)
8787
        continue
8788
      if nic_op != constants.DDM_ADD:
8789
        # an existing nic
8790
        if not instance.nics:
8791
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
8792
                                     " no NICs" % nic_op,
8793
                                     errors.ECODE_INVAL)
8794
        if nic_op < 0 or nic_op >= len(instance.nics):
8795
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
8796
                                     " are 0 to %d" %
8797
                                     (nic_op, len(instance.nics) - 1),
8798
                                     errors.ECODE_INVAL)
8799
        old_nic_params = instance.nics[nic_op].nicparams
8800
        old_nic_ip = instance.nics[nic_op].ip
8801
      else:
8802
        old_nic_params = {}
8803
        old_nic_ip = None
8804

    
8805
      update_params_dict = dict([(key, nic_dict[key])
8806
                                 for key in constants.NICS_PARAMETERS
8807
                                 if key in nic_dict])
8808

    
8809
      if 'bridge' in nic_dict:
8810
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
8811

    
8812
      new_nic_params = _GetUpdatedParams(old_nic_params,
8813
                                         update_params_dict)
8814
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
8815
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
8816
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
8817
      self.nic_pinst[nic_op] = new_nic_params
8818
      self.nic_pnew[nic_op] = new_filled_nic_params
8819
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
8820

    
8821
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
8822
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
8823
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
8824
        if msg:
8825
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
8826
          if self.op.force:
8827
            self.warn.append(msg)
8828
          else:
8829
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
8830
      if new_nic_mode == constants.NIC_MODE_ROUTED:
8831
        if 'ip' in nic_dict:
8832
          nic_ip = nic_dict['ip']
8833
        else:
8834
          nic_ip = old_nic_ip
8835
        if nic_ip is None:
8836
          raise errors.OpPrereqError('Cannot set the nic ip to None'
8837
                                     ' on a routed nic', errors.ECODE_INVAL)
8838
      if 'mac' in nic_dict:
8839
        nic_mac = nic_dict['mac']
8840
        if nic_mac is None:
8841
          raise errors.OpPrereqError('Cannot set the nic mac to None',
8842
                                     errors.ECODE_INVAL)
8843
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8844
          # otherwise generate the mac
8845
          nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
8846
        else:
8847
          # or validate/reserve the current one
8848
          try:
8849
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
8850
          except errors.ReservationError:
8851
            raise errors.OpPrereqError("MAC address %s already in use"
8852
                                       " in cluster" % nic_mac,
8853
                                       errors.ECODE_NOTUNIQUE)
8854

    
8855
    # DISK processing
8856
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
8857
      raise errors.OpPrereqError("Disk operations not supported for"
8858
                                 " diskless instances",
8859
                                 errors.ECODE_INVAL)
8860
    for disk_op, _ in self.op.disks:
8861
      if disk_op == constants.DDM_REMOVE:
8862
        if len(instance.disks) == 1:
8863
          raise errors.OpPrereqError("Cannot remove the last disk of"
8864
                                     " an instance", errors.ECODE_INVAL)
8865
        _CheckInstanceDown(self, instance, "cannot remove disks")
8866

    
8867
      if (disk_op == constants.DDM_ADD and
8868
          len(instance.nics) >= constants.MAX_DISKS):
8869
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
8870
                                   " add more" % constants.MAX_DISKS,
8871
                                   errors.ECODE_STATE)
8872
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
8873
        # an existing disk
8874
        if disk_op < 0 or disk_op >= len(instance.disks):
8875
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
8876
                                     " are 0 to %d" %
8877
                                     (disk_op, len(instance.disks)),
8878
                                     errors.ECODE_INVAL)
8879

    
8880
    return
8881

    
8882

  def _ConvertPlainToDrbd(self, feedback_fn):
    """Converts an instance from plain to drbd.

    """
    feedback_fn("Converting template to drbd")
    instance = self.instance
    pnode = instance.primary_node
    snode = self.op.remote_node

    # create a fake disk info for _GenerateDiskTemplate
    disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
                                      instance.name, pnode, [snode],
                                      disk_info, None, None, 0)
    info = _GetInstanceInfoText(instance)
    feedback_fn("Creating additional volumes...")
    # first, create the missing data and meta devices
    for disk in new_disks:
      # unfortunately this is... not too nice
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
                            info, True)
      for child in disk.children:
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
    # at this stage, all new LVs have been created, we can rename the
    # old ones
    feedback_fn("Renaming original volumes...")
    rename_list = [(o, n.children[0].logical_id)
                   for (o, n) in zip(instance.disks, new_disks)]
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
    result.Raise("Failed to rename original LVs")

    feedback_fn("Initializing DRBD devices...")
    # all child devices are in place, we can now create the DRBD devices
    for disk in new_disks:
      for node in [pnode, snode]:
        f_create = node == pnode
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)

    # at this point, the instance has been modified
    instance.disk_template = constants.DT_DRBD8
    instance.disks = new_disks
    self.cfg.Update(instance, feedback_fn)

    # disks are created, waiting for sync
    disk_abort = not _WaitForSync(self, instance)
    if disk_abort:
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance, please clean up manually")

  def _ConvertDrbdToPlain(self, feedback_fn):
    """Converts an instance from drbd to plain.

    """
    instance = self.instance
    assert len(instance.secondary_nodes) == 1
    pnode = instance.primary_node
    snode = instance.secondary_nodes[0]
    feedback_fn("Converting template to plain")

    old_disks = instance.disks
    new_disks = [d.children[0] for d in old_disks]

    # copy over size and mode
    for parent, child in zip(old_disks, new_disks):
      child.size = parent.size
      child.mode = parent.mode

    # update instance structure
    instance.disks = new_disks
    instance.disk_template = constants.DT_PLAIN
    self.cfg.Update(instance, feedback_fn)

    feedback_fn("Removing volumes on the secondary node...")
    for disk in old_disks:
      self.cfg.SetDiskID(disk, snode)
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
      if msg:
        self.LogWarning("Could not remove block device %s on node %s,"
                        " continuing anyway: %s", disk.iv_name, snode, msg)

    feedback_fn("Removing unneeded volumes on the primary node...")
    for idx, disk in enumerate(old_disks):
      meta = disk.children[1]
      self.cfg.SetDiskID(meta, pnode)
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
      if msg:
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
                        " continuing anyway: %s", idx, pnode, msg)

  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    result = []
    instance = self.instance
    # disk changes
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        # remove the last disk
        device = instance.disks.pop()
        device_idx = len(instance.disks)
        for node, disk in device.ComputeNodeTree(instance.primary_node):
          self.cfg.SetDiskID(disk, node)
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
          if msg:
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
                            " continuing anyway", device_idx, node, msg)
        result.append(("disk/%d" % device_idx, "remove"))
      elif disk_op == constants.DDM_ADD:
        # add a new disk
        if instance.disk_template == constants.DT_FILE:
          file_driver, file_path = instance.disks[0].logical_id
          file_path = os.path.dirname(file_path)
        else:
          file_driver = file_path = None
        disk_idx_base = len(instance.disks)
        new_disk = _GenerateDiskTemplate(self,
                                         instance.disk_template,
                                         instance.name, instance.primary_node,
                                         instance.secondary_nodes,
                                         [disk_dict],
                                         file_path,
                                         file_driver,
                                         disk_idx_base)[0]
        instance.disks.append(new_disk)
        info = _GetInstanceInfoText(instance)

        logging.info("Creating volume %s for instance %s",
                     new_disk.iv_name, instance.name)
        # Note: this needs to be kept in sync with _CreateDisks
        #HARDCODE
        for node in instance.all_nodes:
          f_create = node == instance.primary_node
          try:
            _CreateBlockDev(self, node, instance, new_disk,
                            f_create, info, f_create)
          except errors.OpExecError, err:
            self.LogWarning("Failed to create volume %s (%s) on"
                            " node %s: %s",
                            new_disk.iv_name, new_disk, node, err)
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
                       (new_disk.size, new_disk.mode)))
      else:
        # change a given disk
        instance.disks[disk_op].mode = disk_dict['mode']
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))

    if self.op.disk_template:
      r_shut = _ShutdownInstanceDisks(self, instance)
      if not r_shut:
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
                                 " proceed with disk template conversion")
      mode = (instance.disk_template, self.op.disk_template)
      try:
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
      except:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise
      result.append(("disk_template", self.op.disk_template))

    # NIC changes
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        # remove the last nic
        del instance.nics[-1]
        result.append(("nic.%d" % len(instance.nics), "remove"))
      elif nic_op == constants.DDM_ADD:
        # mac and bridge should be set by now
        mac = nic_dict['mac']
        ip = nic_dict.get('ip', None)
        nicparams = self.nic_pinst[constants.DDM_ADD]
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
        instance.nics.append(new_nic)
        result.append(("nic.%d" % (len(instance.nics) - 1),
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
                       (new_nic.mac, new_nic.ip,
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
                       )))
      else:
        for key in 'mac', 'ip':
          if key in nic_dict:
            setattr(instance.nics[nic_op], key, nic_dict[key])
        if nic_op in self.nic_pinst:
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
        for key, val in nic_dict.iteritems():
          result.append(("nic.%s/%d" % (key, nic_op), val))

    # hvparams changes
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    # OS change
    if self.op.os_name:
      instance.os = self.op.os_name

    # osparams changes
    if self.op.osparams:
      instance.osparams = self.os_inst
      for key, val in self.op.osparams.iteritems():
        result.append(("os/%s" % key, val))

    self.cfg.Update(instance, feedback_fn)

    return result

  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }
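
  # _DISK_CONVERSIONS maps (current template, requested template) pairs to
  # the conversion helpers defined earlier in this class; Exec() looks the
  # pair up and calls the helper with self passed explicitly. It is defined
  # after the methods so that the function objects already exist in the
  # class namespace. Only plain <-> drbd conversions are supported here.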


class LUQueryExports(NoHooksLU):
  """Query the exports list

  """
  _OP_REQP = ['nodes']
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node.

    """
    rpcresult = self.rpc.call_export_list(self.nodes)
    result = {}
    for node in rpcresult:
      if rpcresult[node].fail_msg:
        result[node] = False
      else:
        result[node] = rpcresult[node].payload

    return result
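
  # Illustrative sketch of the return value (names are hypothetical):
  #   {"node1.example.com": ["inst1.example.com", "inst2.example.com"],
  #    "node2.example.com": False}
  # i.e. a list of export names per reachable node, or False for nodes whose
  # export list could not be retrieved.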


class LUPrepareExport(NoHooksLU):
  """Prepares an instance for an export and returns useful information.

  """
  _OP_REQP = ["instance_name", "mode"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    if self.op.mode not in constants.EXPORT_MODES:
      raise errors.OpPrereqError("Invalid export mode %r" % self.op.mode,
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self._cds = _GetClusterDomainSecret()

  def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    """
    instance = self.instance

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      salt = utils.GenerateSecret(8)

      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
      result = self.rpc.call_x509_cert_create(instance.primary_node,
                                              constants.RIE_CERT_VALIDITY)
      result.Raise("Can't create X509 key and certificate on %s" % result.node)

      (name, cert_pem) = result.payload

      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                             cert_pem)

      return {
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
                          salt),
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
        }

    return None
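
  # For remote exports, the dict returned above ties into LUExportInstance:
  # "x509_key_name" is a (name, HMAC over the name with the cluster domain
  # secret, salt) triple that LUExportInstance.CheckPrereq verifies again,
  # and "x509_ca" is the freshly created certificate signed with the same
  # secret. Local-mode exports need no preparation, hence the None return.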


class LUExportInstance(LogicalUnit):
  """Export an instance to an image in the cluster.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "target_node", "shutdown"]
  _OP_DEFS = [
    ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT),
    ("remove_instance", False),
    ("ignore_remove_failures", False),
    ("mode", constants.EXPORT_MODE_LOCAL),
    ("x509_key_name", None),
    ("destination_x509_ca", None),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.x509_key_name = self.op.x509_key_name
    self.dest_x509_ca_pem = self.op.destination_x509_ca

    if self.op.remove_instance and not self.op.shutdown:
      raise errors.OpPrereqError("Cannot remove instance without shutting it"
                                 " down first")

    if self.op.mode not in constants.EXPORT_MODES:
      raise errors.OpPrereqError("Invalid export mode %r" % self.op.mode,
                                 errors.ECODE_INVAL)

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      if not self.x509_key_name:
        raise errors.OpPrereqError("Missing X509 key name for encryption",
                                   errors.ECODE_INVAL)

      if not self.dest_x509_ca_pem:
        raise errors.OpPrereqError("Missing destination X509 CA",
                                   errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    # Lock all nodes for local exports
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      # FIXME: lock only instance primary and destination node
      #
      # Sad but true, for now we have to lock all nodes, as we don't know where
      # the previous export might be, and in this LU we search for it and
      # remove it from its current node. In the future we could fix this by:
      #  - making a tasklet to search (share-lock all), then create the
      #    new one, then one to remove, after
      #  - removing the removal operation altogether
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # All nodes are locked anyway, so nothing to do here.

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    """
    env = {
      "EXPORT_MODE": self.op.mode,
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      # TODO: Generic function for boolean env variables
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      nl.append(self.op.target_node)

    return env, nl, nl
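
  # Illustrative hook environment produced above for a local export
  # (values are hypothetical):
  #   EXPORT_MODE=local, EXPORT_NODE=node3.example.com,
  #   EXPORT_DO_SHUTDOWN=True, SHUTDOWN_TIMEOUT=120, REMOVE_INSTANCE=False
  # plus the usual per-instance variables from _BuildInstanceHookEnvByObject.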

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and node names are valid.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
      assert self.dst_node is not None

      _CheckNodeOnline(self, self.dst_node.name)
      _CheckNodeNotDrained(self, self.dst_node.name)

      self._cds = None
      self.dest_disk_info = None
      self.dest_x509_ca = None

    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
      self.dst_node = None

      if len(self.op.target_node) != len(self.instance.disks):
        raise errors.OpPrereqError(("Received destination information for %s"
                                    " disks, but instance %s has %s disks") %
                                   (len(self.op.target_node), instance_name,
                                    len(self.instance.disks)),
                                   errors.ECODE_INVAL)

      cds = _GetClusterDomainSecret()

      # Check X509 key name
      try:
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)

      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
                                   errors.ECODE_INVAL)

      # Load and verify CA
      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
                                   (msg, ), errors.ECODE_INVAL)

      self.dest_x509_ca = cert

      # Verify target information
      disk_info = []
      for idx, disk_data in enumerate(self.op.target_node):
        try:
          (host, port, magic) = \
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
        except errors.GenericError, err:
          raise errors.OpPrereqError("Target info for disk %s: %s" %
                                     (idx, err), errors.ECODE_INVAL)

        disk_info.append((host, port, magic))

      assert len(disk_info) == len(self.op.target_node)
      self.dest_disk_info = disk_info

    else:
      raise errors.ProgrammerError("Unhandled export mode %r" %
                                   self.op.mode)

    # instance disk type verification
    # TODO: Implement export support for file-based disks
    for disk in self.instance.disks:
      if disk.dev_type == constants.LD_FILE:
        raise errors.OpPrereqError("Export not supported for instances with"
                                   " file-based disks", errors.ECODE_INVAL)
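
  # Note on remote mode: self.op.target_node is not a node name but a list
  # with one opaque entry per instance disk; CheckRemoteExportDiskInfo above
  # verifies each entry against the cluster domain secret and yields the
  # (host, port, magic) tuples stored in self.dest_disk_info for RemoteExport.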

  def _CleanupExports(self, feedback_fn):
    """Removes exports of current instance from all other nodes.

    If an instance in a cluster with nodes A..D was exported to node C, its
    exports will be removed from the nodes A, B and D.

    """
    assert self.op.mode != constants.EXPORT_MODE_REMOTE

    nodelist = self.cfg.GetNodeList()
    nodelist.remove(self.dst_node.name)

    # on one-node clusters nodelist will be empty after the removal;
    # if we proceed the backup would be removed because OpQueryExports
    # substitutes an empty list with the full cluster node list.
    iname = self.instance.name
    if nodelist:
      feedback_fn("Removing old exports for instance %s" % iname)
      exportlist = self.rpc.call_export_list(nodelist)
      for node in exportlist:
        if exportlist[node].fail_msg:
          continue
        if iname in exportlist[node].payload:
          msg = self.rpc.call_export_remove(node, iname).fail_msg
          if msg:
            self.LogWarning("Could not remove older export for instance %s"
                            " on node %s: %s", iname, node, msg)

  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    """
    assert self.op.mode in constants.EXPORT_MODES

    instance = self.instance
    src_node = instance.primary_node

    if self.op.shutdown:
      # shutdown the instance, but not the disks
      feedback_fn("Shutting down instance %s" % instance.name)
      result = self.rpc.call_instance_shutdown(src_node, instance,
                                               self.op.shutdown_timeout)
      # TODO: Maybe ignore failures if ignore_remove_failures is set
      result.Raise("Could not shutdown instance %s on"
                   " node %s" % (instance.name, src_node))

    # set the disks ID correctly since call_instance_start needs the
    # correct drbd minor to create the symlinks
    for disk in instance.disks:
      self.cfg.SetDiskID(disk, src_node)

    activate_disks = (not instance.admin_up)

    if activate_disks:
      # Activate the instance disks if we're exporting a stopped instance
      feedback_fn("Activating disks for %s" % instance.name)
      _StartInstanceDisks(self, instance, None)

    try:
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
                                                     instance)

      helper.CreateSnapshots()
      try:
        if (self.op.shutdown and instance.admin_up and
            not self.op.remove_instance):
          assert not activate_disks
          feedback_fn("Starting instance %s" % instance.name)
          result = self.rpc.call_instance_start(src_node, instance, None, None)
          msg = result.fail_msg
          if msg:
            feedback_fn("Failed to start instance: %s" % msg)
            _ShutdownInstanceDisks(self, instance)
            raise errors.OpExecError("Could not start instance: %s" % msg)

        if self.op.mode == constants.EXPORT_MODE_LOCAL:
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

          (key_name, _, _) = self.x509_key_name

          dest_ca_pem = \
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                            self.dest_x509_ca)

          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
                                                     key_name, dest_ca_pem,
                                                     timeouts)
      finally:
        helper.Cleanup()

      # Check for backwards compatibility
      assert len(dresults) == len(instance.disks)
      assert compat.all(isinstance(i, bool) for i in dresults), \
             "Not all results are boolean: %r" % dresults

    finally:
      if activate_disks:
        feedback_fn("Deactivating disks for %s" % instance.name)
        _ShutdownInstanceDisks(self, instance)

    # Remove instance if requested
    if self.op.remove_instance:
      if not (compat.all(dresults) and fin_resu):
        feedback_fn("Not removing instance %s as parts of the export failed" %
                    instance.name)
      else:
        feedback_fn("Removing instance %s" % instance.name)
        _RemoveInstance(self, feedback_fn, instance,
                        self.op.ignore_remove_failures)

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self._CleanupExports(feedback_fn)

    return fin_resu, dresults
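
  # The (fin_resu, dresults) pair returned above is the overall finalization
  # status plus one boolean per instance disk; these are the same values the
  # remove_instance branch above requires to all be true before deleting the
  # instance.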


class LURemoveExport(NoHooksLU):
  """Remove exports related to the named instance.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.
    """
    pass

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name

    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
9556
  """Generic tags LU.
9557

9558
  This is an abstract class which is the parent of all the other tags LUs.
9559

9560
  """
9561

    
9562
  def ExpandNames(self):
9563
    self.needed_locks = {}
9564
    if self.op.kind == constants.TAG_NODE:
9565
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
9566
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
9567
    elif self.op.kind == constants.TAG_INSTANCE:
9568
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
9569
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
9570

    
9571
  def CheckPrereq(self):
9572
    """Check prerequisites.
9573

9574
    """
9575
    if self.op.kind == constants.TAG_CLUSTER:
9576
      self.target = self.cfg.GetClusterInfo()
9577
    elif self.op.kind == constants.TAG_NODE:
9578
      self.target = self.cfg.GetNodeInfo(self.op.name)
9579
    elif self.op.kind == constants.TAG_INSTANCE:
9580
      self.target = self.cfg.GetInstanceInfo(self.op.name)
9581
    else:
9582
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
9583
                                 str(self.op.kind), errors.ECODE_INVAL)
9584

    
9585

    
9586
class LUGetTags(TagsLU):
9587
  """Returns the tags of a given object.
9588

9589
  """
9590
  _OP_REQP = ["kind", "name"]
9591
  REQ_BGL = False
9592

    
9593
  def Exec(self, feedback_fn):
9594
    """Returns the tag list.
9595

9596
    """
9597
    return list(self.target.GetTags())
9598

    
9599

    
9600
class LUSearchTags(NoHooksLU):
9601
  """Searches the tags for a given pattern.
9602

9603
  """
9604
  _OP_REQP = ["pattern"]
9605
  REQ_BGL = False
9606

    
9607
  def ExpandNames(self):
9608
    self.needed_locks = {}
9609

    
9610
  def CheckPrereq(self):
9611
    """Check prerequisites.
9612

9613
    This checks the pattern passed for validity by compiling it.
9614

9615
    """
9616
    try:
9617
      self.re = re.compile(self.op.pattern)
9618
    except re.error, err:
9619
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
9620
                                 (self.op.pattern, err), errors.ECODE_INVAL)
9621

    
9622
  def Exec(self, feedback_fn):
9623
    """Returns the tag list.
9624

9625
    """
9626
    cfg = self.cfg
9627
    tgts = [("/cluster", cfg.GetClusterInfo())]
9628
    ilist = cfg.GetAllInstancesInfo().values()
9629
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
9630
    nlist = cfg.GetAllNodesInfo().values()
9631
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
9632
    results = []
9633
    for path, target in tgts:
9634
      for tag in target.GetTags():
9635
        if self.re.search(tag):
9636
          results.append((path, tag))
9637
    return results
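
  # Example (illustrative, with hypothetical names): a search for a pattern
  # such as "^web" could yield
  #   [("/cluster", "webfarm"), ("/instances/inst1.example.com", "webserver")]
  # since Exec pairs each matching tag with the pseudo-path ("/cluster",
  # "/instances/<name>" or "/nodes/<name>") of the object carrying it.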


class LUAddTags(TagsLU):
  """Sets a tag on a given object.

  """
  _OP_REQP = ["kind", "name", "tags"]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)


class LUDelTags(TagsLU):
  """Delete a list of tags from a given object.

  """
  _OP_REQP = ["kind", "name", "tags"]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()
    if not del_tags <= cur_tags:
      diff_tags = del_tags - cur_tags
      diff_names = ["'%s'" % tag for tag in diff_tags]
      diff_names.sort()
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (",".join(diff_names)), errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)
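
  # Note: the tag LUs above back the tag opcodes; from the command line they
  # are typically reached through commands such as "gnt-cluster add-tags" or
  # "gnt-instance list-tags" (the exact command set depends on the Ganeti
  # version).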


class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  _OP_REQP = ["duration", "on_master", "on_nodes"]
  REQ_BGL = False

  def CheckArguments(self):
    # TODO: convert to the type system
    self.op.repeat = getattr(self.op, "repeat", 0)
    if self.op.repeat < 0:
      raise errors.OpPrereqError("Repetition count cannot be negative")

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    """

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()
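
  # Example (illustrative; the node name and values are hypothetical): an
  # opcode carrying duration=10.0, on_master=True,
  # on_nodes=["node1.example.com"] and repeat=2 makes Exec run two iterations,
  # each sleeping for 10 seconds on the master and on node1.example.com.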


class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has four sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, result) for
      easy usage

  """
  # pylint: disable-msg=R0902
  # lots of instance attributes
  _ALLO_KEYS = [
    "name", "mem_size", "disks", "disk_template",
    "os", "tags", "nics", "vcpus", "hypervisor",
    ]
  _RELO_KEYS = [
    "name", "relocate_from",
    ]
  _EVAC_KEYS = [
    "evac_nodes",
    ]

  def __init__(self, cfg, rpc, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.mem_size = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.evac_nodes = None
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      keyset = self._ALLO_KEYS
      fn = self._AddNewInstance
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      keyset = self._RELO_KEYS
      fn = self._AddRelocateInstance
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      keyset = self._EVAC_KEYS
      fn = self._AddEvacuateNodes
    else:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)
    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(fn)

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_results = {}
    node_list = cfg.GetNodeList()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)
    for nname, nresult in node_data.items():
      # first fill in static (config-based) values
      ninfo = cfg.GetNodeInfo(nname)
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        }

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr.update(pnr_dyn)

      node_results[nname] = pnr
    data["nodes"] = node_results

    # instance data
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                   }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    data["instances"] = instance_data

    self.in_data = data
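
  # Rough shape of the structure assembled above (keys as built in this
  # method; the node and instance names shown are hypothetical):
  #   self.in_data = {
  #     "version": constants.IALLOCATOR_VERSION,
  #     "cluster_name": "cluster.example.com",
  #     "cluster_tags": [...],
  #     "enabled_hypervisors": [...],
  #     "nodes": {"node1.example.com": {"total_memory": ..., ...}, ...},
  #     "instances": {"inst1.example.com": {"memory": ..., ...}, ...},
  #     }
  # The "request" key is added later by _BuildInputData.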

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {
      "evac_nodes": self.evac_nodes
      }
    return request

  def _BuildInputData(self, fn):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["result"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
                               " is not a list")
    self.out_data = rdict
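
  # After serializer.Load, a well-formed allocator reply accepted by the
  # checks above looks roughly like (node names are hypothetical):
  #   {"success": True,
  #    "info": "allocation successful",
  #    "result": ["node1.example.com", "node2.example.com"]}
  # Older scripts that still return the node list under "nodes" are remapped
  # to "result" by the backwards-compatibility branch above.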


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  _OP_REQP = ["direction", "mode", "name"]
  _OP_DEFS = [
    ("hypervisor", None),
    ("allocator", None),
    ]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode of
    the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["name", "mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      for row in self.op.nics:
        if (not isinstance(row, dict) or
            "mac" not in row or
            "ip" not in row or
            "bridge" not in row):
          raise errors.OpPrereqError("Invalid contents of the 'nics'"
                                     " parameter", errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      if not hasattr(self.op, "name"):
        raise errors.OpPrereqError("Missing attribute 'name' on opcode input",
                                   errors.ECODE_INVAL)
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result
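
  # Summary of the two directions handled above: with IALLOCATOR_DIR_IN the
  # LU only returns the generated input text (ial.in_text) without calling
  # any script, while with IALLOCATOR_DIR_OUT it runs the named allocator and
  # returns its raw, unvalidated output (ial.out_text).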