1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008, 2009, 2010 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable-msg=W0201,C0302
25

    
26
# W0201 since most LU attributes are defined in CheckPrereq or similar
27
# functions
28

    
29
# C0302: since we have way too many lines in this module
30

    
31
import os
32
import os.path
33
import time
34
import re
35
import platform
36
import logging
37
import copy
38
import OpenSSL
39
import socket
40
import tempfile
41
import shutil
42

    
43
from ganeti import ssh
44
from ganeti import utils
45
from ganeti import errors
46
from ganeti import hypervisor
47
from ganeti import locking
48
from ganeti import constants
49
from ganeti import objects
50
from ganeti import serializer
51
from ganeti import ssconf
52
from ganeti import uidpool
53
from ganeti import compat
54
from ganeti import masterd
55
from ganeti import netutils
56
from ganeti import ht
57

    
58
import ganeti.masterd.instance # pylint: disable-msg=W0611
59

    
60
# Common opcode attributes
61

    
62
#: output fields for a query operation
63
_POutputFields = ("output_fields", ht.NoDefault, ht.TListOf(ht.TNonEmptyString))
64

    
65

    
66
#: the shutdown timeout
67
_PShutdownTimeout = ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT,
68
                     ht.TPositiveInt)
69

    
70
#: the force parameter
71
_PForce = ("force", False, ht.TBool)
72

    
73
#: a required instance name (for single-instance LUs)
74
_PInstanceName = ("instance_name", ht.NoDefault, ht.TNonEmptyString)
75

    
76
#: Whether to ignore offline nodes
77
_PIgnoreOfflineNodes = ("ignore_offline_nodes", False, ht.TBool)
78

    
79
#: a required node name (for single-node LUs)
80
_PNodeName = ("node_name", ht.NoDefault, ht.TNonEmptyString)
81

    
82
#: the migration type (live/non-live)
83
_PMigrationMode = ("mode", None,
84
                   ht.TOr(ht.TNone, ht.TElemOf(constants.HT_MIGRATION_MODES)))
85

    
86
#: the obsolete 'live' mode (boolean)
87
_PMigrationLive = ("live", None, ht.TMaybeBool)
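# Illustrative note (not part of the original module): each of the _P*
# definitions above follows the (attribute name, default value, type check)
# convention consumed by LogicalUnit.__init__ below. A hypothetical required
# parameter with no default and a non-empty-string check would look like
#
#   _PExampleName = ("example_name", ht.NoDefault, ht.TNonEmptyString)
#
# while an optional boolean flag defaulting to False would look like
#
#   _PExampleFlag = ("example_flag", False, ht.TBool)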
88

    
89

    
90
# End types
91
class LogicalUnit(object):
92
  """Logical Unit base class.
93

94
  Subclasses must follow these rules:
95
    - implement ExpandNames
96
    - implement CheckPrereq (except when tasklets are used)
97
    - implement Exec (except when tasklets are used)
98
    - implement BuildHooksEnv
99
    - redefine HPATH and HTYPE
100
    - optionally redefine their run requirements:
101
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
102

103
  Note that all commands require root permissions.
104

105
  @ivar dry_run_result: the value (if any) that will be returned to the caller
106
      in dry-run mode (signalled by opcode dry_run parameter)
107
  @cvar _OP_PARAMS: a list of opcode attributes, their default values
108
      they should get if not already defined, and types they must match
109

110
  """
111
  HPATH = None
112
  HTYPE = None
113
  _OP_PARAMS = []
114
  REQ_BGL = True
115

    
116
  def __init__(self, processor, op, context, rpc):
117
    """Constructor for LogicalUnit.
118

119
    This needs to be overridden in derived classes in order to check op
120
    validity.
121

122
    """
123
    self.proc = processor
124
    self.op = op
125
    self.cfg = context.cfg
126
    self.context = context
127
    self.rpc = rpc
128
    # Dicts used to declare locking needs to mcpu
129
    self.needed_locks = None
130
    self.acquired_locks = {}
131
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
132
    self.add_locks = {}
133
    self.remove_locks = {}
134
    # Used to force good behavior when calling helper functions
135
    self.recalculate_locks = {}
136
    self.__ssh = None
137
    # logging
138
    self.Log = processor.Log # pylint: disable-msg=C0103
139
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
140
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
141
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
142
    # support for dry-run
143
    self.dry_run_result = None
144
    # support for generic debug attribute
145
    if (not hasattr(self.op, "debug_level") or
146
        not isinstance(self.op.debug_level, int)):
147
      self.op.debug_level = 0
148

    
149
    # Tasklets
150
    self.tasklets = None
151

    
152
    # The new kind-of-type-system
153
    op_id = self.op.OP_ID
154
    for attr_name, aval, test in self._OP_PARAMS:
155
      if not hasattr(op, attr_name):
156
        if aval == ht.NoDefault:
157
          raise errors.OpPrereqError("Required parameter '%s.%s' missing" %
158
                                     (op_id, attr_name), errors.ECODE_INVAL)
159
        else:
160
          if callable(aval):
161
            dval = aval()
162
          else:
163
            dval = aval
164
          setattr(self.op, attr_name, dval)
165
      attr_val = getattr(op, attr_name)
166
      if test == ht.NoType:
167
        # no tests here
168
        continue
169
      if not callable(test):
170
        raise errors.ProgrammerError("Validation for parameter '%s.%s' failed,"
171
                                     " given type is not a proper type (%s)" %
172
                                     (op_id, attr_name, test))
173
      if not test(attr_val):
174
        logging.error("OpCode %s, parameter %s, has invalid type %s/value %s",
175
                      self.op.OP_ID, attr_name, type(attr_val), attr_val)
176
        raise errors.OpPrereqError("Parameter '%s.%s' fails validation" %
177
                                   (op_id, attr_name), errors.ECODE_INVAL)
178

    
179
    self.CheckArguments()
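    # Illustrative sketch (hypothetical LU, not part of the original module):
    # a subclass typically mixes the shared _P* tuples defined above with its
    # own entries, e.g.
    #
    #   class LUExampleOperation(LogicalUnit):
    #     _OP_PARAMS = [
    #       _PInstanceName,
    #       _PForce,
    #       ("example_count", 1, ht.TPositiveInt),
    #       ]
    #
    # Missing attributes get their default (callable defaults are invoked, so
    # factories can be used for mutable values), and each value must pass the
    # associated check or an OpPrereqError is raised.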
180

    
181
  def __GetSSH(self):
182
    """Returns the SshRunner object
183

184
    """
185
    if not self.__ssh:
186
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
187
    return self.__ssh
188

    
189
  ssh = property(fget=__GetSSH)
190

    
191
  def CheckArguments(self):
192
    """Check syntactic validity for the opcode arguments.
193

194
    This method is for doing a simple syntactic check to ensure the
195
    validity of opcode parameters, without any cluster-related
196
    checks. While the same can be accomplished in ExpandNames and/or
197
    CheckPrereq, doing these separately is better because:
198

199
      - ExpandNames is left as purely a lock-related function
200
      - CheckPrereq is run after we have acquired locks (and possibly
201
        waited for them)
202

203
    The function is allowed to change the self.op attribute so that
204
    later methods no longer need to worry about missing parameters.
205

206
    """
207
    pass
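    # Illustrative sketch (hypothetical override, not part of the original
    # module): a typical implementation performs purely syntactic
    # cross-parameter checks, e.g.
    #
    #   def CheckArguments(self):
    #     if self.op.example_a is not None and self.op.example_b is not None:
    #       raise errors.OpPrereqError("Give only one of 'example_a' and"
    #                                  " 'example_b'", errors.ECODE_INVAL)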
208

    
209
  def ExpandNames(self):
210
    """Expand names for this LU.
211

212
    This method is called before starting to execute the opcode, and it should
213
    update all the parameters of the opcode to their canonical form (e.g. a
214
    short node name must be fully expanded after this method has successfully
215
    completed). This way locking, hooks, logging, etc. can work correctly.
216

217
    LUs which implement this method must also populate the self.needed_locks
218
    member, as a dict with lock levels as keys, and a list of needed lock names
219
    as values. Rules:
220

221
      - use an empty dict if you don't need any lock
222
      - if you don't need any lock at a particular level omit that level
223
      - don't put anything for the BGL level
224
      - if you want all locks at a level use locking.ALL_SET as a value
225

226
    If you need to share locks (rather than acquire them exclusively) at one
227
    level you can modify self.share_locks, setting a true value (usually 1) for
228
    that level. By default locks are not shared.
229

230
    This function can also define a list of tasklets, which then will be
231
    executed in order instead of the usual LU-level CheckPrereq and Exec
232
    functions, if those are not defined by the LU.
233

234
    Examples::
235

236
      # Acquire all nodes and one instance
237
      self.needed_locks = {
238
        locking.LEVEL_NODE: locking.ALL_SET,
239
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
240
      }
241
      # Acquire just two nodes
242
      self.needed_locks = {
243
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
244
      }
245
      # Acquire no locks
246
      self.needed_locks = {} # No, you can't leave it to the default value None
247

248
    """
249
    # The implementation of this method is mandatory only if the new LU is
250
    # concurrent, so that old LUs don't need to be changed all at the same
251
    # time.
252
    if self.REQ_BGL:
253
      self.needed_locks = {} # Exclusive LUs don't need locks.
254
    else:
255
      raise NotImplementedError
256

    
257
  def DeclareLocks(self, level):
258
    """Declare LU locking needs for a level
259

260
    While most LUs can just declare their locking needs at ExpandNames time,
261
    sometimes there's the need to calculate some locks after having acquired
262
    the ones before. This function is called just before acquiring locks at a
263
    particular level, but after acquiring the ones at lower levels, and permits
264
    such calculations. It can be used to modify self.needed_locks, and by
265
    default it does nothing.
266

267
    This function is only called if you have something already set in
268
    self.needed_locks for the level.
269

270
    @param level: Locking level which is going to be locked
271
    @type level: member of ganeti.locking.LEVELS
272

273
    """
274

    
275
  def CheckPrereq(self):
276
    """Check prerequisites for this LU.
277

278
    This method should check that the prerequisites for the execution
279
    of this LU are fulfilled. It can do internode communication, but
280
    it should be idempotent - no cluster or system changes are
281
    allowed.
282

283
    The method should raise errors.OpPrereqError in case something is
284
    not fulfilled. Its return value is ignored.
285

286
    This method should also update all the parameters of the opcode to
287
    their canonical form if it hasn't been done by ExpandNames before.
288

289
    """
290
    if self.tasklets is not None:
291
      for (idx, tl) in enumerate(self.tasklets):
292
        logging.debug("Checking prerequisites for tasklet %s/%s",
293
                      idx + 1, len(self.tasklets))
294
        tl.CheckPrereq()
295
    else:
296
      pass
297

    
298
  def Exec(self, feedback_fn):
299
    """Execute the LU.
300

301
    This method should implement the actual work. It should raise
302
    errors.OpExecError for failures that are somewhat dealt with in
303
    code, or expected.
304

305
    """
306
    if self.tasklets is not None:
307
      for (idx, tl) in enumerate(self.tasklets):
308
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
309
        tl.Exec(feedback_fn)
310
    else:
311
      raise NotImplementedError
312

    
313
  def BuildHooksEnv(self):
314
    """Build hooks environment for this LU.
315

316
    This method should return a three-element tuple consisting of: a dict
317
    containing the environment that will be used for running the
318
    specific hook for this LU, a list of node names on which the hook
319
    should run before the execution, and a list of node names on which
320
    the hook should run after the execution.
321

322
    The keys of the dict must not be prefixed with 'GANETI_', as this
323
    is handled by the hooks runner. Also note that additional keys will be
324
    added by the hooks runner. If the LU doesn't define any
325
    environment, an empty dict (and not None) should be returned.
326

327
    If there are no nodes, an empty list (and not None) should be returned.
328

329
    Note that if the HPATH for a LU class is None, this function will
330
    not be called.
331

332
    """
333
    raise NotImplementedError
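    # Illustrative sketch (hypothetical, not part of the original module): a
    # minimal concrete implementation looks like
    #
    #   def BuildHooksEnv(self):
    #     env = {"OP_TARGET": self.op.node_name}
    #     nl = [self.cfg.GetMasterNode()]
    #     return env, nl, nl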
334

    
335
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
336
    """Notify the LU about the results of its hooks.
337

338
    This method is called every time a hooks phase is executed, and notifies
339
    the Logical Unit about the hooks' result. The LU can then use it to alter
340
    its result based on the hooks.  By default the method does nothing and the
341
    previous result is passed back unchanged but any LU can define it if it
342
    wants to use the local cluster hook-scripts somehow.
343

344
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
345
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
346
    @param hook_results: the results of the multi-node hooks rpc call
347
    @param feedback_fn: function used to send feedback back to the caller
348
    @param lu_result: the previous Exec result this LU had, or None
349
        in the PRE phase
350
    @return: the new Exec result, based on the previous result
351
        and hook results
352

353
    """
354
    # API must be kept, thus we ignore the 'unused argument' and 'could
355
    # be a function' warnings
356
    # pylint: disable-msg=W0613,R0201
357
    return lu_result
358

    
359
  def _ExpandAndLockInstance(self):
360
    """Helper function to expand and lock an instance.
361

362
    Many LUs that work on an instance take its name in self.op.instance_name
363
    and need to expand it and then declare the expanded name for locking. This
364
    function does it, and then updates self.op.instance_name to the expanded
365
    name. It also initializes needed_locks as a dict, if this hasn't been done
366
    before.
367

368
    """
369
    if self.needed_locks is None:
370
      self.needed_locks = {}
371
    else:
372
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
373
        "_ExpandAndLockInstance called with instance-level locks set"
374
    self.op.instance_name = _ExpandInstanceName(self.cfg,
375
                                                self.op.instance_name)
376
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
377

    
378
  def _LockInstancesNodes(self, primary_only=False):
379
    """Helper function to declare instances' nodes for locking.
380

381
    This function should be called after locking one or more instances to lock
382
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
383
    with all primary or secondary nodes for instances already locked and
384
    present in self.needed_locks[locking.LEVEL_INSTANCE].
385

386
    It should be called from DeclareLocks, and for safety only works if
387
    self.recalculate_locks[locking.LEVEL_NODE] is set.
388

389
    In the future it may grow parameters to just lock some instance's nodes, or
390
    to just lock primary or secondary nodes, if needed.
391

392
    It should be called in DeclareLocks in a way similar to::
393

394
      if level == locking.LEVEL_NODE:
395
        self._LockInstancesNodes()
396

397
    @type primary_only: boolean
398
    @param primary_only: only lock primary nodes of locked instances
399

400
    """
401
    assert locking.LEVEL_NODE in self.recalculate_locks, \
402
      "_LockInstancesNodes helper function called with no nodes to recalculate"
403

    
404
    # TODO: check if we've really been called with the instance locks held
405

    
406
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
407
    # future we might want to have different behaviors depending on the value
408
    # of self.recalculate_locks[locking.LEVEL_NODE]
409
    wanted_nodes = []
410
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
411
      instance = self.context.cfg.GetInstanceInfo(instance_name)
412
      wanted_nodes.append(instance.primary_node)
413
      if not primary_only:
414
        wanted_nodes.extend(instance.secondary_nodes)
415

    
416
    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
417
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
418
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
419
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
420

    
421
    del self.recalculate_locks[locking.LEVEL_NODE]
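    # Illustrative sketch (hypothetical, not part of the original module): an
    # LU using this helper typically wires it up as
    #
    #   def ExpandNames(self):
    #     self._ExpandAndLockInstance()
    #     self.needed_locks[locking.LEVEL_NODE] = []
    #     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    #
    #   def DeclareLocks(self, level):
    #     if level == locking.LEVEL_NODE:
    #       self._LockInstancesNodes()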
422

    
423

    
424
class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
425
  """Simple LU which runs no hooks.
426

427
  This LU is intended as a parent for other LogicalUnits which will
428
  run no hooks, in order to reduce duplicate code.
429

430
  """
431
  HPATH = None
432
  HTYPE = None
433

    
434
  def BuildHooksEnv(self):
435
    """Empty BuildHooksEnv for NoHooksLu.
436

437
    This just raises an error.
438

439
    """
440
    assert False, "BuildHooksEnv called for NoHooksLUs"
441

    
442

    
443
class Tasklet:
444
  """Tasklet base class.
445

446
  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
447
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
448
  tasklets know nothing about locks.
449

450
  Subclasses must follow these rules:
451
    - Implement CheckPrereq
452
    - Implement Exec
453

454
  """
455
  def __init__(self, lu):
456
    self.lu = lu
457

    
458
    # Shortcuts
459
    self.cfg = lu.cfg
460
    self.rpc = lu.rpc
461

    
462
  def CheckPrereq(self):
463
    """Check prerequisites for this tasklet.
464

465
    This method should check whether the prerequisites for the execution of
466
    this tasklet are fulfilled. It can do internode communication, but it
467
    should be idempotent - no cluster or system changes are allowed.
468

469
    The method should raise errors.OpPrereqError in case something is not
470
    fulfilled. Its return value is ignored.
471

472
    This method should also update all parameters to their canonical form if it
473
    hasn't been done before.
474

475
    """
476
    pass
477

    
478
  def Exec(self, feedback_fn):
479
    """Execute the tasklet.
480

481
    This method should implement the actual work. It should raise
482
    errors.OpExecError for failures that are somewhat dealt with in code, or
483
    expected.
484

485
    """
486
    raise NotImplementedError
487

    
488

    
489
def _GetWantedNodes(lu, nodes):
490
  """Returns list of checked and expanded node names.
491

492
  @type lu: L{LogicalUnit}
493
  @param lu: the logical unit on whose behalf we execute
494
  @type nodes: list
495
  @param nodes: list of node names or None for all nodes
496
  @rtype: list
497
  @return: the list of nodes, sorted
498
  @raise errors.ProgrammerError: if the nodes parameter is wrong type
499

500
  """
501
  if not nodes:
502
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
503
      " non-empty list of nodes whose name is to be expanded.")
504

    
505
  wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
506
  return utils.NiceSort(wanted)
507

    
508

    
509
def _GetWantedInstances(lu, instances):
510
  """Returns list of checked and expanded instance names.
511

512
  @type lu: L{LogicalUnit}
513
  @param lu: the logical unit on whose behalf we execute
514
  @type instances: list
515
  @param instances: list of instance names or None for all instances
516
  @rtype: list
517
  @return: the list of instances, sorted
518
  @raise errors.OpPrereqError: if the instances parameter is wrong type
519
  @raise errors.OpPrereqError: if any of the passed instances is not found
520

521
  """
522
  if instances:
523
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
524
  else:
525
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
526
  return wanted
527

    
528

    
529
def _GetUpdatedParams(old_params, update_dict,
530
                      use_default=True, use_none=False):
531
  """Return the new version of a parameter dictionary.
532

533
  @type old_params: dict
534
  @param old_params: old parameters
535
  @type update_dict: dict
536
  @param update_dict: dict containing new parameter values, or
537
      constants.VALUE_DEFAULT to reset the parameter to its default
538
      value
539
  @type use_default: boolean
540
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
541
      values as 'to be deleted' values
542
  @type use_none: boolean
543
  @param use_none: whether to recognise C{None} values as 'to be
544
      deleted' values
545
  @rtype: dict
546
  @return: the new parameter dictionary
547

548
  """
549
  params_copy = copy.deepcopy(old_params)
550
  for key, val in update_dict.iteritems():
551
    if ((use_default and val == constants.VALUE_DEFAULT) or
552
        (use_none and val is None)):
553
      try:
554
        del params_copy[key]
555
      except KeyError:
556
        pass
557
    else:
558
      params_copy[key] = val
559
  return params_copy
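# Illustrative sketch (hypothetical helper, not part of the original module):
# it only demonstrates the semantics of _GetUpdatedParams above and is never
# called by the real code.
def _ExampleGetUpdatedParamsUsage():
  """Hypothetical demonstration of L{_GetUpdatedParams}."""
  old = {"kernel_path": "/boot/vmlinuz", "root_path": "/dev/xvda1"}
  new = _GetUpdatedParams(old, {"kernel_path": constants.VALUE_DEFAULT,
                                "serial_console": True})
  # "kernel_path" is reset (removed, falling back to the cluster default),
  # "serial_console" is added and "root_path" is left untouched
  assert new == {"root_path": "/dev/xvda1", "serial_console": True}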
560

    
561

    
562
def _CheckOutputFields(static, dynamic, selected):
563
  """Checks whether all selected fields are valid.
564

565
  @type static: L{utils.FieldSet}
566
  @param static: static fields set
567
  @type dynamic: L{utils.FieldSet}
568
  @param dynamic: dynamic fields set
569

570
  """
571
  f = utils.FieldSet()
572
  f.Extend(static)
573
  f.Extend(dynamic)
574

    
575
  delta = f.NonMatching(selected)
576
  if delta:
577
    raise errors.OpPrereqError("Unknown output fields selected: %s"
578
                               % ",".join(delta), errors.ECODE_INVAL)
579

    
580

    
581
def _CheckGlobalHvParams(params):
582
  """Validates that given hypervisor params are not global ones.
583

584
  This will ensure that instances don't get customised versions of
585
  global params.
586

587
  """
588
  used_globals = constants.HVC_GLOBALS.intersection(params)
589
  if used_globals:
590
    msg = ("The following hypervisor parameters are global and cannot"
591
           " be customized at instance level, please modify them at"
592
           " cluster level: %s" % utils.CommaJoin(used_globals))
593
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
594

    
595

    
596
def _CheckNodeOnline(lu, node):
597
  """Ensure that a given node is online.
598

599
  @param lu: the LU on behalf of which we make the check
600
  @param node: the node to check
601
  @raise errors.OpPrereqError: if the node is offline
602

603
  """
604
  if lu.cfg.GetNodeInfo(node).offline:
605
    raise errors.OpPrereqError("Can't use offline node %s" % node,
606
                               errors.ECODE_STATE)
607

    
608

    
609
def _CheckNodeNotDrained(lu, node):
610
  """Ensure that a given node is not drained.
611

612
  @param lu: the LU on behalf of which we make the check
613
  @param node: the node to check
614
  @raise errors.OpPrereqError: if the node is drained
615

616
  """
617
  if lu.cfg.GetNodeInfo(node).drained:
618
    raise errors.OpPrereqError("Can't use drained node %s" % node,
619
                               errors.ECODE_STATE)
620

    
621

    
622
def _CheckNodeVmCapable(lu, node):
623
  """Ensure that a given node is vm capable.
624

625
  @param lu: the LU on behalf of which we make the check
626
  @param node: the node to check
627
  @raise errors.OpPrereqError: if the node is not vm capable
628

629
  """
630
  if not lu.cfg.GetNodeInfo(node).vm_capable:
631
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
632
                               errors.ECODE_STATE)
633

    
634

    
635
def _CheckNodeHasOS(lu, node, os_name, force_variant):
636
  """Ensure that a node supports a given OS.
637

638
  @param lu: the LU on behalf of which we make the check
639
  @param node: the node to check
640
  @param os_name: the OS to query about
641
  @param force_variant: whether to ignore variant errors
642
  @raise errors.OpPrereqError: if the node is not supporting the OS
643

644
  """
645
  result = lu.rpc.call_os_get(node, os_name)
646
  result.Raise("OS '%s' not in supported OS list for node %s" %
647
               (os_name, node),
648
               prereq=True, ecode=errors.ECODE_INVAL)
649
  if not force_variant:
650
    _CheckOSVariant(result.payload, os_name)
651

    
652

    
653
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
654
  """Ensure that a node has the given secondary ip.
655

656
  @type lu: L{LogicalUnit}
657
  @param lu: the LU on behalf of which we make the check
658
  @type node: string
659
  @param node: the node to check
660
  @type secondary_ip: string
661
  @param secondary_ip: the ip to check
662
  @type prereq: boolean
663
  @param prereq: whether to throw a prerequisite or an execute error
664
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
665
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
666

667
  """
668
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
669
  result.Raise("Failure checking secondary ip on node %s" % node,
670
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
671
  if not result.payload:
672
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
673
           " please fix and re-run this command" % secondary_ip)
674
    if prereq:
675
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
676
    else:
677
      raise errors.OpExecError(msg)
678

    
679

    
680
def _RequireFileStorage():
681
  """Checks that file storage is enabled.
682

683
  @raise errors.OpPrereqError: when file storage is disabled
684

685
  """
686
  if not constants.ENABLE_FILE_STORAGE:
687
    raise errors.OpPrereqError("File storage disabled at configure time",
688
                               errors.ECODE_INVAL)
689

    
690

    
691
def _CheckDiskTemplate(template):
692
  """Ensure a given disk template is valid.
693

694
  """
695
  if template not in constants.DISK_TEMPLATES:
696
    msg = ("Invalid disk template name '%s', valid templates are: %s" %
697
           (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
698
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
699
  if template == constants.DT_FILE:
700
    _RequireFileStorage()
701
  return True
702

    
703

    
704
def _CheckStorageType(storage_type):
705
  """Ensure a given storage type is valid.
706

707
  """
708
  if storage_type not in constants.VALID_STORAGE_TYPES:
709
    raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
710
                               errors.ECODE_INVAL)
711
  if storage_type == constants.ST_FILE:
712
    _RequireFileStorage()
713
  return True
714

    
715

    
716
def _GetClusterDomainSecret():
717
  """Reads the cluster domain secret.
718

719
  """
720
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
721
                               strict=True)
722

    
723

    
724
def _CheckInstanceDown(lu, instance, reason):
725
  """Ensure that an instance is not running."""
726
  if instance.admin_up:
727
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
728
                               (instance.name, reason), errors.ECODE_STATE)
729

    
730
  pnode = instance.primary_node
731
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
732
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
733
              prereq=True, ecode=errors.ECODE_ENVIRON)
734

    
735
  if instance.name in ins_l.payload:
736
    raise errors.OpPrereqError("Instance %s is running, %s" %
737
                               (instance.name, reason), errors.ECODE_STATE)
738

    
739

    
740
def _ExpandItemName(fn, name, kind):
741
  """Expand an item name.
742

743
  @param fn: the function to use for expansion
744
  @param name: requested item name
745
  @param kind: text description ('Node' or 'Instance')
746
  @return: the resolved (full) name
747
  @raise errors.OpPrereqError: if the item is not found
748

749
  """
750
  full_name = fn(name)
751
  if full_name is None:
752
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
753
                               errors.ECODE_NOENT)
754
  return full_name
755

    
756

    
757
def _ExpandNodeName(cfg, name):
758
  """Wrapper over L{_ExpandItemName} for nodes."""
759
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
760

    
761

    
762
def _ExpandInstanceName(cfg, name):
763
  """Wrapper over L{_ExpandItemName} for instances."""
764
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
765

    
766

    
767
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
768
                          memory, vcpus, nics, disk_template, disks,
769
                          bep, hvp, hypervisor_name):
770
  """Builds instance related env variables for hooks
771

772
  This builds the hook environment from individual variables.
773

774
  @type name: string
775
  @param name: the name of the instance
776
  @type primary_node: string
777
  @param primary_node: the name of the instance's primary node
778
  @type secondary_nodes: list
779
  @param secondary_nodes: list of secondary nodes as strings
780
  @type os_type: string
781
  @param os_type: the name of the instance's OS
782
  @type status: boolean
783
  @param status: the should_run status of the instance
784
  @type memory: string
785
  @param memory: the memory size of the instance
786
  @type vcpus: string
787
  @param vcpus: the count of VCPUs the instance has
788
  @type nics: list
789
  @param nics: list of tuples (ip, mac, mode, link) representing
790
      the NICs the instance has
791
  @type disk_template: string
792
  @param disk_template: the disk template of the instance
793
  @type disks: list
794
  @param disks: the list of (size, mode) pairs
795
  @type bep: dict
796
  @param bep: the backend parameters for the instance
797
  @type hvp: dict
798
  @param hvp: the hypervisor parameters for the instance
799
  @type hypervisor_name: string
800
  @param hypervisor_name: the hypervisor for the instance
801
  @rtype: dict
802
  @return: the hook environment for this instance
803

804
  """
805
  if status:
806
    str_status = "up"
807
  else:
808
    str_status = "down"
809
  env = {
810
    "OP_TARGET": name,
811
    "INSTANCE_NAME": name,
812
    "INSTANCE_PRIMARY": primary_node,
813
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
814
    "INSTANCE_OS_TYPE": os_type,
815
    "INSTANCE_STATUS": str_status,
816
    "INSTANCE_MEMORY": memory,
817
    "INSTANCE_VCPUS": vcpus,
818
    "INSTANCE_DISK_TEMPLATE": disk_template,
819
    "INSTANCE_HYPERVISOR": hypervisor_name,
820
  }
821

    
822
  if nics:
823
    nic_count = len(nics)
824
    for idx, (ip, mac, mode, link) in enumerate(nics):
825
      if ip is None:
826
        ip = ""
827
      env["INSTANCE_NIC%d_IP" % idx] = ip
828
      env["INSTANCE_NIC%d_MAC" % idx] = mac
829
      env["INSTANCE_NIC%d_MODE" % idx] = mode
830
      env["INSTANCE_NIC%d_LINK" % idx] = link
831
      if mode == constants.NIC_MODE_BRIDGED:
832
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
833
  else:
834
    nic_count = 0
835

    
836
  env["INSTANCE_NIC_COUNT"] = nic_count
837

    
838
  if disks:
839
    disk_count = len(disks)
840
    for idx, (size, mode) in enumerate(disks):
841
      env["INSTANCE_DISK%d_SIZE" % idx] = size
842
      env["INSTANCE_DISK%d_MODE" % idx] = mode
843
  else:
844
    disk_count = 0
845

    
846
  env["INSTANCE_DISK_COUNT"] = disk_count
847

    
848
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
849
    for key, value in source.items():
850
      env["INSTANCE_%s_%s" % (kind, key)] = value
851

    
852
  return env
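# Illustrative sketch (hypothetical helper, not part of the original module):
# it only shows the shape of the environment built by _BuildInstanceHookEnv
# above and is never called by the real code.
def _ExampleInstanceHookEnv():
  """Hypothetical demonstration of L{_BuildInstanceHookEnv}."""
  env = _BuildInstanceHookEnv("inst1.example.com", "node1.example.com",
                              ["node2.example.com"], "debian-image", True,
                              128, 1,
                              [("198.51.100.10", "aa:00:00:00:00:01",
                                constants.NIC_MODE_BRIDGED, "xen-br0")],
                              constants.DT_DRBD8, [(10240, "rw")],
                              {}, {}, "xen-pvm")
  # among others, the following keys are set: OP_TARGET, INSTANCE_NAME,
  # INSTANCE_PRIMARY, INSTANCE_SECONDARIES, INSTANCE_STATUS ("up"),
  # INSTANCE_NIC_COUNT, INSTANCE_NIC0_BRIDGE and INSTANCE_DISK0_SIZE
  return env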
853

    
854

    
855
def _NICListToTuple(lu, nics):
856
  """Build a list of nic information tuples.
857

858
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
859
  value in LUQueryInstanceData.
860

861
  @type lu:  L{LogicalUnit}
862
  @param lu: the logical unit on whose behalf we execute
863
  @type nics: list of L{objects.NIC}
864
  @param nics: list of nics to convert to hooks tuples
865

866
  """
867
  hooks_nics = []
868
  cluster = lu.cfg.GetClusterInfo()
869
  for nic in nics:
870
    ip = nic.ip
871
    mac = nic.mac
872
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
873
    mode = filled_params[constants.NIC_MODE]
874
    link = filled_params[constants.NIC_LINK]
875
    hooks_nics.append((ip, mac, mode, link))
876
  return hooks_nics
877

    
878

    
879
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
880
  """Builds instance related env variables for hooks from an object.
881

882
  @type lu: L{LogicalUnit}
883
  @param lu: the logical unit on whose behalf we execute
884
  @type instance: L{objects.Instance}
885
  @param instance: the instance for which we should build the
886
      environment
887
  @type override: dict
888
  @param override: dictionary with key/values that will override
889
      our values
890
  @rtype: dict
891
  @return: the hook environment dictionary
892

893
  """
894
  cluster = lu.cfg.GetClusterInfo()
895
  bep = cluster.FillBE(instance)
896
  hvp = cluster.FillHV(instance)
897
  args = {
898
    'name': instance.name,
899
    'primary_node': instance.primary_node,
900
    'secondary_nodes': instance.secondary_nodes,
901
    'os_type': instance.os,
902
    'status': instance.admin_up,
903
    'memory': bep[constants.BE_MEMORY],
904
    'vcpus': bep[constants.BE_VCPUS],
905
    'nics': _NICListToTuple(lu, instance.nics),
906
    'disk_template': instance.disk_template,
907
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
908
    'bep': bep,
909
    'hvp': hvp,
910
    'hypervisor_name': instance.hypervisor,
911
  }
912
  if override:
913
    args.update(override)
914
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
915

    
916

    
917
def _AdjustCandidatePool(lu, exceptions):
918
  """Adjust the candidate pool after node operations.
919

920
  """
921
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
922
  if mod_list:
923
    lu.LogInfo("Promoted nodes to master candidate role: %s",
924
               utils.CommaJoin(node.name for node in mod_list))
925
    for name in mod_list:
926
      lu.context.ReaddNode(name)
927
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
928
  if mc_now > mc_max:
929
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
930
               (mc_now, mc_max))
931

    
932

    
933
def _DecideSelfPromotion(lu, exceptions=None):
934
  """Decide whether I should promote myself as a master candidate.
935

936
  """
937
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
938
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
939
  # the new node will increase mc_max by one, so:
940
  mc_should = min(mc_should + 1, cp_size)
941
  return mc_now < mc_should
942

    
943

    
944
def _CheckNicsBridgesExist(lu, target_nics, target_node):
945
  """Check that the bridges needed by a list of nics exist.
946

947
  """
948
  cluster = lu.cfg.GetClusterInfo()
949
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
950
  brlist = [params[constants.NIC_LINK] for params in paramslist
951
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
952
  if brlist:
953
    result = lu.rpc.call_bridges_exist(target_node, brlist)
954
    result.Raise("Error checking bridges on destination node '%s'" %
955
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
956

    
957

    
958
def _CheckInstanceBridgesExist(lu, instance, node=None):
959
  """Check that the bridges needed by an instance exist.
960

961
  """
962
  if node is None:
963
    node = instance.primary_node
964
  _CheckNicsBridgesExist(lu, instance.nics, node)
965

    
966

    
967
def _CheckOSVariant(os_obj, name):
968
  """Check whether an OS name conforms to the os variants specification.
969

970
  @type os_obj: L{objects.OS}
971
  @param os_obj: OS object to check
972
  @type name: string
973
  @param name: OS name passed by the user, to check for validity
974

975
  """
976
  if not os_obj.supported_variants:
977
    return
978
  variant = objects.OS.GetVariant(name)
979
  if not variant:
980
    raise errors.OpPrereqError("OS name must include a variant",
981
                               errors.ECODE_INVAL)
982

    
983
  if variant not in os_obj.supported_variants:
984
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
985

    
986

    
987
def _GetNodeInstancesInner(cfg, fn):
988
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
989

    
990

    
991
def _GetNodeInstances(cfg, node_name):
992
  """Returns a list of all primary and secondary instances on a node.
993

994
  """
995

    
996
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
997

    
998

    
999
def _GetNodePrimaryInstances(cfg, node_name):
1000
  """Returns primary instances on a node.
1001

1002
  """
1003
  return _GetNodeInstancesInner(cfg,
1004
                                lambda inst: node_name == inst.primary_node)
1005

    
1006

    
1007
def _GetNodeSecondaryInstances(cfg, node_name):
1008
  """Returns secondary instances on a node.
1009

1010
  """
1011
  return _GetNodeInstancesInner(cfg,
1012
                                lambda inst: node_name in inst.secondary_nodes)
1013

    
1014

    
1015
def _GetStorageTypeArgs(cfg, storage_type):
1016
  """Returns the arguments for a storage type.
1017

1018
  """
1019
  # Special case for file storage
1020
  if storage_type == constants.ST_FILE:
1021
    # storage.FileStorage wants a list of storage directories
1022
    return [[cfg.GetFileStorageDir()]]
1023

    
1024
  return []
1025

    
1026

    
1027
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
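  """Return the indices of the instance's disks that are faulty on a node.

  The mirror status of all of C{instance}'s disks is queried on C{node_name}
  via RPC and the indices whose local disk status is C{constants.LDS_FAULTY}
  are collected.

  """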
1028
  faulty = []
1029

    
1030
  for dev in instance.disks:
1031
    cfg.SetDiskID(dev, node_name)
1032

    
1033
  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1034
  result.Raise("Failed to get disk status from node %s" % node_name,
1035
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
1036

    
1037
  for idx, bdev_status in enumerate(result.payload):
1038
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1039
      faulty.append(idx)
1040

    
1041
  return faulty
1042

    
1043

    
1044
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1045
  """Check the sanity of iallocator and node arguments and use the
1046
  cluster-wide iallocator if appropriate.
1047

1048
  Check that at most one of (iallocator, node) is specified. If none is
1049
  specified, then the LU's opcode's iallocator slot is filled with the
1050
  cluster-wide default iallocator.
1051

1052
  @type iallocator_slot: string
1053
  @param iallocator_slot: the name of the opcode iallocator slot
1054
  @type node_slot: string
1055
  @param node_slot: the name of the opcode target node slot
1056

1057
  """
1058
  node = getattr(lu.op, node_slot, None)
1059
  iallocator = getattr(lu.op, iallocator_slot, None)
1060

    
1061
  if node is not None and iallocator is not None:
1062
    raise errors.OpPrereqError("Do not specify both iallocator and node.",
1063
                               errors.ECODE_INVAL)
1064
  elif node is None and iallocator is None:
1065
    default_iallocator = lu.cfg.GetDefaultIAllocator()
1066
    if default_iallocator:
1067
      setattr(lu.op, iallocator_slot, default_iallocator)
1068
    else:
1069
      raise errors.OpPrereqError("No iallocator or node given and no"
1070
                                 " cluster-wide default iallocator found."
1071
                                 " Please specify either an iallocator or a"
1072
                                 " node, or set a cluster-wide default"
1073
                                 " iallocator.")
1074

    
1075

    
1076
class LUPostInitCluster(LogicalUnit):
1077
  """Logical unit for running hooks after cluster initialization.
1078

1079
  """
1080
  HPATH = "cluster-init"
1081
  HTYPE = constants.HTYPE_CLUSTER
1082

    
1083
  def BuildHooksEnv(self):
1084
    """Build hooks env.
1085

1086
    """
1087
    env = {"OP_TARGET": self.cfg.GetClusterName()}
1088
    mn = self.cfg.GetMasterNode()
1089
    return env, [], [mn]
1090

    
1091
  def Exec(self, feedback_fn):
1092
    """Nothing to do.
1093

1094
    """
1095
    return True
1096

    
1097

    
1098
class LUDestroyCluster(LogicalUnit):
1099
  """Logical unit for destroying the cluster.
1100

1101
  """
1102
  HPATH = "cluster-destroy"
1103
  HTYPE = constants.HTYPE_CLUSTER
1104

    
1105
  def BuildHooksEnv(self):
1106
    """Build hooks env.
1107

1108
    """
1109
    env = {"OP_TARGET": self.cfg.GetClusterName()}
1110
    return env, [], []
1111

    
1112
  def CheckPrereq(self):
1113
    """Check prerequisites.
1114

1115
    This checks whether the cluster is empty.
1116

1117
    Any errors are signaled by raising errors.OpPrereqError.
1118

1119
    """
1120
    master = self.cfg.GetMasterNode()
1121

    
1122
    nodelist = self.cfg.GetNodeList()
1123
    if len(nodelist) != 1 or nodelist[0] != master:
1124
      raise errors.OpPrereqError("There are still %d node(s) in"
1125
                                 " this cluster." % (len(nodelist) - 1),
1126
                                 errors.ECODE_INVAL)
1127
    instancelist = self.cfg.GetInstanceList()
1128
    if instancelist:
1129
      raise errors.OpPrereqError("There are still %d instance(s) in"
1130
                                 " this cluster." % len(instancelist),
1131
                                 errors.ECODE_INVAL)
1132

    
1133
  def Exec(self, feedback_fn):
1134
    """Destroys the cluster.
1135

1136
    """
1137
    master = self.cfg.GetMasterNode()
1138

    
1139
    # Run post hooks on master node before it's removed
1140
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
1141
    try:
1142
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
1143
    except:
1144
      # pylint: disable-msg=W0702
1145
      self.LogWarning("Errors occurred running hooks on %s" % master)
1146

    
1147
    result = self.rpc.call_node_stop_master(master, False)
1148
    result.Raise("Could not disable the master role")
1149

    
1150
    return master
1151

    
1152

    
1153
def _VerifyCertificate(filename):
1154
  """Verifies a certificate for LUVerifyCluster.
1155

1156
  @type filename: string
1157
  @param filename: Path to PEM file
1158

1159
  """
1160
  try:
1161
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1162
                                           utils.ReadFile(filename))
1163
  except Exception, err: # pylint: disable-msg=W0703
1164
    return (LUVerifyCluster.ETYPE_ERROR,
1165
            "Failed to load X509 certificate %s: %s" % (filename, err))
1166

    
1167
  (errcode, msg) = \
1168
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1169
                                constants.SSL_CERT_EXPIRATION_ERROR)
1170

    
1171
  if msg:
1172
    fnamemsg = "While verifying %s: %s" % (filename, msg)
1173
  else:
1174
    fnamemsg = None
1175

    
1176
  if errcode is None:
1177
    return (None, fnamemsg)
1178
  elif errcode == utils.CERT_WARNING:
1179
    return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
1180
  elif errcode == utils.CERT_ERROR:
1181
    return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)
1182

    
1183
  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1184

    
1185

    
1186
class LUVerifyCluster(LogicalUnit):
1187
  """Verifies the cluster status.
1188

1189
  """
1190
  HPATH = "cluster-verify"
1191
  HTYPE = constants.HTYPE_CLUSTER
1192
  _OP_PARAMS = [
1193
    ("skip_checks", ht.EmptyList,
1194
     ht.TListOf(ht.TElemOf(constants.VERIFY_OPTIONAL_CHECKS))),
1195
    ("verbose", False, ht.TBool),
1196
    ("error_codes", False, ht.TBool),
1197
    ("debug_simulate_errors", False, ht.TBool),
1198
    ]
1199
  REQ_BGL = False
1200

    
1201
  TCLUSTER = "cluster"
1202
  TNODE = "node"
1203
  TINSTANCE = "instance"
1204

    
1205
  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1206
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1207
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1208
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1209
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1210
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1211
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1212
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1213
  ENODEDRBD = (TNODE, "ENODEDRBD")
1214
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1215
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1216
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
1217
  ENODEHV = (TNODE, "ENODEHV")
1218
  ENODELVM = (TNODE, "ENODELVM")
1219
  ENODEN1 = (TNODE, "ENODEN1")
1220
  ENODENET = (TNODE, "ENODENET")
1221
  ENODEOS = (TNODE, "ENODEOS")
1222
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1223
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1224
  ENODERPC = (TNODE, "ENODERPC")
1225
  ENODESSH = (TNODE, "ENODESSH")
1226
  ENODEVERSION = (TNODE, "ENODEVERSION")
1227
  ENODESETUP = (TNODE, "ENODESETUP")
1228
  ENODETIME = (TNODE, "ENODETIME")
1229

    
1230
  ETYPE_FIELD = "code"
1231
  ETYPE_ERROR = "ERROR"
1232
  ETYPE_WARNING = "WARNING"
1233

    
1234
  class NodeImage(object):
1235
    """A class representing the logical and physical status of a node.
1236

1237
    @type name: string
1238
    @ivar name: the node name to which this object refers
1239
    @ivar volumes: a structure as returned from
1240
        L{ganeti.backend.GetVolumeList} (runtime)
1241
    @ivar instances: a list of running instances (runtime)
1242
    @ivar pinst: list of configured primary instances (config)
1243
    @ivar sinst: list of configured secondary instances (config)
1244
    @ivar sbp: dictionary of {secondary-node: list of instances} of all peers
1245
        of this node (config)
1246
    @ivar mfree: free memory, as reported by hypervisor (runtime)
1247
    @ivar dfree: free disk, as reported by the node (runtime)
1248
    @ivar offline: the offline status (config)
1249
    @type rpc_fail: boolean
1250
    @ivar rpc_fail: whether the RPC verify call failed (overall,
1251
        not whether the individual keys were correct) (runtime)
1252
    @type lvm_fail: boolean
1253
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1254
    @type hyp_fail: boolean
1255
    @ivar hyp_fail: whether the RPC call didn't return the instance list
1256
    @type ghost: boolean
1257
    @ivar ghost: whether this is a known node or not (config)
1258
    @type os_fail: boolean
1259
    @ivar os_fail: whether the RPC call didn't return valid OS data
1260
    @type oslist: list
1261
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1262
    @type vm_capable: boolean
1263
    @ivar vm_capable: whether the node can host instances
1264

1265
    """
1266
    def __init__(self, offline=False, name=None, vm_capable=True):
1267
      self.name = name
1268
      self.volumes = {}
1269
      self.instances = []
1270
      self.pinst = []
1271
      self.sinst = []
1272
      self.sbp = {}
1273
      self.mfree = 0
1274
      self.dfree = 0
1275
      self.offline = offline
1276
      self.vm_capable = vm_capable
1277
      self.rpc_fail = False
1278
      self.lvm_fail = False
1279
      self.hyp_fail = False
1280
      self.ghost = False
1281
      self.os_fail = False
1282
      self.oslist = {}
1283

    
1284
  def ExpandNames(self):
1285
    self.needed_locks = {
1286
      locking.LEVEL_NODE: locking.ALL_SET,
1287
      locking.LEVEL_INSTANCE: locking.ALL_SET,
1288
    }
1289
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1290

    
1291
  def _Error(self, ecode, item, msg, *args, **kwargs):
1292
    """Format an error message.
1293

1294
    Based on the opcode's error_codes parameter, either format a
1295
    parseable error code, or a simpler error string.
1296

1297
    This must be called only from Exec and functions called from Exec.
1298

1299
    """
1300
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1301
    itype, etxt = ecode
1302
    # first complete the msg
1303
    if args:
1304
      msg = msg % args
1305
    # then format the whole message
1306
    if self.op.error_codes:
1307
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1308
    else:
1309
      if item:
1310
        item = " " + item
1311
      else:
1312
        item = ""
1313
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1314
    # and finally report it via the feedback_fn
1315
    self._feedback_fn("  - %s" % msg)
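    # Illustrative examples (hypothetical node name, not part of the original
    # module): with error_codes enabled a message is reported as
    #
    #   - ERROR:ENODELVM:node:node1.example.com:unable to check volume groups
    #
    # and without it as
    #
    #   - ERROR: node node1.example.com: unable to check volume groups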
1316

    
1317
  def _ErrorIf(self, cond, *args, **kwargs):
1318
    """Log an error message if the passed condition is True.
1319

1320
    """
1321
    cond = bool(cond) or self.op.debug_simulate_errors
1322
    if cond:
1323
      self._Error(*args, **kwargs)
1324
    # only mark the operation as failed for errors, not for warnings
1325
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1326
      self.bad = self.bad or cond
1327

    
1328
  def _VerifyNode(self, ninfo, nresult):
1329
    """Perform some basic validation on data returned from a node.
1330

1331
      - check the result data structure is well formed and has all the
1332
        mandatory fields
1333
      - check ganeti version
1334

1335
    @type ninfo: L{objects.Node}
1336
    @param ninfo: the node to check
1337
    @param nresult: the results from the node
1338
    @rtype: boolean
1339
    @return: whether overall this call was successful (and we can expect
1340
         reasonable values in the respose)
1341

1342
    """
1343
    node = ninfo.name
1344
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1345

    
1346
    # main result, nresult should be a non-empty dict
1347
    test = not nresult or not isinstance(nresult, dict)
1348
    _ErrorIf(test, self.ENODERPC, node,
1349
                  "unable to verify node: no data returned")
1350
    if test:
1351
      return False
1352

    
1353
    # compares ganeti version
1354
    local_version = constants.PROTOCOL_VERSION
1355
    remote_version = nresult.get("version", None)
1356
    test = not (remote_version and
1357
                isinstance(remote_version, (list, tuple)) and
1358
                len(remote_version) == 2)
1359
    _ErrorIf(test, self.ENODERPC, node,
1360
             "connection to node returned invalid data")
1361
    if test:
1362
      return False
1363

    
1364
    test = local_version != remote_version[0]
1365
    _ErrorIf(test, self.ENODEVERSION, node,
1366
             "incompatible protocol versions: master %s,"
1367
             " node %s", local_version, remote_version[0])
1368
    if test:
1369
      return False
1370

    
1371
    # node seems compatible, we can actually try to look into its results
1372

    
1373
    # full package version
1374
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1375
                  self.ENODEVERSION, node,
1376
                  "software version mismatch: master %s, node %s",
1377
                  constants.RELEASE_VERSION, remote_version[1],
1378
                  code=self.ETYPE_WARNING)
1379

    
1380
    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1381
    if ninfo.vm_capable and isinstance(hyp_result, dict):
1382
      for hv_name, hv_result in hyp_result.iteritems():
1383
        test = hv_result is not None
1384
        _ErrorIf(test, self.ENODEHV, node,
1385
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1386

    
1387
    test = nresult.get(constants.NV_NODESETUP,
1388
                           ["Missing NODESETUP results"])
1389
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1390
             "; ".join(test))
1391

    
1392
    return True
1393

    
1394
  def _VerifyNodeTime(self, ninfo, nresult,
1395
                      nvinfo_starttime, nvinfo_endtime):
1396
    """Check the node time.
1397

1398
    @type ninfo: L{objects.Node}
1399
    @param ninfo: the node to check
1400
    @param nresult: the remote results for the node
1401
    @param nvinfo_starttime: the start time of the RPC call
1402
    @param nvinfo_endtime: the end time of the RPC call
1403

1404
    """
1405
    node = ninfo.name
1406
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1407

    
1408
    ntime = nresult.get(constants.NV_TIME, None)
1409
    try:
1410
      ntime_merged = utils.MergeTime(ntime)
1411
    except (ValueError, TypeError):
1412
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1413
      return
1414

    
1415
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1416
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1417
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1418
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1419
    else:
1420
      ntime_diff = None
1421

    
1422
    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1423
             "Node time diverges by at least %s from master node time",
1424
             ntime_diff)
1425

    
1426
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1427
    """Check the node LVM data.
1428

1429
    @type ninfo: L{objects.Node}
1430
    @param ninfo: the node to check
1431
    @param nresult: the remote results for the node
1432
    @param vg_name: the configured VG name
1433

1434
    """
1435
    if vg_name is None:
1436
      return
1437

    
1438
    node = ninfo.name
1439
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1440

    
1441
    # checks vg existence and size > 20G
1442
    vglist = nresult.get(constants.NV_VGLIST, None)
1443
    test = not vglist
1444
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1445
    if not test:
1446
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1447
                                            constants.MIN_VG_SIZE)
1448
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1449

    
1450
    # check pv names
1451
    pvlist = nresult.get(constants.NV_PVLIST, None)
1452
    test = pvlist is None
1453
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1454
    if not test:
1455
      # check that ':' is not present in PV names, since it's a
1456
      # special character for lvcreate (denotes the range of PEs to
1457
      # use on the PV)
1458
      for _, pvname, owner_vg in pvlist:
1459
        test = ":" in pvname
1460
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1461
                 " '%s' of VG '%s'", pvname, owner_vg)
1462

    
1463
  def _VerifyNodeNetwork(self, ninfo, nresult):
1464
    """Check the node network connectivity.
1465

1466
    @type ninfo: L{objects.Node}
1467
    @param ninfo: the node to check
1468
    @param nresult: the remote results for the node
1469

1470
    """
1471
    node = ninfo.name
1472
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1473

    
1474
    test = constants.NV_NODELIST not in nresult
1475
    _ErrorIf(test, self.ENODESSH, node,
1476
             "node hasn't returned node ssh connectivity data")
1477
    if not test:
1478
      if nresult[constants.NV_NODELIST]:
1479
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1480
          _ErrorIf(True, self.ENODESSH, node,
1481
                   "ssh communication with node '%s': %s", a_node, a_msg)
1482

    
1483
    test = constants.NV_NODENETTEST not in nresult
1484
    _ErrorIf(test, self.ENODENET, node,
1485
             "node hasn't returned node tcp connectivity data")
1486
    if not test:
1487
      if nresult[constants.NV_NODENETTEST]:
1488
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1489
        for anode in nlist:
1490
          _ErrorIf(True, self.ENODENET, node,
1491
                   "tcp communication with node '%s': %s",
1492
                   anode, nresult[constants.NV_NODENETTEST][anode])
1493

    
1494
    test = constants.NV_MASTERIP not in nresult
1495
    _ErrorIf(test, self.ENODENET, node,
1496
             "node hasn't returned node master IP reachability data")
1497
    if not test:
1498
      if not nresult[constants.NV_MASTERIP]:
1499
        if node == self.master_node:
1500
          msg = "the master node cannot reach the master IP (not configured?)"
1501
        else:
1502
          msg = "cannot reach the master IP"
1503
        _ErrorIf(True, self.ENODENET, node, msg)
1504

    
1505
  def _VerifyInstance(self, instance, instanceconfig, node_image,
1506
                      diskstatus):
1507
    """Verify an instance.
1508

1509
    This function checks to see if the required block devices are
1510
    available on the instance's node.
1511

1512
    """
1513
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1514
    node_current = instanceconfig.primary_node
1515

    
1516
    node_vol_should = {}
1517
    instanceconfig.MapLVsByNode(node_vol_should)
1518

    
1519
    for node in node_vol_should:
1520
      n_img = node_image[node]
1521
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1522
        # ignore missing volumes on offline or broken nodes
1523
        continue
1524
      for volume in node_vol_should[node]:
1525
        test = volume not in n_img.volumes
1526
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1527
                 "volume %s missing on node %s", volume, node)
1528

    
1529
    if instanceconfig.admin_up:
1530
      pri_img = node_image[node_current]
1531
      test = instance not in pri_img.instances and not pri_img.offline
1532
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
1533
               "instance not running on its primary node %s",
1534
               node_current)
1535

    
1536
    for node, n_img in node_image.items():
1537
      if node != node_current:
1538
        test = instance in n_img.instances
1539
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1540
                 "instance should not run on node %s", node)
1541

    
1542
    diskdata = [(nname, success, status, idx)
1543
                for (nname, disks) in diskstatus.items()
1544
                for idx, (success, status) in enumerate(disks)]
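    # Illustrative sketch (not part of the original code): the comprehension
    # above flattens a per-node mapping such as
    #   {"node1": [(True, status0), (False, "cannot connect")]}
    # into one tuple per disk:
    #   [("node1", True, status0, 0), ("node1", False, "cannot connect", 1)]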
1545

    
1546
    for nname, success, bdev_status, idx in diskdata:
1547
      _ErrorIf(instanceconfig.admin_up and not success,
1548
               self.EINSTANCEFAULTYDISK, instance,
1549
               "couldn't retrieve status for disk/%s on %s: %s",
1550
               idx, nname, bdev_status)
1551
      _ErrorIf((instanceconfig.admin_up and success and
1552
                bdev_status.ldisk_status == constants.LDS_FAULTY),
1553
               self.EINSTANCEFAULTYDISK, instance,
1554
               "disk/%s on %s is faulty", idx, nname)
1555

    
1556
  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1557
    """Verify if there are any unknown volumes in the cluster.
1558

1559
    The .os, .swap and backup volumes are ignored. All other volumes are
1560
    reported as unknown.
1561

1562
    @type reserved: L{ganeti.utils.FieldSet}
1563
    @param reserved: a FieldSet of reserved volume names
1564

1565
    """
1566
    for node, n_img in node_image.items():
1567
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1568
        # skip non-healthy nodes
1569
        continue
1570
      for volume in n_img.volumes:
1571
        test = ((node not in node_vol_should or
1572
                volume not in node_vol_should[node]) and
1573
                not reserved.Matches(volume))
1574
        self._ErrorIf(test, self.ENODEORPHANLV, node,
1575
                      "volume %s is unknown", volume)
1576

    
1577
  def _VerifyOrphanInstances(self, instancelist, node_image):
    """Verify the list of running instances.

    This checks what instances are running but unknown to the cluster.

    """
    for node, n_img in node_image.items():
      for o_inst in n_img.instances:
        test = o_inst not in instancelist
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
                      "instance %s on node %s should not exist", o_inst, node)

  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1590
    """Verify N+1 Memory Resilience.
1591

1592
    Check that if one single node dies we can still start all the
1593
    instances it was primary for.
1594

1595
    """
1596
    for node, n_img in node_image.items():
1597
      # This code checks that every node which is now listed as
1598
      # secondary has enough memory to host all instances it is
1599
      # supposed to should a single other node in the cluster fail.
1600
      # FIXME: not ready for failover to an arbitrary node
1601
      # FIXME: does not support file-backed instances
1602
      # WARNING: we currently take into account down instances as well
1603
      # as up ones, considering that even if they're down someone
1604
      # might want to start them even in the event of a node failure.
1605
      for prinode, instances in n_img.sbp.items():
1606
        needed_mem = 0
1607
        for instance in instances:
1608
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1609
          if bep[constants.BE_AUTO_BALANCE]:
1610
            needed_mem += bep[constants.BE_MEMORY]
1611
        test = n_img.mfree < needed_mem
1612
        self._ErrorIf(test, self.ENODEN1, node,
1613
                      "not enough memory on to accommodate"
1614
                      " failovers should peer node %s fail", prinode)
1615

    
1616
  def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1617
                       master_files):
1618
    """Verifies and computes the node required file checksums.
1619

1620
    @type ninfo: L{objects.Node}
1621
    @param ninfo: the node to check
1622
    @param nresult: the remote results for the node
1623
    @param file_list: required list of files
1624
    @param local_cksum: dictionary of local files and their checksums
1625
    @param master_files: list of files that only masters should have
1626

1627
    """
1628
    node = ninfo.name
1629
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1630

    
1631
    remote_cksum = nresult.get(constants.NV_FILELIST, None)
1632
    test = not isinstance(remote_cksum, dict)
1633
    _ErrorIf(test, self.ENODEFILECHECK, node,
1634
             "node hasn't returned file checksum data")
1635
    if test:
1636
      return
1637

    
1638
    for file_name in file_list:
1639
      node_is_mc = ninfo.master_candidate
1640
      must_have = (file_name not in master_files) or node_is_mc
1641
      # missing
1642
      test1 = file_name not in remote_cksum
1643
      # invalid checksum
1644
      test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1645
      # existing and good
1646
      test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1647
      _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1648
               "file '%s' missing", file_name)
1649
      _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1650
               "file '%s' has wrong checksum", file_name)
1651
      # not candidate and this is not a must-have file
1652
      _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1653
               "file '%s' should not exist on non master"
1654
               " candidates (and the file is outdated)", file_name)
1655
      # all good, except non-master/non-must have combination
1656
      _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1657
               "file '%s' should not exist"
1658
               " on non master candidates", file_name)
1659

    
1660
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
1661
                      drbd_map):
1662
    """Verifies and the node DRBD status.
1663

1664
    @type ninfo: L{objects.Node}
1665
    @param ninfo: the node to check
1666
    @param nresult: the remote results for the node
1667
    @param instanceinfo: the dict of instances
1668
    @param drbd_helper: the configured DRBD usermode helper
1669
    @param drbd_map: the DRBD map as returned by
1670
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1671

1672
    """
1673
    node = ninfo.name
1674
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1675

    
1676
    if drbd_helper:
1677
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
1678
      test = (helper_result is None)
1679
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
1680
               "no drbd usermode helper returned")
1681
      if helper_result:
1682
        status, payload = helper_result
1683
        test = not status
1684
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
1685
                 "drbd usermode helper check unsuccessful: %s", payload)
1686
        test = status and (payload != drbd_helper)
1687
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
1688
                 "wrong drbd usermode helper: %s", payload)
1689

    
1690
    # compute the DRBD minors
1691
    node_drbd = {}
1692
    for minor, instance in drbd_map[node].items():
1693
      test = instance not in instanceinfo
1694
      _ErrorIf(test, self.ECLUSTERCFG, None,
1695
               "ghost instance '%s' in temporary DRBD map", instance)
1696
        # ghost instance should not be running, but otherwise we
1697
        # don't give double warnings (both ghost instance and
1698
        # unallocated minor in use)
1699
      if test:
1700
        node_drbd[minor] = (instance, False)
1701
      else:
1702
        instance = instanceinfo[instance]
1703
        node_drbd[minor] = (instance.name, instance.admin_up)
1704

    
1705
    # and now check them
1706
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
1707
    test = not isinstance(used_minors, (tuple, list))
1708
    _ErrorIf(test, self.ENODEDRBD, node,
1709
             "cannot parse drbd status file: %s", str(used_minors))
1710
    if test:
1711
      # we cannot check drbd status
1712
      return
1713

    
1714
    for minor, (iname, must_exist) in node_drbd.items():
1715
      test = minor not in used_minors and must_exist
1716
      _ErrorIf(test, self.ENODEDRBD, node,
1717
               "drbd minor %d of instance %s is not active", minor, iname)
1718
    for minor in used_minors:
1719
      test = minor not in node_drbd
1720
      _ErrorIf(test, self.ENODEDRBD, node,
1721
               "unallocated drbd minor %d is in use", minor)
1722

    
1723
  def _UpdateNodeOS(self, ninfo, nresult, nimg):
1724
    """Builds the node OS structures.
1725

1726
    @type ninfo: L{objects.Node}
1727
    @param ninfo: the node to check
1728
    @param nresult: the remote results for the node
1729
    @param nimg: the node image object
1730

1731
    """
1732
    node = ninfo.name
1733
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1734

    
1735
    remote_os = nresult.get(constants.NV_OSLIST, None)
1736
    test = (not isinstance(remote_os, list) or
1737
            not compat.all(isinstance(v, list) and len(v) == 7
1738
                           for v in remote_os))
1739

    
1740
    _ErrorIf(test, self.ENODEOS, node,
1741
             "node hasn't returned valid OS data")
1742

    
1743
    nimg.os_fail = test
1744

    
1745
    if test:
1746
      return
1747

    
1748
    os_dict = {}
1749

    
1750
    for (name, os_path, status, diagnose,
1751
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1752

    
1753
      if name not in os_dict:
1754
        os_dict[name] = []
1755

    
1756
      # parameters is a list of lists instead of list of tuples due to
1757
      # JSON lacking a real tuple type, fix it:
1758
      parameters = [tuple(v) for v in parameters]
1759
      os_dict[name].append((os_path, status, diagnose,
1760
                            set(variants), set(parameters), set(api_ver)))
1761

    
1762
    nimg.oslist = os_dict
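    # Illustrative sketch (not part of the original code): nimg.oslist is keyed
    # by OS name, one entry per path where that OS was found, e.g.
    #   {"debootstrap": [("/srv/ganeti/os/debootstrap", True, "",
    #                     set(["default"]), set(), set([20]))]}
    # i.e. (path, status, diagnose, variants, parameters, api_versions).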
1763

    
1764
  def _VerifyNodeOS(self, ninfo, nimg, base):
1765
    """Verifies the node OS list.
1766

1767
    @type ninfo: L{objects.Node}
1768
    @param ninfo: the node to check
1769
    @param nimg: the node image object
1770
    @param base: the 'template' node we match against (e.g. from the master)
1771

1772
    """
1773
    node = ninfo.name
1774
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1775

    
1776
    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1777

    
1778
    for os_name, os_data in nimg.oslist.items():
1779
      assert os_data, "Empty OS status for OS %s?!" % os_name
1780
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1781
      _ErrorIf(not f_status, self.ENODEOS, node,
1782
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1783
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1784
               "OS '%s' has multiple entries (first one shadows the rest): %s",
1785
               os_name, utils.CommaJoin([v[0] for v in os_data]))
1786
      # this will be caught in the backend too
1787
      _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1788
               and not f_var, self.ENODEOS, node,
1789
               "OS %s with API at least %d does not declare any variant",
1790
               os_name, constants.OS_API_V15)
1791
      # comparisons with the 'base' image
1792
      test = os_name not in base.oslist
1793
      _ErrorIf(test, self.ENODEOS, node,
1794
               "Extra OS %s not present on reference node (%s)",
1795
               os_name, base.name)
1796
      if test:
1797
        continue
1798
      assert base.oslist[os_name], "Base node has empty OS status?"
1799
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1800
      if not b_status:
1801
        # base OS is invalid, skipping
1802
        continue
1803
      for kind, a, b in [("API version", f_api, b_api),
1804
                         ("variants list", f_var, b_var),
1805
                         ("parameters", f_param, b_param)]:
1806
        _ErrorIf(a != b, self.ENODEOS, node,
1807
                 "OS %s %s differs from reference node %s: %s vs. %s",
1808
                 kind, os_name, base.name,
1809
                 utils.CommaJoin(a), utils.CommaJoin(b))
1810

    
1811
    # check any missing OSes
1812
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1813
    _ErrorIf(missing, self.ENODEOS, node,
1814
             "OSes present on reference node %s but missing on this node: %s",
1815
             base.name, utils.CommaJoin(missing))
1816

    
1817
  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1818
    """Verifies and updates the node volume data.
1819

1820
    This function will update a L{NodeImage}'s internal structures
1821
    with data from the remote call.
1822

1823
    @type ninfo: L{objects.Node}
1824
    @param ninfo: the node to check
1825
    @param nresult: the remote results for the node
1826
    @param nimg: the node image object
1827
    @param vg_name: the configured VG name
1828

1829
    """
1830
    node = ninfo.name
1831
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1832

    
1833
    nimg.lvm_fail = True
1834
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1835
    if vg_name is None:
1836
      pass
1837
    elif isinstance(lvdata, basestring):
1838
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1839
               utils.SafeEncode(lvdata))
1840
    elif not isinstance(lvdata, dict):
1841
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1842
    else:
1843
      nimg.volumes = lvdata
1844
      nimg.lvm_fail = False
1845

    
1846
  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1847
    """Verifies and updates the node instance list.
1848

1849
    If the listing was successful, then updates this node's instance
1850
    list. Otherwise, it marks the RPC call as failed for the instance
1851
    list key.
1852

1853
    @type ninfo: L{objects.Node}
1854
    @param ninfo: the node to check
1855
    @param nresult: the remote results for the node
1856
    @param nimg: the node image object
1857

1858
    """
1859
    idata = nresult.get(constants.NV_INSTANCELIST, None)
1860
    test = not isinstance(idata, list)
1861
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1862
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
1863
    if test:
1864
      nimg.hyp_fail = True
1865
    else:
1866
      nimg.instances = idata
1867

    
1868
  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1869
    """Verifies and computes a node information map
1870

1871
    @type ninfo: L{objects.Node}
1872
    @param ninfo: the node to check
1873
    @param nresult: the remote results for the node
1874
    @param nimg: the node image object
1875
    @param vg_name: the configured VG name
1876

1877
    """
1878
    node = ninfo.name
1879
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1880

    
1881
    # try to read free memory (from the hypervisor)
1882
    hv_info = nresult.get(constants.NV_HVINFO, None)
1883
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1884
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1885
    if not test:
1886
      try:
1887
        nimg.mfree = int(hv_info["memory_free"])
1888
      except (ValueError, TypeError):
1889
        _ErrorIf(True, self.ENODERPC, node,
1890
                 "node returned invalid nodeinfo, check hypervisor")
1891

    
1892
    # FIXME: devise a free space model for file based instances as well
1893
    if vg_name is not None:
1894
      test = (constants.NV_VGLIST not in nresult or
1895
              vg_name not in nresult[constants.NV_VGLIST])
1896
      _ErrorIf(test, self.ENODELVM, node,
1897
               "node didn't return data for the volume group '%s'"
1898
               " - it is either missing or broken", vg_name)
1899
      if not test:
1900
        try:
1901
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
1902
        except (ValueError, TypeError):
1903
          _ErrorIf(True, self.ENODERPC, node,
1904
                   "node returned invalid LVM info, check LVM status")
1905

    
1906
  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
1907
    """Gets per-disk status information for all instances.
1908

1909
    @type nodelist: list of strings
1910
    @param nodelist: Node names
1911
    @type node_image: dict of (name, L{objects.Node})
1912
    @param node_image: Node objects
1913
    @type instanceinfo: dict of (name, L{objects.Instance})
1914
    @param instanceinfo: Instance objects
1915

1916
    """
1917
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1918

    
1919
    node_disks = {}
1920
    node_disks_devonly = {}
1921

    
1922
    for nname in nodelist:
1923
      disks = [(inst, disk)
1924
               for instlist in [node_image[nname].pinst,
1925
                                node_image[nname].sinst]
1926
               for inst in instlist
1927
               for disk in instanceinfo[inst].disks]
1928

    
1929
      if not disks:
1930
        # No need to collect data
1931
        continue
1932

    
1933
      node_disks[nname] = disks
1934

    
1935
      # Creating copies as SetDiskID below will modify the objects and that can
1936
      # lead to incorrect data returned from nodes
1937
      devonly = [dev.Copy() for (_, dev) in disks]
1938

    
1939
      for dev in devonly:
1940
        self.cfg.SetDiskID(dev, nname)
1941

    
1942
      node_disks_devonly[nname] = devonly
1943

    
1944
    assert len(node_disks) == len(node_disks_devonly)
1945

    
1946
    # Collect data from all nodes with disks
1947
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
1948
                                                          node_disks_devonly)
1949

    
1950
    assert len(result) == len(node_disks)
1951

    
1952
    instdisk = {}
1953

    
1954
    for (nname, nres) in result.items():
1955
      if nres.offline:
1956
        # Ignore offline node
1957
        continue
1958

    
1959
      disks = node_disks[nname]
1960

    
1961
      msg = nres.fail_msg
1962
      _ErrorIf(msg, self.ENODERPC, nname,
1963
               "while getting disk information: %s", nres.fail_msg)
1964
      if msg:
1965
        # No data from this node
1966
        data = len(disks) * [None]
1967
      else:
1968
        data = nres.payload
1969

    
1970
      for ((inst, _), status) in zip(disks, data):
1971
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
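      # Illustrative sketch (not part of the original code): the nested
      # setdefault() calls build a two-level mapping with one status per disk,
      # per node holding that instance's disks, e.g.
      #   instdisk = {"inst1": {"node1": [disk0_status, disk1_status],
      #                         "node2": [disk0_status, disk1_status]}}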
1972

    
1973
    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
1974
                      len(nnames) <= len(instanceinfo[inst].all_nodes)
1975
                      for inst, nnames in instdisk.items()
1976
                      for nname, statuses in nnames.items())
1977

    
1978
    return instdisk
1979

    
1980
  def BuildHooksEnv(self):
1981
    """Build hooks env.
1982

1983
    Cluster-Verify hooks just ran in the post phase and their failure makes
1984
    the output be logged in the verify output and the verification to fail.
1985

1986
    """
1987
    all_nodes = self.cfg.GetNodeList()
1988
    env = {
1989
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
1990
      }
1991
    for node in self.cfg.GetAllNodesInfo().values():
1992
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
1993

    
1994
    return env, [], all_nodes
1995

    
1996
  def Exec(self, feedback_fn):
1997
    """Verify integrity of cluster, performing various test on nodes.
1998

1999
    """
2000
    self.bad = False
2001
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2002
    verbose = self.op.verbose
2003
    self._feedback_fn = feedback_fn
2004
    feedback_fn("* Verifying global settings")
2005
    for msg in self.cfg.VerifyConfig():
2006
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2007

    
2008
    # Check the cluster certificates
2009
    for cert_filename in constants.ALL_CERT_FILES:
2010
      (errcode, msg) = _VerifyCertificate(cert_filename)
2011
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2012

    
2013
    vg_name = self.cfg.GetVGName()
2014
    drbd_helper = self.cfg.GetDRBDHelper()
2015
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2016
    cluster = self.cfg.GetClusterInfo()
2017
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
2018
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
2019
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
2020
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
2021
                        for iname in instancelist)
2022
    i_non_redundant = [] # Non redundant instances
2023
    i_non_a_balanced = [] # Non auto-balanced instances
2024
    n_offline = 0 # Count of offline nodes
2025
    n_drained = 0 # Count of nodes being drained
2026
    node_vol_should = {}
2027

    
2028
    # FIXME: verify OS list
2029
    # do local checksums
2030
    master_files = [constants.CLUSTER_CONF_FILE]
2031
    master_node = self.master_node = self.cfg.GetMasterNode()
2032
    master_ip = self.cfg.GetMasterIP()
2033

    
2034
    file_names = ssconf.SimpleStore().GetFileList()
2035
    file_names.extend(constants.ALL_CERT_FILES)
2036
    file_names.extend(master_files)
2037
    if cluster.modify_etc_hosts:
2038
      file_names.append(constants.ETC_HOSTS)
2039

    
2040
    local_checksums = utils.FingerprintFiles(file_names)
2041

    
2042
    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2043
    node_verify_param = {
2044
      constants.NV_FILELIST: file_names,
2045
      constants.NV_NODELIST: [node.name for node in nodeinfo
2046
                              if not node.offline],
2047
      constants.NV_HYPERVISOR: hypervisors,
2048
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2049
                                  node.secondary_ip) for node in nodeinfo
2050
                                 if not node.offline],
2051
      constants.NV_INSTANCELIST: hypervisors,
2052
      constants.NV_VERSION: None,
2053
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2054
      constants.NV_NODESETUP: None,
2055
      constants.NV_TIME: None,
2056
      constants.NV_MASTERIP: (master_node, master_ip),
2057
      constants.NV_OSLIST: None,
2058
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2059
      }
2060

    
2061
    if vg_name is not None:
2062
      node_verify_param[constants.NV_VGLIST] = None
2063
      node_verify_param[constants.NV_LVLIST] = vg_name
2064
      node_verify_param[constants.NV_PVLIST] = [vg_name]
2065
      node_verify_param[constants.NV_DRBDLIST] = None
2066

    
2067
    if drbd_helper:
2068
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2069

    
2070
    # Build our expected cluster state
2071
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
2072
                                                 name=node.name,
2073
                                                 vm_capable=node.vm_capable))
2074
                      for node in nodeinfo)
2075

    
2076
    for instance in instancelist:
2077
      inst_config = instanceinfo[instance]
2078

    
2079
      for nname in inst_config.all_nodes:
2080
        if nname not in node_image:
2081
          # ghost node
2082
          gnode = self.NodeImage(name=nname)
2083
          gnode.ghost = True
2084
          node_image[nname] = gnode
2085

    
2086
      inst_config.MapLVsByNode(node_vol_should)
2087

    
2088
      pnode = inst_config.primary_node
2089
      node_image[pnode].pinst.append(instance)
2090

    
2091
      for snode in inst_config.secondary_nodes:
2092
        nimg = node_image[snode]
2093
        nimg.sinst.append(instance)
2094
        if pnode not in nimg.sbp:
2095
          nimg.sbp[pnode] = []
2096
        nimg.sbp[pnode].append(instance)
2097

    
2098
    # At this point, we have the in-memory data structures complete,
2099
    # except for the runtime information, which we'll gather next
2100

    
2101
    # Due to the way our RPC system works, exact response times cannot be
2102
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2103
    # time before and after executing the request, we can at least have a time
2104
    # window.
2105
    nvinfo_starttime = time.time()
2106
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2107
                                           self.cfg.GetClusterName())
2108
    nvinfo_endtime = time.time()
2109

    
2110
    all_drbd_map = self.cfg.ComputeDRBDMap()
2111

    
2112
    feedback_fn("* Gathering disk information (%s nodes)" % len(nodelist))
2113
    instdisk = self._CollectDiskInfo(nodelist, node_image, instanceinfo)
2114

    
2115
    feedback_fn("* Verifying node status")
2116

    
2117
    refos_img = None
2118

    
2119
    for node_i in nodeinfo:
2120
      node = node_i.name
2121
      nimg = node_image[node]
2122

    
2123
      if node_i.offline:
2124
        if verbose:
2125
          feedback_fn("* Skipping offline node %s" % (node,))
2126
        n_offline += 1
2127
        continue
2128

    
2129
      if node == master_node:
2130
        ntype = "master"
2131
      elif node_i.master_candidate:
2132
        ntype = "master candidate"
2133
      elif node_i.drained:
2134
        ntype = "drained"
2135
        n_drained += 1
2136
      else:
2137
        ntype = "regular"
2138
      if verbose:
2139
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2140

    
2141
      msg = all_nvinfo[node].fail_msg
2142
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2143
      if msg:
2144
        nimg.rpc_fail = True
2145
        continue
2146

    
2147
      nresult = all_nvinfo[node].payload
2148

    
2149
      nimg.call_ok = self._VerifyNode(node_i, nresult)
2150
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2151
      self._VerifyNodeNetwork(node_i, nresult)
2152
      self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
2153
                            master_files)
2154

    
2155
      if nimg.vm_capable:
2156
        self._VerifyNodeLVM(node_i, nresult, vg_name)
2157
        self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2158
                             all_drbd_map)
2159

    
2160
        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2161
        self._UpdateNodeInstances(node_i, nresult, nimg)
2162
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2163
        self._UpdateNodeOS(node_i, nresult, nimg)
2164
        if not nimg.os_fail:
2165
          if refos_img is None:
2166
            refos_img = nimg
2167
          self._VerifyNodeOS(node_i, nimg, refos_img)
2168

    
2169
    feedback_fn("* Verifying instance status")
2170
    for instance in instancelist:
2171
      if verbose:
2172
        feedback_fn("* Verifying instance %s" % instance)
2173
      inst_config = instanceinfo[instance]
2174
      self._VerifyInstance(instance, inst_config, node_image,
2175
                           instdisk[instance])
2176
      inst_nodes_offline = []
2177

    
2178
      pnode = inst_config.primary_node
2179
      pnode_img = node_image[pnode]
2180
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2181
               self.ENODERPC, pnode, "instance %s, connection to"
2182
               " primary node failed", instance)
2183

    
2184
      if pnode_img.offline:
2185
        inst_nodes_offline.append(pnode)
2186

    
2187
      # If the instance is non-redundant we cannot survive losing its primary
2188
      # node, so we are not N+1 compliant. On the other hand we have no disk
2189
      # templates with more than one secondary so that situation is not well
2190
      # supported either.
2191
      # FIXME: does not support file-backed instances
2192
      if not inst_config.secondary_nodes:
2193
        i_non_redundant.append(instance)
2194
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2195
               instance, "instance has multiple secondary nodes: %s",
2196
               utils.CommaJoin(inst_config.secondary_nodes),
2197
               code=self.ETYPE_WARNING)
2198

    
2199
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2200
        i_non_a_balanced.append(instance)
2201

    
2202
      for snode in inst_config.secondary_nodes:
2203
        s_img = node_image[snode]
2204
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2205
                 "instance %s, connection to secondary node failed", instance)
2206

    
2207
        if s_img.offline:
2208
          inst_nodes_offline.append(snode)
2209

    
2210
      # warn that the instance lives on offline nodes
2211
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2212
               "instance lives on offline node(s) %s",
2213
               utils.CommaJoin(inst_nodes_offline))
2214
      # ... or ghost/non-vm_capable nodes
2215
      for node in inst_config.all_nodes:
2216
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2217
                 "instance lives on ghost node %s", node)
2218
        _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2219
                 instance, "instance lives on non-vm_capable node %s", node)
2220

    
2221
    feedback_fn("* Verifying orphan volumes")
2222
    reserved = utils.FieldSet(*cluster.reserved_lvs)
2223
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2224

    
2225
    feedback_fn("* Verifying orphan instances")
2226
    self._VerifyOrphanInstances(instancelist, node_image)
2227

    
2228
    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2229
      feedback_fn("* Verifying N+1 Memory redundancy")
2230
      self._VerifyNPlusOneMemory(node_image, instanceinfo)
2231

    
2232
    feedback_fn("* Other Notes")
2233
    if i_non_redundant:
2234
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2235
                  % len(i_non_redundant))
2236

    
2237
    if i_non_a_balanced:
2238
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2239
                  % len(i_non_a_balanced))
2240

    
2241
    if n_offline:
2242
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2243

    
2244
    if n_drained:
2245
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2246

    
2247
    return not self.bad
2248

    
2249
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2250
    """Analyze the post-hooks' result
2251

2252
    This method analyses the hook result, handles it, and sends some
2253
    nicely-formatted feedback back to the user.
2254

2255
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
2256
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2257
    @param hooks_results: the results of the multi-node hooks rpc call
2258
    @param feedback_fn: function used to send feedback back to the caller
2259
    @param lu_result: previous Exec result
2260
    @return: the new Exec result, based on the previous result
2261
        and hook results
2262

2263
    """
2264
    # We only really run POST phase hooks, and are only interested in
2265
    # their results
2266
    if phase == constants.HOOKS_PHASE_POST:
2267
      # Used to change hooks' output to proper indentation
2268
      indent_re = re.compile('^', re.M)
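      # Added note (not part of the original code): a '^' pattern compiled with
      # re.M matches at the start of every line, so the sub() below prefixes
      # each line of the hook output, e.g.
      #   re.compile('^', re.M).sub('      ', "a\nb") == "      a\n      b"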
2269
      feedback_fn("* Hooks Results")
2270
      assert hooks_results, "invalid result from hooks"
2271

    
2272
      for node_name in hooks_results:
2273
        res = hooks_results[node_name]
2274
        msg = res.fail_msg
2275
        test = msg and not res.offline
2276
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
2277
                      "Communication failure in hooks execution: %s", msg)
2278
        if res.offline or msg:
2279
          # No need to investigate payload if node is offline or gave an error.
2280
          # override manually lu_result here as _ErrorIf only
2281
          # overrides self.bad
2282
          lu_result = 1
2283
          continue
2284
        for script, hkr, output in res.payload:
2285
          test = hkr == constants.HKR_FAIL
2286
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
2287
                        "Script %s failed, output:", script)
2288
          if test:
2289
            output = indent_re.sub('      ', output)
2290
            feedback_fn("%s" % output)
2291
            lu_result = 0
2292

    
2293
      return lu_result


class LUVerifyDisks(NoHooksLU):
2297
  """Verifies the cluster disks status.
2298

2299
  """
2300
  REQ_BGL = False
2301

    
2302
  def ExpandNames(self):
2303
    self.needed_locks = {
2304
      locking.LEVEL_NODE: locking.ALL_SET,
2305
      locking.LEVEL_INSTANCE: locking.ALL_SET,
2306
    }
2307
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2308

    
2309
  def Exec(self, feedback_fn):
2310
    """Verify integrity of cluster disks.
2311

2312
    @rtype: tuple of three items
2313
    @return: a tuple of (dict of node-to-node_error, list of instances
2314
        which need activate-disks, dict of instance: (node, volume) for
2315
        missing volumes)
2316

2317
    """
2318
    result = res_nodes, res_instances, res_missing = {}, [], {}
2319

    
2320
    vg_name = self.cfg.GetVGName()
2321
    nodes = utils.NiceSort(self.cfg.GetNodeList())
2322
    instances = [self.cfg.GetInstanceInfo(name)
2323
                 for name in self.cfg.GetInstanceList()]
2324

    
2325
    nv_dict = {}
2326
    for inst in instances:
2327
      inst_lvs = {}
2328
      if (not inst.admin_up or
2329
          inst.disk_template not in constants.DTS_NET_MIRROR):
2330
        continue
2331
      inst.MapLVsByNode(inst_lvs)
2332
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2333
      for node, vol_list in inst_lvs.iteritems():
2334
        for vol in vol_list:
2335
          nv_dict[(node, vol)] = inst
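      # Illustrative sketch (not part of the original code): e.g.
      #   inst_lvs = {"node1": ["xenvg/lv_data", "xenvg/lv_meta"]}
      # becomes
      #   nv_dict = {("node1", "xenvg/lv_data"): inst,
      #              ("node1", "xenvg/lv_meta"): inst}
      # so each (node, volume) pair reported below maps back to its instance.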
2336

    
2337
    if not nv_dict:
2338
      return result
2339

    
2340
    node_lvs = self.rpc.call_lv_list(nodes, vg_name)
2341

    
2342
    for node in nodes:
2343
      # node_volume
2344
      node_res = node_lvs[node]
2345
      if node_res.offline:
2346
        continue
2347
      msg = node_res.fail_msg
2348
      if msg:
2349
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2350
        res_nodes[node] = msg
2351
        continue
2352

    
2353
      lvs = node_res.payload
2354
      for lv_name, (_, _, lv_online) in lvs.items():
2355
        inst = nv_dict.pop((node, lv_name), None)
2356
        if (not lv_online and inst is not None
2357
            and inst.name not in res_instances):
2358
          res_instances.append(inst.name)
2359

    
2360
    # any leftover items in nv_dict are missing LVs, let's arrange the
2361
    # data better
2362
    for key, inst in nv_dict.iteritems():
2363
      if inst.name not in res_missing:
2364
        res_missing[inst.name] = []
2365
      res_missing[inst.name].append(key)
2366

    
2367
    return result


class LURepairDiskSizes(NoHooksLU):
2371
  """Verifies the cluster disks sizes.
2372

2373
  """
2374
  _OP_PARAMS = [("instances", ht.EmptyList, ht.TListOf(ht.TNonEmptyString))]
2375
  REQ_BGL = False
2376

    
2377
  def ExpandNames(self):
2378
    if self.op.instances:
2379
      self.wanted_names = []
2380
      for name in self.op.instances:
2381
        full_name = _ExpandInstanceName(self.cfg, name)
2382
        self.wanted_names.append(full_name)
2383
      self.needed_locks = {
2384
        locking.LEVEL_NODE: [],
2385
        locking.LEVEL_INSTANCE: self.wanted_names,
2386
        }
2387
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2388
    else:
2389
      self.wanted_names = None
2390
      self.needed_locks = {
2391
        locking.LEVEL_NODE: locking.ALL_SET,
2392
        locking.LEVEL_INSTANCE: locking.ALL_SET,
2393
        }
2394
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2395

    
2396
  def DeclareLocks(self, level):
2397
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
2398
      self._LockInstancesNodes(primary_only=True)
2399

    
2400
  def CheckPrereq(self):
2401
    """Check prerequisites.
2402

2403
    This only checks the optional instance list against the existing names.
2404

2405
    """
2406
    if self.wanted_names is None:
2407
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2408

    
2409
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2410
                             in self.wanted_names]
2411

    
2412
  def _EnsureChildSizes(self, disk):
2413
    """Ensure children of the disk have the needed disk size.
2414

2415
    This is valid mainly for DRBD8 and fixes an issue where the
2416
    children have a smaller disk size.
2417

2418
    @param disk: an L{ganeti.objects.Disk} object
2419

2420
    """
2421
    if disk.dev_type == constants.LD_DRBD8:
2422
      assert disk.children, "Empty children for DRBD8?"
2423
      fchild = disk.children[0]
2424
      mismatch = fchild.size < disk.size
2425
      if mismatch:
2426
        self.LogInfo("Child disk has size %d, parent %d, fixing",
2427
                     fchild.size, disk.size)
2428
        fchild.size = disk.size
2429

    
2430
      # and we recurse on this child only, not on the metadev
2431
      return self._EnsureChildSizes(fchild) or mismatch
2432
    else:
2433
      return False
2434

    
2435
  def Exec(self, feedback_fn):
2436
    """Verify the size of cluster disks.
2437

2438
    """
2439
    # TODO: check child disks too
2440
    # TODO: check differences in size between primary/secondary nodes
2441
    per_node_disks = {}
2442
    for instance in self.wanted_instances:
2443
      pnode = instance.primary_node
2444
      if pnode not in per_node_disks:
2445
        per_node_disks[pnode] = []
2446
      for idx, disk in enumerate(instance.disks):
2447
        per_node_disks[pnode].append((instance, idx, disk))
2448

    
2449
    changed = []
2450
    for node, dskl in per_node_disks.items():
2451
      newl = [v[2].Copy() for v in dskl]
2452
      for dsk in newl:
2453
        self.cfg.SetDiskID(dsk, node)
2454
      result = self.rpc.call_blockdev_getsizes(node, newl)
2455
      if result.fail_msg:
2456
        self.LogWarning("Failure in blockdev_getsizes call to node"
2457
                        " %s, ignoring", node)
2458
        continue
2459
      if len(result.data) != len(dskl):
2460
        self.LogWarning("Invalid result from node %s, ignoring node results",
2461
                        node)
2462
        continue
2463
      for ((instance, idx, disk), size) in zip(dskl, result.data):
2464
        if size is None:
2465
          self.LogWarning("Disk %d of instance %s did not return size"
2466
                          " information, ignoring", idx, instance.name)
2467
          continue
2468
        if not isinstance(size, (int, long)):
2469
          self.LogWarning("Disk %d of instance %s did not return valid"
2470
                          " size information, ignoring", idx, instance.name)
2471
          continue
2472
        size = size >> 20
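        # Added note (not part of the original code): the shift converts the
        # value reported by the node (assumed to be in bytes) to MiB,
        # e.g. 10737418240 >> 20 == 10240.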
2473
        if size != disk.size:
2474
          self.LogInfo("Disk %d of instance %s has mismatched size,"
2475
                       " correcting: recorded %d, actual %d", idx,
2476
                       instance.name, disk.size, size)
2477
          disk.size = size
2478
          self.cfg.Update(instance, feedback_fn)
2479
          changed.append((instance.name, idx, size))
2480
        if self._EnsureChildSizes(disk):
2481
          self.cfg.Update(instance, feedback_fn)
2482
          changed.append((instance.name, idx, disk.size))
2483
    return changed


class LURenameCluster(LogicalUnit):
2487
  """Rename the cluster.
2488

2489
  """
2490
  HPATH = "cluster-rename"
2491
  HTYPE = constants.HTYPE_CLUSTER
2492
  _OP_PARAMS = [("name", ht.NoDefault, ht.TNonEmptyString)]
2493

    
2494
  def BuildHooksEnv(self):
2495
    """Build hooks env.
2496

2497
    """
2498
    env = {
2499
      "OP_TARGET": self.cfg.GetClusterName(),
2500
      "NEW_NAME": self.op.name,
2501
      }
2502
    mn = self.cfg.GetMasterNode()
2503
    all_nodes = self.cfg.GetNodeList()
2504
    return env, [mn], all_nodes
2505

    
2506
  def CheckPrereq(self):
2507
    """Verify that the passed name is a valid one.
2508

2509
    """
2510
    hostname = netutils.GetHostname(name=self.op.name,
2511
                                    family=self.cfg.GetPrimaryIPFamily())
2512

    
2513
    new_name = hostname.name
2514
    self.ip = new_ip = hostname.ip
2515
    old_name = self.cfg.GetClusterName()
2516
    old_ip = self.cfg.GetMasterIP()
2517
    if new_name == old_name and new_ip == old_ip:
2518
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
2519
                                 " cluster has changed",
2520
                                 errors.ECODE_INVAL)
2521
    if new_ip != old_ip:
2522
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2523
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
2524
                                   " reachable on the network" %
2525
                                   new_ip, errors.ECODE_NOTUNIQUE)
2526

    
2527
    self.op.name = new_name
2528

    
2529
  def Exec(self, feedback_fn):
2530
    """Rename the cluster.
2531

2532
    """
2533
    clustername = self.op.name
2534
    ip = self.ip
2535

    
2536
    # shutdown the master IP
2537
    master = self.cfg.GetMasterNode()
2538
    result = self.rpc.call_node_stop_master(master, False)
2539
    result.Raise("Could not disable the master role")
2540

    
2541
    try:
2542
      cluster = self.cfg.GetClusterInfo()
2543
      cluster.cluster_name = clustername
2544
      cluster.master_ip = ip
2545
      self.cfg.Update(cluster, feedback_fn)
2546

    
2547
      # update the known hosts file
2548
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2549
      node_list = self.cfg.GetNodeList()
2550
      try:
2551
        node_list.remove(master)
2552
      except ValueError:
2553
        pass
2554
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
2555
    finally:
2556
      result = self.rpc.call_node_start_master(master, False, False)
2557
      msg = result.fail_msg
2558
      if msg:
2559
        self.LogWarning("Could not re-enable the master role on"
2560
                        " the master, please restart manually: %s", msg)
2561

    
2562
    return clustername


class LUSetClusterParams(LogicalUnit):
2566
  """Change the parameters of the cluster.
2567

2568
  """
2569
  HPATH = "cluster-modify"
2570
  HTYPE = constants.HTYPE_CLUSTER
2571
  _OP_PARAMS = [
2572
    ("vg_name", None, ht.TMaybeString),
2573
    ("enabled_hypervisors", None,
2574
     ht.TOr(ht.TAnd(ht.TListOf(ht.TElemOf(constants.HYPER_TYPES)), ht.TTrue),
2575
            ht.TNone)),
2576
    ("hvparams", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict),
2577
                              ht.TNone)),
2578
    ("beparams", None, ht.TOr(ht.TDict, ht.TNone)),
2579
    ("os_hvp", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict),
2580
                            ht.TNone)),
2581
    ("osparams", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict),
2582
                              ht.TNone)),
2583
    ("candidate_pool_size", None, ht.TOr(ht.TStrictPositiveInt, ht.TNone)),
2584
    ("uid_pool", None, ht.NoType),
2585
    ("add_uids", None, ht.NoType),
2586
    ("remove_uids", None, ht.NoType),
2587
    ("maintain_node_health", None, ht.TMaybeBool),
2588
    ("prealloc_wipe_disks", None, ht.TMaybeBool),
2589
    ("nicparams", None, ht.TOr(ht.TDict, ht.TNone)),
2590
    ("drbd_helper", None, ht.TOr(ht.TString, ht.TNone)),
2591
    ("default_iallocator", None, ht.TOr(ht.TString, ht.TNone)),
2592
    ("reserved_lvs", None, ht.TOr(ht.TListOf(ht.TNonEmptyString), ht.TNone)),
2593
    ("hidden_os", None, ht.TOr(ht.TListOf(\
2594
          ht.TAnd(ht.TList,
2595
                ht.TIsLength(2),
2596
                ht.TMap(lambda v: v[0], ht.TElemOf(constants.DDMS_VALUES)))),
2597
          ht.TNone)),
2598
    ("blacklisted_os", None, ht.TOr(ht.TListOf(\
2599
          ht.TAnd(ht.TList,
2600
                ht.TIsLength(2),
2601
                ht.TMap(lambda v: v[0], ht.TElemOf(constants.DDMS_VALUES)))),
2602
          ht.TNone)),
2603
    ]
2604
  REQ_BGL = False
2605

    
2606
  def CheckArguments(self):
2607
    """Check parameters
2608

2609
    """
2610
    if self.op.uid_pool:
2611
      uidpool.CheckUidPool(self.op.uid_pool)
2612

    
2613
    if self.op.add_uids:
2614
      uidpool.CheckUidPool(self.op.add_uids)
2615

    
2616
    if self.op.remove_uids:
2617
      uidpool.CheckUidPool(self.op.remove_uids)
2618

    
2619
  def ExpandNames(self):
2620
    # FIXME: in the future maybe other cluster params won't require checking on
2621
    # all nodes to be modified.
2622
    self.needed_locks = {
2623
      locking.LEVEL_NODE: locking.ALL_SET,
2624
    }
2625
    self.share_locks[locking.LEVEL_NODE] = 1
2626

    
2627
  def BuildHooksEnv(self):
2628
    """Build hooks env.
2629

2630
    """
2631
    env = {
2632
      "OP_TARGET": self.cfg.GetClusterName(),
2633
      "NEW_VG_NAME": self.op.vg_name,
2634
      }
2635
    mn = self.cfg.GetMasterNode()
2636
    return env, [mn], [mn]
2637

    
2638
  def CheckPrereq(self):
2639
    """Check prerequisites.
2640

2641
    This checks whether the given params don't conflict and
2642
    if the given volume group is valid.
2643

2644
    """
2645
    if self.op.vg_name is not None and not self.op.vg_name:
2646
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2647
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2648
                                   " instances exist", errors.ECODE_INVAL)
2649

    
2650
    if self.op.drbd_helper is not None and not self.op.drbd_helper:
2651
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2652
        raise errors.OpPrereqError("Cannot disable drbd helper while"
2653
                                   " drbd-based instances exist",
2654
                                   errors.ECODE_INVAL)
2655

    
2656
    node_list = self.acquired_locks[locking.LEVEL_NODE]
2657

    
2658
    # if vg_name not None, checks given volume group on all nodes
2659
    if self.op.vg_name:
2660
      vglist = self.rpc.call_vg_list(node_list)
2661
      for node in node_list:
2662
        msg = vglist[node].fail_msg
2663
        if msg:
2664
          # ignoring down node
2665
          self.LogWarning("Error while gathering data on node %s"
2666
                          " (ignoring node): %s", node, msg)
2667
          continue
2668
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2669
                                              self.op.vg_name,
2670
                                              constants.MIN_VG_SIZE)
2671
        if vgstatus:
2672
          raise errors.OpPrereqError("Error on node '%s': %s" %
2673
                                     (node, vgstatus), errors.ECODE_ENVIRON)
2674

    
2675
    if self.op.drbd_helper:
2676
      # checks given drbd helper on all nodes
2677
      helpers = self.rpc.call_drbd_helper(node_list)
2678
      for node in node_list:
2679
        ninfo = self.cfg.GetNodeInfo(node)
2680
        if ninfo.offline:
2681
          self.LogInfo("Not checking drbd helper on offline node %s", node)
2682
          continue
2683
        msg = helpers[node].fail_msg
2684
        if msg:
2685
          raise errors.OpPrereqError("Error checking drbd helper on node"
2686
                                     " '%s': %s" % (node, msg),
2687
                                     errors.ECODE_ENVIRON)
2688
        node_helper = helpers[node].payload
2689
        if node_helper != self.op.drbd_helper:
2690
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2691
                                     (node, node_helper), errors.ECODE_ENVIRON)
2692

    
2693
    self.cluster = cluster = self.cfg.GetClusterInfo()
2694
    # validate params changes
2695
    if self.op.beparams:
2696
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2697
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2698

    
2699
    if self.op.nicparams:
2700
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2701
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2702
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
2703
      nic_errors = []
2704

    
2705
      # check all instances for consistency
2706
      for instance in self.cfg.GetAllInstancesInfo().values():
2707
        for nic_idx, nic in enumerate(instance.nics):
2708
          params_copy = copy.deepcopy(nic.nicparams)
2709
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
2710

    
2711
          # check parameter syntax
2712
          try:
2713
            objects.NIC.CheckParameterSyntax(params_filled)
2714
          except errors.ConfigurationError, err:
2715
            nic_errors.append("Instance %s, nic/%d: %s" %
2716
                              (instance.name, nic_idx, err))
2717

    
2718
          # if we're moving instances to routed, check that they have an ip
2719
          target_mode = params_filled[constants.NIC_MODE]
2720
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2721
            nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2722
                              (instance.name, nic_idx))
2723
      if nic_errors:
2724
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2725
                                   "\n".join(nic_errors))
2726

    
2727
    # hypervisor list/parameters
2728
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2729
    if self.op.hvparams:
2730
      for hv_name, hv_dict in self.op.hvparams.items():
2731
        if hv_name not in self.new_hvparams:
2732
          self.new_hvparams[hv_name] = hv_dict
2733
        else:
2734
          self.new_hvparams[hv_name].update(hv_dict)
2735

    
2736
    # os hypervisor parameters
2737
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2738
    if self.op.os_hvp:
2739
      for os_name, hvs in self.op.os_hvp.items():
        if os_name not in self.new_os_hvp:
          self.new_os_hvp[os_name] = hvs
        else:
          for hv_name, hv_dict in hvs.items():
            if hv_name not in self.new_os_hvp[os_name]:
              self.new_os_hvp[os_name][hv_name] = hv_dict
            else:
              self.new_os_hvp[os_name][hv_name].update(hv_dict)

    # os parameters
    self.new_osp = objects.FillDict(cluster.osparams, {})
    if self.op.osparams:
      for os_name, osp in self.op.osparams.items():
        if os_name not in self.new_osp:
          self.new_osp[os_name] = {}

        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
                                                  use_none=True)

        if not self.new_osp[os_name]:
          # we removed all parameters
          del self.new_osp[os_name]
        else:
          # check the parameter validity (remote check)
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
                         os_name, self.new_osp[os_name])

    # changes to the hypervisor list
    if self.op.enabled_hypervisors is not None:
      self.hv_list = self.op.enabled_hypervisors
      for hv in self.hv_list:
        # if the hypervisor doesn't already exist in the cluster
        # hvparams, we initialize it to empty, and then (in both
        # cases) we make sure to fill the defaults, as we might not
        # have a complete defaults list if the hypervisor wasn't
        # enabled before
        if hv not in new_hvp:
          new_hvp[hv] = {}
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
    else:
      self.hv_list = cluster.enabled_hypervisors

    if self.op.hvparams or self.op.enabled_hypervisors is not None:
      # either the enabled list has changed, or the parameters have, validate
      for hv_name, hv_params in self.new_hvparams.items():
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
            (self.op.enabled_hypervisors and
             hv_name in self.op.enabled_hypervisors)):
          # either this is a new hypervisor, or its parameters have changed
          hv_class = hypervisor.GetHypervisor(hv_name)
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          hv_class.CheckParameterSyntax(hv_params)
          _CheckHVParams(self, node_list, hv_name, hv_params)

    if self.op.os_hvp:
      # no need to check any newly-enabled hypervisors, since the
      # defaults have already been checked in the above code-block
      for os_name, os_hvp in self.new_os_hvp.items():
        for hv_name, hv_params in os_hvp.items():
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          # we need to fill in the new os_hvp on top of the actual hv_p
          cluster_defaults = self.new_hvparams.get(hv_name, {})
          new_osp = objects.FillDict(cluster_defaults, hv_params)
          hv_class = hypervisor.GetHypervisor(hv_name)
          hv_class.CheckParameterSyntax(new_osp)
          _CheckHVParams(self, node_list, hv_name, new_osp)

    if self.op.default_iallocator:
      alloc_script = utils.FindFile(self.op.default_iallocator,
                                    constants.IALLOCATOR_SEARCH_PATH,
                                    os.path.isfile)
      if alloc_script is None:
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
                                   " specified" % self.op.default_iallocator,
                                   errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    """
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
    if self.op.drbd_helper is not None:
      new_helper = self.op.drbd_helper
      if not new_helper:
        new_helper = None
      if new_helper != self.cfg.GetDRBDHelper():
        self.cfg.SetDRBDHelper(new_helper)
      else:
        feedback_fn("Cluster DRBD helper already in desired state,"
                    " not changing")
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
    if self.op.enabled_hypervisors is not None:
      self.cluster.hvparams = self.new_hvparams
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
    if self.op.beparams:
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
    if self.op.nicparams:
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
    if self.op.osparams:
      self.cluster.osparams = self.new_osp

    if self.op.candidate_pool_size is not None:
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
      # we need to update the pool size here, otherwise the save will fail
      _AdjustCandidatePool(self, [])

    if self.op.maintain_node_health is not None:
      self.cluster.maintain_node_health = self.op.maintain_node_health

    if self.op.prealloc_wipe_disks is not None:
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks

    if self.op.add_uids is not None:
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)

    if self.op.remove_uids is not None:
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)

    if self.op.uid_pool is not None:
      self.cluster.uid_pool = self.op.uid_pool

    if self.op.default_iallocator is not None:
      self.cluster.default_iallocator = self.op.default_iallocator

    if self.op.reserved_lvs is not None:
      self.cluster.reserved_lvs = self.op.reserved_lvs

    def helper_os(aname, mods, desc):
      desc += " OS list"
      lst = getattr(self.cluster, aname)
      for key, val in mods:
        if key == constants.DDM_ADD:
          if val in lst:
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
          else:
            lst.append(val)
        elif key == constants.DDM_REMOVE:
          if val in lst:
            lst.remove(val)
          else:
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
        else:
          raise errors.ProgrammerError("Invalid modification '%s'" % key)

    if self.op.hidden_os:
      helper_os("hidden_os", self.op.hidden_os, "hidden")

    if self.op.blacklisted_os:
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")

    self.cfg.Update(self.cluster, feedback_fn)


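# The hidden_os/blacklisted_os modifications applied by LUSetClusterParams
# above are lists of (action, os_name) pairs using the DDM_* constants
# consumed by helper_os; an illustrative value (the OS names are placeholders
# only) would be:
#
#   [(constants.DDM_ADD, "example-os"), (constants.DDM_REMOVE, "other-os")]
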
def _UploadHelper(lu, nodes, fname):
  """Helper for uploading a file and showing warnings.

  """
  if os.path.exists(fname):
    result = lu.rpc.call_upload_file(nodes, fname)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        msg = ("Copy of file %s to node %s failed: %s" %
               (fname, to_node, msg))
        lu.proc.LogWarning(msg)


def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  # 1. Gather target nodes
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
  dist_nodes = lu.cfg.GetOnlineNodeList()
  nvm_nodes = lu.cfg.GetNonVmCapableNodeList()
  vm_nodes = [name for name in dist_nodes if name not in nvm_nodes]
  if additional_nodes is not None:
    dist_nodes.extend(additional_nodes)
    if additional_vm:
      vm_nodes.extend(additional_nodes)
  if myself.name in dist_nodes:
    dist_nodes.remove(myself.name)
  if myself.name in vm_nodes:
    vm_nodes.remove(myself.name)

  # 2. Gather files to distribute
  dist_files = set([constants.ETC_HOSTS,
                    constants.SSH_KNOWN_HOSTS_FILE,
                    constants.RAPI_CERT_FILE,
                    constants.RAPI_USERS_FILE,
                    constants.CONFD_HMAC_KEY,
                    constants.CLUSTER_DOMAIN_SECRET_FILE,
                   ])

  vm_files = set()
  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
  for hv_name in enabled_hypervisors:
    hv_class = hypervisor.GetHypervisor(hv_name)
    vm_files.update(hv_class.GetAncillaryFiles())

  # 3. Perform the files upload
  for fname in dist_files:
    _UploadHelper(lu, dist_nodes, fname)
  for fname in vm_files:
    _UploadHelper(lu, vm_nodes, fname)


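# _RedistributeAncillaryFiles is invoked either with no extra arguments (as
# LURedistributeConfig below does) or with nodes that are not yet part of the
# configuration, mirroring the call made later by LUAddNode:
#
#   _RedistributeAncillaryFiles(lu, additional_nodes=[node_name],
#                               additional_vm=True)
#
# where node_name stands for the node being added.
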
class LURedistributeConfig(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)


def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  """
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                           node, disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded


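# _WaitForSync returns True only when no disk is left degraded; a minimal
# usage sketch for a hypothetical caller (not taken verbatim from this
# module) would be:
#
#   if not _WaitForSync(self, instance):
#     raise errors.OpExecError("Disks of instance %s are degraded" %
#                              instance.name)
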
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result


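# Illustrative call of the helper above: checking only the local storage
# status of a device on a secondary node would look like
#
#   ok = _CheckDiskConsistency(lu, dev, node, on_primary=False, ldisk=True)
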
class LUDiagnoseOS(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  _OP_PARAMS = [
    _POutputFields,
    ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
    ]
  REQ_BGL = False
  _HID = "hidden"
  _BLK = "blacklisted"
  _VLD = "valid"
  _FIELDS_STATIC = utils.FieldSet()
  _FIELDS_DYNAMIC = utils.FieldSet("name", _VLD, "node_status", "variants",
                                   "parameters", "api_versions", _HID, _BLK)

  def CheckArguments(self):
    if self.op.names:
      raise errors.OpPrereqError("Selective OS query not supported",
                                 errors.ECODE_INVAL)

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    # Lock all nodes, in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    self.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and tuples of (path, status, diagnose,
        variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "", [], [])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for (name, path, status, diagnose, variants,
           params, api_versions) in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        # convert params from [name, help] to (name, help)
        params = [tuple(v) for v in params]
        all_os[name][node_name].append((path, status, diagnose,
                                        variants, params, api_versions))
    return all_os

  def Exec(self, feedback_fn):
    """Compute the list of OSes.

    """
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
    node_data = self.rpc.call_os_diagnose(valid_nodes)
    pol = self._DiagnoseByOS(node_data)
    output = []
    cluster = self.cfg.GetClusterInfo()

    for os_name in utils.NiceSort(pol.keys()):
      os_data = pol[os_name]
      row = []
      valid = True
      (variants, params, api_versions) = null_state = (set(), set(), set())
      for idx, osl in enumerate(os_data.values()):
        valid = bool(valid and osl and osl[0][1])
        if not valid:
          (variants, params, api_versions) = null_state
          break
        node_variants, node_params, node_api = osl[0][3:6]
        if idx == 0: # first entry
          variants = set(node_variants)
          params = set(node_params)
          api_versions = set(node_api)
        else: # keep consistency
          variants.intersection_update(node_variants)
          params.intersection_update(node_params)
          api_versions.intersection_update(node_api)

      is_hid = os_name in cluster.hidden_os
      is_blk = os_name in cluster.blacklisted_os
      if ((self._HID not in self.op.output_fields and is_hid) or
          (self._BLK not in self.op.output_fields and is_blk) or
          (self._VLD not in self.op.output_fields and not valid)):
        continue

      for field in self.op.output_fields:
        if field == "name":
          val = os_name
        elif field == self._VLD:
          val = valid
        elif field == "node_status":
          # this is just a copy of the dict
          val = {}
          for node_name, nos_list in os_data.items():
            val[node_name] = nos_list
        elif field == "variants":
          val = utils.NiceSort(list(variants))
        elif field == "parameters":
          val = list(params)
        elif field == "api_versions":
          val = list(api_versions)
        elif field == self._HID:
          val = is_hid
        elif field == self._BLK:
          val = is_blk
        else:
          raise errors.ParameterError(field)
        row.append(val)
      output.append(row)

    return output


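# The dictionary built by LUDiagnoseOS._DiagnoseByOS above, shown with purely
# illustrative node names, paths and values:
#
#   {"example-os": {"node1": [("/srv/ganeti/os/example-os", True, "",
#                              ["default"], [("param", "help")], [10])],
#                   "node2": []}}
#
# i.e. one (path, status, diagnose, variants, parameters, api_versions)
# tuple per OS installation found on each reachable node.
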
class LURemoveNode(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE
  _OP_PARAMS = [
    _PNodeName,
    ]

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      logging.warning("Node %s which is about to be removed not found"
                      " in the all nodes list", self.op.node_name)
    return env, all_nodes, all_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    instance_list = self.cfg.GetInstanceList()

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node,"
                                 " you need to failover first.",
                                 errors.ECODE_INVAL)

    for instance_name in instance_list:
      instance = self.cfg.GetInstanceInfo(instance_name)
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first." % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_REMOVE,
                                              node.name, None)
      result.Raise("Can't update hosts file with new host data")
      _RedistributeAncillaryFiles(self)


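# Note on the hooks convention used by LURemoveNode.BuildHooksEnv above: the
# returned tuple is (env, nodes for the pre phase, nodes for the post phase).
# LURemoveNode leaves the node being removed out of both lists, whereas
# LUAddNode below includes the new node only in the post-phase list.
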
class LUQueryNodes(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable-msg=W0142
  _OP_PARAMS = [
    _POutputFields,
    ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
    ("use_locking", False, ht.TBool),
    ]
  REQ_BGL = False

  _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
                    "master_candidate", "offline", "drained",
                    "master_capable", "vm_capable"]

  _FIELDS_DYNAMIC = utils.FieldSet(
    "dtotal", "dfree",
    "mtotal", "mnode", "mfree",
    "bootid",
    "ctotal", "cnodes", "csockets",
    )

  _FIELDS_STATIC = utils.FieldSet(*[
    "pinst_cnt", "sinst_cnt",
    "pinst_list", "sinst_list",
    "pip", "sip", "tags",
    "master",
    "role"] + _SIMPLE_FIELDS
    )

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.names:
      self.wanted = _GetWantedNodes(self, self.op.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      # if we don't request only static fields, we need to lock the nodes
      self.needed_locks[locking.LEVEL_NODE] = self.wanted

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    all_info = self.cfg.GetAllNodesInfo()
    if self.do_locking:
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
    elif self.wanted != locking.ALL_SET:
      nodenames = self.wanted
      missing = set(nodenames).difference(all_info.keys())
      if missing:
        raise errors.OpExecError(
          "Some nodes were removed before retrieving their data: %s" % missing)
    else:
      nodenames = all_info.keys()

    nodenames = utils.NiceSort(nodenames)
    nodelist = [all_info[name] for name in nodenames]

    # begin data gathering

    if self.do_node_query:
      live_data = {}
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                          self.cfg.GetHypervisorType())
      for name in nodenames:
        nodeinfo = node_data[name]
        if not nodeinfo.fail_msg and nodeinfo.payload:
          nodeinfo = nodeinfo.payload
          fn = utils.TryConvert
          live_data[name] = {
            "mtotal": fn(int, nodeinfo.get('memory_total', None)),
            "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
            "mfree": fn(int, nodeinfo.get('memory_free', None)),
            "dtotal": fn(int, nodeinfo.get('vg_size', None)),
            "dfree": fn(int, nodeinfo.get('vg_free', None)),
            "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
            "bootid": nodeinfo.get('bootid', None),
            "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
            "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
            }
        else:
          live_data[name] = {}
    else:
      live_data = dict.fromkeys(nodenames, {})

    node_to_primary = dict([(name, set()) for name in nodenames])
    node_to_secondary = dict([(name, set()) for name in nodenames])

    inst_fields = frozenset(("pinst_cnt", "pinst_list",
                             "sinst_cnt", "sinst_list"))
    if inst_fields & frozenset(self.op.output_fields):
      inst_data = self.cfg.GetAllInstancesInfo()

      for inst in inst_data.values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)

    master_node = self.cfg.GetMasterNode()

    # end data gathering

    output = []
    for node in nodelist:
      node_output = []
      for field in self.op.output_fields:
        if field in self._SIMPLE_FIELDS:
          val = getattr(node, field)
        elif field == "pinst_list":
          val = list(node_to_primary[node.name])
        elif field == "sinst_list":
          val = list(node_to_secondary[node.name])
        elif field == "pinst_cnt":
          val = len(node_to_primary[node.name])
        elif field == "sinst_cnt":
          val = len(node_to_secondary[node.name])
        elif field == "pip":
          val = node.primary_ip
        elif field == "sip":
          val = node.secondary_ip
        elif field == "tags":
          val = list(node.GetTags())
        elif field == "master":
          val = node.name == master_node
        elif self._FIELDS_DYNAMIC.Matches(field):
          val = live_data[node.name].get(field, None)
        elif field == "role":
          if node.name == master_node:
            val = "M"
          elif node.master_candidate:
            val = "C"
          elif node.drained:
            val = "D"
          elif node.offline:
            val = "O"
          else:
            val = "R"
        else:
          raise errors.ParameterError(field)
        node_output.append(val)
      output.append(node_output)

    return output


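# LUQueryNodes.Exec returns one row per node, ordered like the requested
# output_fields; for output_fields=["name", "pip", "role"] the result would
# resemble (host names and addresses are placeholders):
#
#   [["node1.example.com", "192.0.2.1", "M"],
#    ["node2.example.com", "192.0.2.2", "C"]]
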
class LUQueryNodeVolumes(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  _OP_PARAMS = [
    ("nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
    ("output_fields", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
    ]
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    nodenames = self.acquired_locks[locking.LEVEL_NODE]
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = [self.cfg.GetInstanceInfo(iname) for iname
             in self.cfg.GetInstanceList()]

    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = nresult.payload[:]
      node_vols.sort(key=lambda vol: vol['dev'])

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol['dev']
          elif field == "vg":
            val = vol['vg']
          elif field == "name":
            val = vol['name']
          elif field == "size":
            val = int(float(vol['size']))
          elif field == "instance":
            for inst in ilist:
              if node not in lv_by_node[inst]:
                continue
              if vol['name'] in lv_by_node[inst][node]:
                val = inst.name
                break
            else:
              val = '-'
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output


class LUQueryNodeStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  _OP_PARAMS = [
    ("nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
    ("storage_type", ht.NoDefault, _CheckStorageType),
    ("output_fields", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
    ("name", None, ht.TMaybeString),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result


class LUModifyNodeStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  _OP_PARAMS = [
    _PNodeName,
    ("storage_type", ht.NoDefault, _CheckStorageType),
    ("name", ht.NoDefault, ht.TNonEmptyString),
    ("changes", ht.NoDefault, ht.TDict),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def Exec(self, feedback_fn):
    """Modifies the given storage unit on the node.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


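# The "changes" dict accepted by LUModifyNodeStorage is keyed by the fields
# listed in constants.MODIFIABLE_STORAGE_FIELDS for the given storage type;
# an assumed example (the exact field constant depends on constants.py):
#
#   changes = {constants.SF_ALLOCATABLE: True}
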
class LUAddNode(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _OP_PARAMS = [
    _PNodeName,
    ("primary_ip", None, ht.NoType),
    ("secondary_ip", None, ht.TMaybeString),
    ("readd", False, ht.TBool),
    ("group", None, ht.TMaybeString),
    ("master_capable", None, ht.TMaybeBool),
    ("vm_capable", None, ht.TMaybeBool),
    ]
  _NFLAGS = ["master_capable", "vm_capable"]

  def CheckArguments(self):
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
    # validate/normalize the node name
    self.hostname = netutils.GetHostname(name=self.op.node_name,
                                         family=self.primary_ip_family)
    self.op.node_name = self.hostname.name
    if self.op.readd and self.op.group:
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
                                 " being readded", errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }
    nodes_0 = self.cfg.GetNodeList()
    nodes_1 = nodes_0 + [self.op.node_name, ]
    return env, nodes_0, nodes_1

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) match the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg
    hostname = self.hostname
    node = hostname.name
    primary_ip = self.op.primary_ip = hostname.ip
    if self.op.secondary_ip is None:
      if self.primary_ip_family == netutils.IP6Address.family:
        raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
                                   " IPv4 address must be given as secondary",
                                   errors.ECODE_INVAL)
      self.op.secondary_ip = primary_ip

    secondary_ip = self.op.secondary_ip
    if not netutils.IP4Address.IsValid(secondary_ip):
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                 " address" % secondary_ip, errors.ECODE_INVAL)

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    self.changed_primary_ip = False

    for existing_node_name in node_list:
      existing_node = cfg.GetNodeInfo(existing_node_name)

      if self.op.readd and node == existing_node_name:
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # After this 'if' block, None is no longer a valid value for the
    # _capable op attributes
    if self.op.readd:
      old_node = self.cfg.GetNodeInfo(node)
      assert old_node is not None, "Can't retrieve locked node %s" % node
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, getattr(old_node, attr))
    else:
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, True)

    if self.op.readd and not self.op.vm_capable:
      pri, sec = cfg.GetNodeInstances(node)
      if pri or sec:
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
                                   " flag set to false, but it already holds"
                                   " instances" % node,
                                   errors.ECODE_STATE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no secondary ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a secondary ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                           source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to node daemon port",
                                   errors.ECODE_ENVIRON)

    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    if self.op.master_capable:
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
    else:
      self.master_candidate = False

    if self.op.readd:
      self.new_node = old_node
    else:
      node_group = cfg.LookupNodeGroup(self.op.group)
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False,
                                   group=node_group)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # copy the master/vm_capable flags
    for attr in self._NFLAGS:
      setattr(new_node, attr, getattr(self.op, attr))

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_ADD,
                                              self.hostname.name,
                                              self.hostname.ip)
      result.Raise("Can't update hosts file with new host data")

    if new_node.secondary_ip != new_node.primary_ip:
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
                               False)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: [node],
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed.")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
                                  additional_vm=self.op.vm_capable)
      self.context.AddNode(new_node, self.proc.GetECId())


class LUSetNodeParams(LogicalUnit):
  """Modifies the parameters of a node.

  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
      to the node role (as _ROLE_*)
  @cvar _R2F: a dictionary from node role to tuples of flags
  @cvar _FLAGS: a list of attribute names corresponding to the flags

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  _OP_PARAMS = [
    _PNodeName,
    ("master_candidate", None, ht.TMaybeBool),
    ("offline", None, ht.TMaybeBool),
    ("drained", None, ht.TMaybeBool),
    ("auto_promote", False, ht.TBool),
    ("master_capable", None, ht.TMaybeBool),
    ("vm_capable", None, ht.TMaybeBool),
    ("secondary_ip", None, ht.TMaybeString),
    _PForce,
    ]
  REQ_BGL = False
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
  _FLAGS = ["master_candidate", "drained", "offline"]

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
                self.op.master_capable, self.op.vm_capable,
                self.op.secondary_ip]
    if all_mods.count(None) == len(all_mods):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we might be demoting from MC
    self.might_demote = (self.op.master_candidate == False or
                         self.op.offline == True or
                         self.op.drained == True or
                         self.op.master_capable == False)

    if self.op.secondary_ip:
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                   " address" % self.op.secondary_ip,
                                   errors.ECODE_INVAL)

    self.lock_all = self.op.auto_promote and self.might_demote
    self.lock_instances = self.op.secondary_ip is not None

  def ExpandNames(self):
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

    if self.lock_instances:
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET

  def DeclareLocks(self, level):
    # If we have locked all instances, before waiting to lock nodes, release
    # all the ones living on nodes unrelated to the current operation.
    if level == locking.LEVEL_NODE and self.lock_instances:
      instances_release = []
      instances_keep = []
      self.affected_instances = []
      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
        for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
          instance = self.context.cfg.GetInstanceInfo(instance_name)
          i_mirrored = instance.disk_template in constants.DTS_NET_MIRROR
          if i_mirrored and self.op.node_name in instance.all_nodes:
            instances_keep.append(instance_name)
            self.affected_instances.append(instance)
          else:
            instances_release.append(instance_name)
        if instances_release:
          self.context.glm.release(locking.LEVEL_INSTANCE, instances_release)
          self.acquired_locks[locking.LEVEL_INSTANCE] = instances_keep

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }
    nl = [self.cfg.GetMasterNode(),
          self.op.node_name]
    return env, nl, nl

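  # The role computation in CheckPrereq and Exec below relies on the _F2R
  # table defined above (and on _R2F, its inverse); for instance the flag
  # tuple (True, False, False), i.e. master_candidate set while drained and
  # offline are unset, maps to _ROLE_CANDIDATE:
  #
  #   _F2R[(True, False, False)] == _ROLE_CANDIDATE
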
  def CheckPrereq(self):
4083
    """Check prerequisites.
4084

4085
    This only checks the instance list against the existing names.
4086

4087
    """
4088
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
4089

    
4090
    if (self.op.master_candidate is not None or
4091
        self.op.drained is not None or
4092
        self.op.offline is not None):
4093
      # we can't change the master's node flags
4094
      if self.op.node_name == self.cfg.GetMasterNode():
4095
        raise errors.OpPrereqError("The master role can be changed"
4096
                                   " only via master-failover",
4097
                                   errors.ECODE_INVAL)
4098

    
4099
    if self.op.master_candidate and not node.master_capable:
4100
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
4101
                                 " it a master candidate" % node.name,
4102
                                 errors.ECODE_STATE)
4103

    
4104
    if self.op.vm_capable == False:
4105
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
4106
      if ipri or isec:
4107
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
4108
                                   " the vm_capable flag" % node.name,
4109
                                   errors.ECODE_STATE)
4110

    
4111
    if node.master_candidate and self.might_demote and not self.lock_all:
4112
      assert not self.op.auto_promote, "auto-promote set but lock_all not"
4113
      # check if after removing the current node, we're missing master
4114
      # candidates
4115
      (mc_remaining, mc_should, _) = \
4116
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
4117
      if mc_remaining < mc_should:
4118
        raise errors.OpPrereqError("Not enough master candidates, please"
4119
                                   " pass auto_promote to allow promotion",
4120
                                   errors.ECODE_STATE)
4121

    
4122
    self.old_flags = old_flags = (node.master_candidate,
4123
                                  node.drained, node.offline)
4124
    assert old_flags in self._F2R, "Un-handled old flags  %s" % str(old_flags)
4125
    self.old_role = old_role = self._F2R[old_flags]
4126

    
4127
    # Check for ineffective changes
4128
    for attr in self._FLAGS:
4129
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
4130
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
4131
        setattr(self.op, attr, None)
4132

    
4133
    # Past this point, any flag change to False means a transition
4134
    # away from the respective state, as only real changes are kept
4135

    
4136
    # If we're being deofflined/drained, we'll MC ourself if needed
4137
    if (self.op.drained == False or self.op.offline == False or
4138
        (self.op.master_capable and not node.master_capable)):
4139
      if _DecideSelfPromotion(self):
4140
        self.op.master_candidate = True
4141
        self.LogInfo("Auto-promoting node to master candidate")
4142

    
4143
    # If we're no longer master capable, we'll demote ourselves from MC
4144
    if self.op.master_capable == False and node.master_candidate:
4145
      self.LogInfo("Demoting from master candidate")
4146
      self.op.master_candidate = False
4147

    
4148
    # Compute new role
4149
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
4150
    if self.op.master_candidate:
4151
      new_role = self._ROLE_CANDIDATE
4152
    elif self.op.drained:
4153
      new_role = self._ROLE_DRAINED
4154
    elif self.op.offline:
4155
      new_role = self._ROLE_OFFLINE
4156
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
4157
      # False is still in new flags, which means we're un-setting (the
4158
      # only) True flag
4159
      new_role = self._ROLE_REGULAR
4160
    else: # no new flags, nothing, keep old role
4161
      new_role = old_role
4162

    
4163
    self.new_role = new_role
4164

    
4165
    if old_role == self._ROLE_OFFLINE and new_role != old_role:
4166
      # Trying to transition out of offline status
4167
      result = self.rpc.call_version([node.name])[node.name]
4168
      if result.fail_msg:
4169
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
4170
                                   " to report its version: %s" %
4171
                                   (node.name, result.fail_msg),
4172
                                   errors.ECODE_STATE)
4173
      else:
4174
        self.LogWarning("Transitioning node from offline to online state"
4175
                        " without using re-add. Please make sure the node"
4176
                        " is healthy!")
4177

    
4178
    if self.op.secondary_ip:
4179
      # Ok even without locking, because this can't be changed by any LU
4180
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
4181
      master_singlehomed = master.secondary_ip == master.primary_ip
4182
      if master_singlehomed and self.op.secondary_ip:
4183
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
4184
                                   " homed cluster", errors.ECODE_INVAL)
4185

    
4186
      if node.offline:
4187
        if self.affected_instances:
4188
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
4189
                                     " node has instances (%s) configured"
4190
                                     " to use it" % self.affected_instances)
4191
      else:
4192
        # On online nodes, check that no instances are running, and that
4193
        # the node has the new ip and we can reach it.
4194
        for instance in self.affected_instances:
4195
          _CheckInstanceDown(self, instance, "cannot change secondary ip")
4196

    
4197
        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
4198
        if master.name != node.name:
4199
          # check reachability from master secondary ip to new secondary ip
4200
          if not netutils.TcpPing(self.op.secondary_ip,
4201
                                  constants.DEFAULT_NODED_PORT,
4202
                                  source=master.secondary_ip):
4203
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4204
                                       " based ping to node daemon port",
4205
                                       errors.ECODE_ENVIRON)
4206

    
4207
  def Exec(self, feedback_fn):
4208
    """Modifies a node.
4209

4210
    """
4211
    node = self.node
4212
    old_role = self.old_role
4213
    new_role = self.new_role
4214

    
4215
    result = []
4216

    
4217
    for attr in ["master_capable", "vm_capable"]:
4218
      val = getattr(self.op, attr)
4219
      if val is not None:
4220
        setattr(node, attr, val)
4221
        result.append((attr, str(val)))
4222

    
4223
    if new_role != old_role:
4224
      # Tell the node to demote itself, if no longer MC and not offline
4225
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
4226
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
4227
        if msg:
4228
          self.LogWarning("Node failed to demote itself: %s", msg)
4229

    
4230
      new_flags = self._R2F[new_role]
4231
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
4232
        if of != nf:
4233
          result.append((desc, str(nf)))
4234
      (node.master_candidate, node.drained, node.offline) = new_flags
4235

    
4236
      # we locked all nodes, we adjust the CP before updating this node
4237
      if self.lock_all:
4238
        _AdjustCandidatePool(self, [node.name])
4239

    
4240
    if self.op.secondary_ip:
4241
      node.secondary_ip = self.op.secondary_ip
4242
      result.append(("secondary_ip", self.op.secondary_ip))
4243

    
4244
    # this will trigger configuration file update, if needed
4245
    self.cfg.Update(node, feedback_fn)
4246

    
4247
    # this will trigger job queue propagation or cleanup if the mc
4248
    # flag changed
4249
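    # (exactly one of old_role/new_role being the candidate role means
    # the master-candidate flag actually flipped)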
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
4250
      self.context.ReaddNode(node)
4251

    
4252
    return result
4253

    
4254

    
4255
class LUPowercycleNode(NoHooksLU):
4256
  """Powercycles a node.
4257

4258
  """
4259
  _OP_PARAMS = [
4260
    _PNodeName,
4261
    _PForce,
4262
    ]
4263
  REQ_BGL = False
4264

    
4265
  def CheckArguments(self):
4266
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4267
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
4268
      raise errors.OpPrereqError("The node is the master and the force"
4269
                                 " parameter was not set",
4270
                                 errors.ECODE_INVAL)
4271

    
4272
  def ExpandNames(self):
4273
    """Locking for PowercycleNode.
4274

4275
    This is a last-resort option and shouldn't block on other
4276
    jobs. Therefore, we grab no locks.
4277

4278
    """
4279
    self.needed_locks = {}
4280

    
4281
  def Exec(self, feedback_fn):
4282
    """Reboots a node.
4283

4284
    """
4285
    result = self.rpc.call_node_powercycle(self.op.node_name,
4286
                                           self.cfg.GetHypervisorType())
4287
    result.Raise("Failed to schedule the reboot")
4288
    return result.payload
4289

    
4290

    
4291
class LUQueryClusterInfo(NoHooksLU):
4292
  """Query cluster configuration.
4293

4294
  """
4295
  REQ_BGL = False
4296

    
4297
  def ExpandNames(self):
4298
    self.needed_locks = {}
4299

    
4300
  def Exec(self, feedback_fn):
4301
    """Return cluster config.
4302

4303
    """
4304
    cluster = self.cfg.GetClusterInfo()
4305
    os_hvp = {}
4306

    
4307
    # Filter just for enabled hypervisors
4308
    for os_name, hv_dict in cluster.os_hvp.items():
4309
      os_hvp[os_name] = {}
4310
      for hv_name, hv_params in hv_dict.items():
4311
        if hv_name in cluster.enabled_hypervisors:
4312
          os_hvp[os_name][hv_name] = hv_params
4313

    
4314
    # Convert ip_family to ip_version
4315
    primary_ip_version = constants.IP4_VERSION
4316
    if cluster.primary_ip_family == netutils.IP6Address.family:
4317
      primary_ip_version = constants.IP6_VERSION
4318

    
4319
    result = {
4320
      "software_version": constants.RELEASE_VERSION,
4321
      "protocol_version": constants.PROTOCOL_VERSION,
4322
      "config_version": constants.CONFIG_VERSION,
4323
      "os_api_version": max(constants.OS_API_VERSIONS),
4324
      "export_version": constants.EXPORT_VERSION,
4325
      "architecture": (platform.architecture()[0], platform.machine()),
4326
      "name": cluster.cluster_name,
4327
      "master": cluster.master_node,
4328
      "default_hypervisor": cluster.enabled_hypervisors[0],
4329
      "enabled_hypervisors": cluster.enabled_hypervisors,
4330
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4331
                        for hypervisor_name in cluster.enabled_hypervisors]),
4332
      "os_hvp": os_hvp,
4333
      "beparams": cluster.beparams,
4334
      "osparams": cluster.osparams,
4335
      "nicparams": cluster.nicparams,
4336
      "candidate_pool_size": cluster.candidate_pool_size,
4337
      "master_netdev": cluster.master_netdev,
4338
      "volume_group_name": cluster.volume_group_name,
4339
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
4340
      "file_storage_dir": cluster.file_storage_dir,
4341
      "maintain_node_health": cluster.maintain_node_health,
4342
      "ctime": cluster.ctime,
4343
      "mtime": cluster.mtime,
4344
      "uuid": cluster.uuid,
4345
      "tags": list(cluster.GetTags()),
4346
      "uid_pool": cluster.uid_pool,
4347
      "default_iallocator": cluster.default_iallocator,
4348
      "reserved_lvs": cluster.reserved_lvs,
4349
      "primary_ip_version": primary_ip_version,
4350
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
4351
      }
4352

    
4353
    return result
4354

    
4355

    
4356
class LUQueryConfigValues(NoHooksLU):
4357
  """Return configuration values.
4358

4359
  """
4360
  _OP_PARAMS = [_POutputFields]
4361
  REQ_BGL = False
4362
  _FIELDS_DYNAMIC = utils.FieldSet()
4363
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
4364
                                  "watcher_pause", "volume_group_name")
4365

    
4366
  def CheckArguments(self):
4367
    _CheckOutputFields(static=self._FIELDS_STATIC,
4368
                       dynamic=self._FIELDS_DYNAMIC,
4369
                       selected=self.op.output_fields)
4370

    
4371
  def ExpandNames(self):
4372
    self.needed_locks = {}
4373

    
4374
  def Exec(self, feedback_fn):
4375
    """Dump a representation of the cluster config to the standard output.
4376

4377
    """
4378
    values = []
4379
    for field in self.op.output_fields:
4380
      if field == "cluster_name":
4381
        entry = self.cfg.GetClusterName()
4382
      elif field == "master_node":
4383
        entry = self.cfg.GetMasterNode()
4384
      elif field == "drain_flag":
4385
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4386
      elif field == "watcher_pause":
4387
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4388
      elif field == "volume_group_name":
4389
        entry = self.cfg.GetVGName()
4390
      else:
4391
        raise errors.ParameterError(field)
4392
      values.append(entry)
4393
    return values
4394

    
4395

    
4396
class LUActivateInstanceDisks(NoHooksLU):
4397
  """Bring up an instance's disks.
4398

4399
  """
4400
  _OP_PARAMS = [
4401
    _PInstanceName,
4402
    ("ignore_size", False, ht.TBool),
4403
    ]
4404
  REQ_BGL = False
4405

    
4406
  def ExpandNames(self):
4407
    self._ExpandAndLockInstance()
4408
    self.needed_locks[locking.LEVEL_NODE] = []
4409
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4410

    
4411
  def DeclareLocks(self, level):
4412
    if level == locking.LEVEL_NODE:
4413
      self._LockInstancesNodes()
4414

    
4415
  def CheckPrereq(self):
4416
    """Check prerequisites.
4417

4418
    This checks that the instance is in the cluster.
4419

4420
    """
4421
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4422
    assert self.instance is not None, \
4423
      "Cannot retrieve locked instance %s" % self.op.instance_name
4424
    _CheckNodeOnline(self, self.instance.primary_node)
4425

    
4426
  def Exec(self, feedback_fn):
4427
    """Activate the disks.
4428

4429
    """
4430
    disks_ok, disks_info = \
4431
              _AssembleInstanceDisks(self, self.instance,
4432
                                     ignore_size=self.op.ignore_size)
4433
    if not disks_ok:
4434
      raise errors.OpExecError("Cannot activate block devices")
4435

    
4436
    return disks_info
4437

    
4438

    
4439
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
4440
                           ignore_size=False):
4441
  """Prepare the block devices for an instance.
4442

4443
  This sets up the block devices on all nodes.
4444

4445
  @type lu: L{LogicalUnit}
4446
  @param lu: the logical unit on whose behalf we execute
4447
  @type instance: L{objects.Instance}
4448
  @param instance: the instance for whose disks we assemble
4449
  @type disks: list of L{objects.Disk} or None
4450
  @param disks: which disks to assemble (or all, if None)
4451
  @type ignore_secondaries: boolean
4452
  @param ignore_secondaries: if true, errors on secondary nodes
4453
      won't result in an error return from the function
4454
  @type ignore_size: boolean
4455
  @param ignore_size: if true, the current known size of the disk
4456
      will not be used during the disk activation, useful for cases
4457
      when the size is wrong
4458
  @return: False if the operation failed, otherwise a list of
4459
      (host, instance_visible_name, node_visible_name)
4460
      with the mapping from node devices to instance devices
4461

4462
  """
4463
  device_info = []
4464
  disks_ok = True
4465
  iname = instance.name
4466
  disks = _ExpandCheckDisks(instance, disks)
4467

    
4468
  # With the two passes mechanism we try to reduce the window of
4469
  # opportunity for the race condition of switching DRBD to primary
4470
  # before handshaking occurred, but we do not eliminate it
4471

    
4472
  # The proper fix would be to wait (with some limits) until the
4473
  # connection has been made and drbd transitions from WFConnection
4474
  # into any other network-connected state (Connected, SyncTarget,
4475
  # SyncSource, etc.)
4476

    
4477
  # 1st pass, assemble on all nodes in secondary mode
4478
  for inst_disk in disks:
4479
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4480
      if ignore_size:
4481
        node_disk = node_disk.Copy()
4482
        node_disk.UnsetSize()
4483
      lu.cfg.SetDiskID(node_disk, node)
4484
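      # the last argument (False) requests assembly in non-primary
      # (secondary) mode on this node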
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
4485
      msg = result.fail_msg
4486
      if msg:
4487
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
4488
                           " (is_primary=False, pass=1): %s",
4489
                           inst_disk.iv_name, node, msg)
4490
        if not ignore_secondaries:
4491
          disks_ok = False
4492

    
4493
  # FIXME: race condition on drbd migration to primary
4494

    
4495
  # 2nd pass, do only the primary node
4496
  for inst_disk in disks:
4497
    dev_path = None
4498

    
4499
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4500
      if node != instance.primary_node:
4501
        continue
4502
      if ignore_size:
4503
        node_disk = node_disk.Copy()
4504
        node_disk.UnsetSize()
4505
      lu.cfg.SetDiskID(node_disk, node)
4506
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
4507
      msg = result.fail_msg
4508
      if msg:
4509
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
4510
                           " (is_primary=True, pass=2): %s",
4511
                           inst_disk.iv_name, node, msg)
4512
        disks_ok = False
4513
      else:
4514
        dev_path = result.payload
4515

    
4516
    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
4517

    
4518
  # leave the disks configured for the primary node
4519
  # this is a workaround that would be fixed better by
4520
  # improving the logical/physical id handling
4521
  for disk in disks:
4522
    lu.cfg.SetDiskID(disk, instance.primary_node)
4523

    
4524
  return disks_ok, device_info
4525

    
4526

    
4527
def _StartInstanceDisks(lu, instance, force):
4528
  """Start the disks of an instance.
4529

4530
  """
4531
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
4532
                                           ignore_secondaries=force)
4533
  if not disks_ok:
4534
    _ShutdownInstanceDisks(lu, instance)
4535
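    # 'force' may be None for callers that do not expose the option;
    # only show the retry hint when the user explicitly declined to force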
    if force is not None and not force:
4536
      lu.proc.LogWarning("", hint="If the message above refers to a"
4537
                         " secondary node,"
4538
                         " you can retry the operation using '--force'.")
4539
    raise errors.OpExecError("Disk consistency error")
4540

    
4541

    
4542
class LUDeactivateInstanceDisks(NoHooksLU):
4543
  """Shutdown an instance's disks.
4544

4545
  """
4546
  _OP_PARAMS = [
4547
    _PInstanceName,
4548
    ]
4549
  REQ_BGL = False
4550

    
4551
  def ExpandNames(self):
4552
    self._ExpandAndLockInstance()
4553
    self.needed_locks[locking.LEVEL_NODE] = []
4554
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4555

    
4556
  def DeclareLocks(self, level):
4557
    if level == locking.LEVEL_NODE:
4558
      self._LockInstancesNodes()
4559

    
4560
  def CheckPrereq(self):
4561
    """Check prerequisites.
4562

4563
    This checks that the instance is in the cluster.
4564

4565
    """
4566
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4567
    assert self.instance is not None, \
4568
      "Cannot retrieve locked instance %s" % self.op.instance_name
4569

    
4570
  def Exec(self, feedback_fn):
4571
    """Deactivate the disks
4572

4573
    """
4574
    instance = self.instance
4575
    _SafeShutdownInstanceDisks(self, instance)
4576

    
4577

    
4578
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4579
  """Shutdown block devices of an instance.
4580

4581
  This function checks if an instance is running, before calling
4582
  _ShutdownInstanceDisks.
4583

4584
  """
4585
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4586
  _ShutdownInstanceDisks(lu, instance, disks=disks)
4587

    
4588

    
4589
def _ExpandCheckDisks(instance, disks):
4590
  """Return the instance disks selected by the disks list
4591

4592
  @type disks: list of L{objects.Disk} or None
4593
  @param disks: selected disks
4594
  @rtype: list of L{objects.Disk}
4595
  @return: selected instance disks to act on
4596

4597
  """
4598
  if disks is None:
4599
    return instance.disks
4600
  else:
4601
    if not set(disks).issubset(instance.disks):
4602
      raise errors.ProgrammerError("Can only act on disks belonging to the"
4603
                                   " target instance")
4604
    return disks
4605

    
4606

    
4607
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4608
  """Shutdown block devices of an instance.
4609

4610
  This does the shutdown on all nodes of the instance.
4611

4612
  If ignore_primary is False, an error on the primary node makes the
  function return False; if it is True, errors on the primary node are
  only logged.
4614

4615
  """
4616
  all_result = True
4617
  disks = _ExpandCheckDisks(instance, disks)
4618

    
4619
  for disk in disks:
4620
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4621
      lu.cfg.SetDiskID(top_disk, node)
4622
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4623
      msg = result.fail_msg
4624
      if msg:
4625
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4626
                      disk.iv_name, node, msg)
4627
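        # a failure only counts against the result if it did not happen
        # on the primary node while ignore_primary was requested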
        if not ignore_primary or node != instance.primary_node:
4628
          all_result = False
4629
  return all_result
4630

    
4631

    
4632
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4633
  """Checks if a node has enough free memory.
4634

4635
  This function checks if a given node has the needed amount of free
  memory. If the node has less memory, or if we cannot get the
  information from the node, it raises an OpPrereqError exception.
4639

4640
  @type lu: C{LogicalUnit}
4641
  @param lu: a logical unit from which we get configuration data
4642
  @type node: C{str}
4643
  @param node: the node to check
4644
  @type reason: C{str}
4645
  @param reason: string to use in the error message
4646
  @type requested: C{int}
4647
  @param requested: the amount of memory in MiB to check for
4648
  @type hypervisor_name: C{str}
4649
  @param hypervisor_name: the hypervisor to ask for memory stats
4650
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4651
      we cannot check the node
4652

4653
  """
4654
  nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
4655
  nodeinfo[node].Raise("Can't get data from node %s" % node,
4656
                       prereq=True, ecode=errors.ECODE_ENVIRON)
4657
  free_mem = nodeinfo[node].payload.get('memory_free', None)
4658
  if not isinstance(free_mem, int):
4659
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4660
                               " was '%s'" % (node, free_mem),
4661
                               errors.ECODE_ENVIRON)
4662
  if requested > free_mem:
4663
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4664
                               " needed %s MiB, available %s MiB" %
4665
                               (node, reason, requested, free_mem),
4666
                               errors.ECODE_NORES)
4667

    
4668

    
4669
def _CheckNodesFreeDisk(lu, nodenames, requested):
4670
  """Checks if nodes have enough free disk space in the default VG.
4671

4672
  This function checks if all given nodes have the needed amount of
  free disk. If any node has less disk, or if we cannot get the
  information from the node, it raises an OpPrereqError exception.
4676

4677
  @type lu: C{LogicalUnit}
4678
  @param lu: a logical unit from which we get configuration data
4679
  @type nodenames: C{list}
4680
  @param nodenames: the list of node names to check
4681
  @type requested: C{int}
4682
  @param requested: the amount of disk in MiB to check for
4683
  @raise errors.OpPrereqError: if the node doesn't have enough disk, or
4684
      we cannot check the node
4685

4686
  """
4687
  nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
4688
                                   lu.cfg.GetHypervisorType())
4689
  for node in nodenames:
4690
    info = nodeinfo[node]
4691
    info.Raise("Cannot get current information from node %s" % node,
4692
               prereq=True, ecode=errors.ECODE_ENVIRON)
4693
    vg_free = info.payload.get("vg_free", None)
4694
    if not isinstance(vg_free, int):
4695
      raise errors.OpPrereqError("Can't compute free disk space on node %s,"
4696
                                 " result was '%s'" % (node, vg_free),
4697
                                 errors.ECODE_ENVIRON)
4698
    if requested > vg_free:
4699
      raise errors.OpPrereqError("Not enough disk space on target node %s:"
4700
                                 " required %d MiB, available %d MiB" %
4701
                                 (node, requested, vg_free),
4702
                                 errors.ECODE_NORES)
4703

    
4704

    
4705
class LUStartupInstance(LogicalUnit):
4706
  """Starts an instance.
4707

4708
  """
4709
  HPATH = "instance-start"
4710
  HTYPE = constants.HTYPE_INSTANCE
4711
  _OP_PARAMS = [
4712
    _PInstanceName,
4713
    _PForce,
4714
    _PIgnoreOfflineNodes,
4715
    ("hvparams", ht.EmptyDict, ht.TDict),
4716
    ("beparams", ht.EmptyDict, ht.TDict),
4717
    ]
4718
  REQ_BGL = False
4719

    
4720
  def CheckArguments(self):
4721
    # extra beparams
4722
    if self.op.beparams:
4723
      # fill the beparams dict
4724
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4725

    
4726
  def ExpandNames(self):
4727
    self._ExpandAndLockInstance()
4728

    
4729
  def BuildHooksEnv(self):
4730
    """Build hooks env.
4731

4732
    This runs on master, primary and secondary nodes of the instance.
4733

4734
    """
4735
    env = {
4736
      "FORCE": self.op.force,
4737
      }
4738
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4739
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4740
    return env, nl, nl
4741

    
4742
  def CheckPrereq(self):
4743
    """Check prerequisites.
4744

4745
    This checks that the instance is in the cluster.
4746

4747
    """
4748
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4749
    assert self.instance is not None, \
4750
      "Cannot retrieve locked instance %s" % self.op.instance_name
4751

    
4752
    # extra hvparams
4753
    if self.op.hvparams:
4754
      # check hypervisor parameter syntax (locally)
4755
      cluster = self.cfg.GetClusterInfo()
4756
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
4757
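      # merge the temporary overrides on top of the instance's filled
      # hvparams; the merged dict is only used for validation and is
      # not written to the configuration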
      filled_hvp = cluster.FillHV(instance)
4758
      filled_hvp.update(self.op.hvparams)
4759
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
4760
      hv_type.CheckParameterSyntax(filled_hvp)
4761
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
4762

    
4763
    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
4764

    
4765
    if self.primary_offline and self.op.ignore_offline_nodes:
4766
      self.proc.LogWarning("Ignoring offline primary node")
4767

    
4768
      if self.op.hvparams or self.op.beparams:
4769
        self.proc.LogWarning("Overridden parameters are ignored")
4770
    else:
4771
      _CheckNodeOnline(self, instance.primary_node)
4772

    
4773
      bep = self.cfg.GetClusterInfo().FillBE(instance)
4774

    
4775
      # check that the required bridges exist
4776
      _CheckInstanceBridgesExist(self, instance)
4777

    
4778
      remote_info = self.rpc.call_instance_info(instance.primary_node,
4779
                                                instance.name,
4780
                                                instance.hypervisor)
4781
      remote_info.Raise("Error checking node %s" % instance.primary_node,
4782
                        prereq=True, ecode=errors.ECODE_ENVIRON)
4783
      if not remote_info.payload: # not running already
4784
        _CheckNodeFreeMemory(self, instance.primary_node,
4785
                             "starting instance %s" % instance.name,
4786
                             bep[constants.BE_MEMORY], instance.hypervisor)
4787

    
4788
  def Exec(self, feedback_fn):
4789
    """Start the instance.
4790

4791
    """
4792
    instance = self.instance
4793
    force = self.op.force
4794

    
4795
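    # record the administrative 'up' state first; this is the desired
    # state even if the actual startup below fails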
    self.cfg.MarkInstanceUp(instance.name)
4796

    
4797
    if self.primary_offline:
4798
      assert self.op.ignore_offline_nodes
4799
      self.proc.LogInfo("Primary node offline, marked instance as started")
4800
    else:
4801
      node_current = instance.primary_node
4802

    
4803
      _StartInstanceDisks(self, instance, force)
4804

    
4805
      result = self.rpc.call_instance_start(node_current, instance,
4806
                                            self.op.hvparams, self.op.beparams)
4807
      msg = result.fail_msg
4808
      if msg:
4809
        _ShutdownInstanceDisks(self, instance)
4810
        raise errors.OpExecError("Could not start instance: %s" % msg)
4811

    
4812

    
4813
class LURebootInstance(LogicalUnit):
4814
  """Reboot an instance.
4815

4816
  """
4817
  HPATH = "instance-reboot"
4818
  HTYPE = constants.HTYPE_INSTANCE
4819
  _OP_PARAMS = [
4820
    _PInstanceName,
4821
    ("ignore_secondaries", False, ht.TBool),
4822
    ("reboot_type", ht.NoDefault, ht.TElemOf(constants.REBOOT_TYPES)),
4823
    _PShutdownTimeout,
4824
    ]
4825
  REQ_BGL = False
4826

    
4827
  def ExpandNames(self):
4828
    self._ExpandAndLockInstance()
4829

    
4830
  def BuildHooksEnv(self):
4831
    """Build hooks env.
4832

4833
    This runs on master, primary and secondary nodes of the instance.
4834

4835
    """
4836
    env = {
4837
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
4838
      "REBOOT_TYPE": self.op.reboot_type,
4839
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
4840
      }
4841
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4842
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4843
    return env, nl, nl
4844

    
4845
  def CheckPrereq(self):
4846
    """Check prerequisites.
4847

4848
    This checks that the instance is in the cluster.
4849

4850
    """
4851
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4852
    assert self.instance is not None, \
4853
      "Cannot retrieve locked instance %s" % self.op.instance_name
4854

    
4855
    _CheckNodeOnline(self, instance.primary_node)
4856

    
4857
    # check that the required bridges exist
4858
    _CheckInstanceBridgesExist(self, instance)
4859

    
4860
  def Exec(self, feedback_fn):
4861
    """Reboot the instance.
4862

4863
    """
4864
    instance = self.instance
4865
    ignore_secondaries = self.op.ignore_secondaries
4866
    reboot_type = self.op.reboot_type
4867

    
4868
    node_current = instance.primary_node
4869

    
4870
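    # soft/hard reboots are delegated to the hypervisor on the node; a
    # full reboot is emulated by a shutdown followed by a fresh start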
    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
4871
                       constants.INSTANCE_REBOOT_HARD]:
4872
      for disk in instance.disks:
4873
        self.cfg.SetDiskID(disk, node_current)
4874
      result = self.rpc.call_instance_reboot(node_current, instance,
4875
                                             reboot_type,
4876
                                             self.op.shutdown_timeout)
4877
      result.Raise("Could not reboot instance")
4878
    else:
4879
      result = self.rpc.call_instance_shutdown(node_current, instance,
4880
                                               self.op.shutdown_timeout)
4881
      result.Raise("Could not shutdown instance for full reboot")
4882
      _ShutdownInstanceDisks(self, instance)
4883
      _StartInstanceDisks(self, instance, ignore_secondaries)
4884
      result = self.rpc.call_instance_start(node_current, instance, None, None)
4885
      msg = result.fail_msg
4886
      if msg:
4887
        _ShutdownInstanceDisks(self, instance)
4888
        raise errors.OpExecError("Could not start instance for"
4889
                                 " full reboot: %s" % msg)
4890

    
4891
    self.cfg.MarkInstanceUp(instance.name)
4892

    
4893

    
4894
class LUShutdownInstance(LogicalUnit):
4895
  """Shutdown an instance.
4896

4897
  """
4898
  HPATH = "instance-stop"
4899
  HTYPE = constants.HTYPE_INSTANCE
4900
  _OP_PARAMS = [
4901
    _PInstanceName,
4902
    _PIgnoreOfflineNodes,
4903
    ("timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT, ht.TPositiveInt),
4904
    ]
4905
  REQ_BGL = False
4906

    
4907
  def ExpandNames(self):
4908
    self._ExpandAndLockInstance()
4909

    
4910
  def BuildHooksEnv(self):
4911
    """Build hooks env.
4912

4913
    This runs on master, primary and secondary nodes of the instance.
4914

4915
    """
4916
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4917
    env["TIMEOUT"] = self.op.timeout
4918
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4919
    return env, nl, nl
4920

    
4921
  def CheckPrereq(self):
4922
    """Check prerequisites.
4923

4924
    This checks that the instance is in the cluster.
4925

4926
    """
4927
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4928
    assert self.instance is not None, \
4929
      "Cannot retrieve locked instance %s" % self.op.instance_name
4930

    
4931
    self.primary_offline = \
4932
      self.cfg.GetNodeInfo(self.instance.primary_node).offline
4933

    
4934
    if self.primary_offline and self.op.ignore_offline_nodes:
4935
      self.proc.LogWarning("Ignoring offline primary node")
4936
    else:
4937
      _CheckNodeOnline(self, self.instance.primary_node)
4938

    
4939
  def Exec(self, feedback_fn):
4940
    """Shutdown the instance.
4941

4942
    """
4943
    instance = self.instance
4944
    node_current = instance.primary_node
4945
    timeout = self.op.timeout
4946

    
4947
    self.cfg.MarkInstanceDown(instance.name)
4948

    
4949
    if self.primary_offline:
4950
      assert self.op.ignore_offline_nodes
4951
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
4952
    else:
4953
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
4954
      msg = result.fail_msg
4955
      if msg:
4956
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)
4957

    
4958
      _ShutdownInstanceDisks(self, instance)
4959

    
4960

    
4961
class LUReinstallInstance(LogicalUnit):
4962
  """Reinstall an instance.
4963

4964
  """
4965
  HPATH = "instance-reinstall"
4966
  HTYPE = constants.HTYPE_INSTANCE
4967
  _OP_PARAMS = [
4968
    _PInstanceName,
4969
    ("os_type", None, ht.TMaybeString),
4970
    ("force_variant", False, ht.TBool),
4971
    ("osparams", None, ht.TOr(ht.TDict, ht.TNone)),
4972
    ]
4973
  REQ_BGL = False
4974

    
4975
  def ExpandNames(self):
4976
    self._ExpandAndLockInstance()
4977

    
4978
  def BuildHooksEnv(self):
4979
    """Build hooks env.
4980

4981
    This runs on master, primary and secondary nodes of the instance.
4982

4983
    """
4984
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4985
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4986
    return env, nl, nl
4987

    
4988
  def CheckPrereq(self):
4989
    """Check prerequisites.
4990

4991
    This checks that the instance is in the cluster and is not running.
4992

4993
    """
4994
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4995
    assert instance is not None, \
4996
      "Cannot retrieve locked instance %s" % self.op.instance_name
4997
    _CheckNodeOnline(self, instance.primary_node)
4998

    
4999
    if instance.disk_template == constants.DT_DISKLESS:
5000
      raise errors.OpPrereqError("Instance '%s' has no disks" %
5001
                                 self.op.instance_name,
5002
                                 errors.ECODE_INVAL)
5003
    _CheckInstanceDown(self, instance, "cannot reinstall")
5004

    
5005
    if self.op.os_type is not None:
5006
      # OS verification
5007
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
5008
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
5009
      instance_os = self.op.os_type
5010
    else:
5011
      instance_os = instance.os
5012

    
5013
    nodelist = list(instance.all_nodes)
5014

    
5015
    if self.op.osparams:
5016
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
5017
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
5018
      self.os_inst = i_osdict # the new dict (without defaults)
5019
    else:
5020
      self.os_inst = None
5021

    
5022
    self.instance = instance
5023

    
5024
  def Exec(self, feedback_fn):
5025
    """Reinstall the instance.
5026

5027
    """
5028
    inst = self.instance
5029

    
5030
    if self.op.os_type is not None:
5031
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
5032
      inst.os = self.op.os_type
5033
      # Write to configuration
5034
      self.cfg.Update(inst, feedback_fn)
5035

    
5036
    _StartInstanceDisks(self, inst, None)
5037
    try:
5038
      feedback_fn("Running the instance OS create scripts...")
5039
      # FIXME: pass debug option from opcode to backend
5040
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
5041
                                             self.op.debug_level,
5042
                                             osparams=self.os_inst)
5043
      result.Raise("Could not install OS for instance %s on node %s" %
5044
                   (inst.name, inst.primary_node))
5045
    finally:
5046
      _ShutdownInstanceDisks(self, inst)
5047

    
5048

    
5049
class LURecreateInstanceDisks(LogicalUnit):
5050
  """Recreate an instance's missing disks.
5051

5052
  """
5053
  HPATH = "instance-recreate-disks"
5054
  HTYPE = constants.HTYPE_INSTANCE
5055
  _OP_PARAMS = [
5056
    _PInstanceName,
5057
    ("disks", ht.EmptyList, ht.TListOf(ht.TPositiveInt)),
5058
    ]
5059
  REQ_BGL = False
5060

    
5061
  def ExpandNames(self):
5062
    self._ExpandAndLockInstance()
5063

    
5064
  def BuildHooksEnv(self):
5065
    """Build hooks env.
5066

5067
    This runs on master, primary and secondary nodes of the instance.
5068

5069
    """
5070
    env = _BuildInstanceHookEnvByObject(self, self.instance)
5071
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5072
    return env, nl, nl
5073

    
5074
  def CheckPrereq(self):
5075
    """Check prerequisites.
5076

5077
    This checks that the instance is in the cluster and is not running.
5078

5079
    """
5080
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5081
    assert instance is not None, \
5082
      "Cannot retrieve locked instance %s" % self.op.instance_name
5083
    _CheckNodeOnline(self, instance.primary_node)
5084

    
5085
    if instance.disk_template == constants.DT_DISKLESS:
5086
      raise errors.OpPrereqError("Instance '%s' has no disks" %
5087
                                 self.op.instance_name, errors.ECODE_INVAL)
5088
    _CheckInstanceDown(self, instance, "cannot recreate disks")
5089

    
5090
    if not self.op.disks:
5091
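      # an empty 'disks' list means every disk should be recreated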
      self.op.disks = range(len(instance.disks))
5092
    else:
5093
      for idx in self.op.disks:
5094
        if idx >= len(instance.disks):
5095
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
5096
                                     errors.ECODE_INVAL)
5097

    
5098
    self.instance = instance
5099

    
5100
  def Exec(self, feedback_fn):
5101
    """Recreate the disks.
5102

5103
    """
5104
    to_skip = []
5105
    for idx, _ in enumerate(self.instance.disks):
5106
      if idx not in self.op.disks: # disk idx has not been passed in
5107
        to_skip.append(idx)
5108
        continue
5109

    
5110
    _CreateDisks(self, self.instance, to_skip=to_skip)
5111

    
5112

    
5113
class LURenameInstance(LogicalUnit):
5114
  """Rename an instance.
5115

5116
  """
5117
  HPATH = "instance-rename"
5118
  HTYPE = constants.HTYPE_INSTANCE
5119
  _OP_PARAMS = [
5120
    _PInstanceName,
5121
    ("new_name", ht.NoDefault, ht.TNonEmptyString),
5122
    ("ip_check", False, ht.TBool),
5123
    ("name_check", True, ht.TBool),
5124
    ]
5125

    
5126
  def CheckArguments(self):
5127
    """Check arguments.
5128

5129
    """
5130
    if self.op.ip_check and not self.op.name_check:
5131
      # TODO: make the ip check more flexible and not depend on the name check
5132
      raise errors.OpPrereqError("Cannot do ip check without a name check",
5133
                                 errors.ECODE_INVAL)
5134

    
5135
  def BuildHooksEnv(self):
5136
    """Build hooks env.
5137

5138
    This runs on master, primary and secondary nodes of the instance.
5139

5140
    """
5141
    env = _BuildInstanceHookEnvByObject(self, self.instance)
5142
    env["INSTANCE_NEW_NAME"] = self.op.new_name
5143
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5144
    return env, nl, nl
5145

    
5146
  def CheckPrereq(self):
5147
    """Check prerequisites.
5148

5149
    This checks that the instance is in the cluster and is not running.
5150

5151
    """
5152
    self.op.instance_name = _ExpandInstanceName(self.cfg,
5153
                                                self.op.instance_name)
5154
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5155
    assert instance is not None
5156
    _CheckNodeOnline(self, instance.primary_node)
5157
    _CheckInstanceDown(self, instance, "cannot rename")
5158
    self.instance = instance
5159

    
5160
    new_name = self.op.new_name
5161
    if self.op.name_check:
5162
      hostname = netutils.GetHostname(name=new_name)
5163
      new_name = self.op.new_name = hostname.name
5164
      if (self.op.ip_check and
5165
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
5166
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
5167
                                   (hostname.ip, new_name),
5168
                                   errors.ECODE_NOTUNIQUE)
5169

    
5170
    instance_list = self.cfg.GetInstanceList()
5171
    if new_name in instance_list:
5172
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
5173
                                 new_name, errors.ECODE_EXISTS)
5174

    
5175
  def Exec(self, feedback_fn):
5176
    """Reinstall the instance.
5177

5178
    """
5179
    inst = self.instance
5180
    old_name = inst.name
5181

    
5182
    if inst.disk_template == constants.DT_FILE:
5183
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5184

    
5185
    self.cfg.RenameInstance(inst.name, self.op.new_name)
5186
    # Change the instance lock. This is definitely safe while we hold the BGL
5187
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
5188
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
5189

    
5190
    # re-read the instance from the configuration after rename
5191
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
5192

    
5193
    if inst.disk_template == constants.DT_FILE:
5194
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5195
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
5196
                                                     old_file_storage_dir,
5197
                                                     new_file_storage_dir)
5198
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
5199
                   " (but the instance has been renamed in Ganeti)" %
5200
                   (inst.primary_node, old_file_storage_dir,
5201
                    new_file_storage_dir))
5202

    
5203
    _StartInstanceDisks(self, inst, None)
5204
    try:
5205
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
5206
                                                 old_name, self.op.debug_level)
5207
      msg = result.fail_msg
5208
      if msg:
5209
        msg = ("Could not run OS rename script for instance %s on node %s"
5210
               " (but the instance has been renamed in Ganeti): %s" %
5211
               (inst.name, inst.primary_node, msg))
5212
        self.proc.LogWarning(msg)
5213
    finally:
5214
      _ShutdownInstanceDisks(self, inst)
5215

    
5216
    return inst.name
5217

    
5218

    
5219
class LURemoveInstance(LogicalUnit):
5220
  """Remove an instance.
5221

5222
  """
5223
  HPATH = "instance-remove"
5224
  HTYPE = constants.HTYPE_INSTANCE
5225
  _OP_PARAMS = [
5226
    _PInstanceName,
5227
    ("ignore_failures", False, ht.TBool),
5228
    _PShutdownTimeout,
5229
    ]
5230
  REQ_BGL = False
5231

    
5232
  def ExpandNames(self):
5233
    self._ExpandAndLockInstance()
5234
    self.needed_locks[locking.LEVEL_NODE] = []
5235
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5236

    
5237
  def DeclareLocks(self, level):
5238
    if level == locking.LEVEL_NODE:
5239
      self._LockInstancesNodes()
5240

    
5241
  def BuildHooksEnv(self):
5242
    """Build hooks env.
5243

5244
    This runs on master, primary and secondary nodes of the instance.
5245

5246
    """
5247
    env = _BuildInstanceHookEnvByObject(self, self.instance)
5248
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
5249
    nl = [self.cfg.GetMasterNode()]
5250
    nl_post = list(self.instance.all_nodes) + nl
5251
    return env, nl, nl_post
5252

    
5253
  def CheckPrereq(self):
5254
    """Check prerequisites.
5255

5256
    This checks that the instance is in the cluster.
5257

5258
    """
5259
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5260
    assert self.instance is not None, \
5261
      "Cannot retrieve locked instance %s" % self.op.instance_name
5262

    
5263
  def Exec(self, feedback_fn):
5264
    """Remove the instance.
5265

5266
    """
5267
    instance = self.instance
5268
    logging.info("Shutting down instance %s on node %s",
5269
                 instance.name, instance.primary_node)
5270

    
5271
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
5272
                                             self.op.shutdown_timeout)
5273
    msg = result.fail_msg
5274
    if msg:
5275
      if self.op.ignore_failures:
5276
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
5277
      else:
5278
        raise errors.OpExecError("Could not shutdown instance %s on"
5279
                                 " node %s: %s" %
5280
                                 (instance.name, instance.primary_node, msg))
5281

    
5282
    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
5283

    
5284

    
5285
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
5286
  """Utility function to remove an instance.
5287

5288
  """
5289
  logging.info("Removing block devices for instance %s", instance.name)
5290

    
5291
  if not _RemoveDisks(lu, instance):
5292
    if not ignore_failures:
5293
      raise errors.OpExecError("Can't remove instance's disks")
5294
    feedback_fn("Warning: can't remove instance's disks")
5295

    
5296
  logging.info("Removing instance %s out of cluster config", instance.name)
5297

    
5298
  lu.cfg.RemoveInstance(instance.name)
5299

    
5300
  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
5301
    "Instance lock removal conflict"
5302

    
5303
  # Remove lock for the instance
5304
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5305

    
5306

    
5307
class LUQueryInstances(NoHooksLU):
5308
  """Logical unit for querying instances.
5309

5310
  """
5311
  # pylint: disable-msg=W0142
5312
  _OP_PARAMS = [
5313
    ("output_fields", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
5314
    ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
5315
    ("use_locking", False, ht.TBool),
5316
    ]
5317
  REQ_BGL = False
5318
  _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
5319
                    "serial_no", "ctime", "mtime", "uuid"]
5320
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
5321
                                    "admin_state",
5322
                                    "disk_template", "ip", "mac", "bridge",
5323
                                    "nic_mode", "nic_link",
5324
                                    "sda_size", "sdb_size", "vcpus", "tags",
5325
                                    "network_port", "beparams",
5326
                                    r"(disk)\.(size)/([0-9]+)",
5327
                                    r"(disk)\.(sizes)", "disk_usage",
5328
                                    r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
5329
                                    r"(nic)\.(bridge)/([0-9]+)",
5330
                                    r"(nic)\.(macs|ips|modes|links|bridges)",
5331
                                    r"(disk|nic)\.(count)",
5332
                                    "hvparams", "custom_hvparams",
5333
                                    "custom_beparams", "custom_nicparams",
5334
                                    ] + _SIMPLE_FIELDS +
5335
                                  ["hv/%s" % name
5336
                                   for name in constants.HVS_PARAMETERS
5337
                                   if name not in constants.HVC_GLOBALS] +
5338
                                  ["be/%s" % name
5339
                                   for name in constants.BES_PARAMETERS])
5340
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state",
5341
                                   "oper_ram",
5342
                                   "oper_vcpus",
5343
                                   "status")
5344

    
5345

    
5346
  def CheckArguments(self):
5347
    _CheckOutputFields(static=self._FIELDS_STATIC,
5348
                       dynamic=self._FIELDS_DYNAMIC,
5349
                       selected=self.op.output_fields)
5350

    
5351
  def ExpandNames(self):
5352
    self.needed_locks = {}
5353
    self.share_locks[locking.LEVEL_INSTANCE] = 1
5354
    self.share_locks[locking.LEVEL_NODE] = 1
5355

    
5356
    if self.op.names:
5357
      self.wanted = _GetWantedInstances(self, self.op.names)
5358
    else:
5359
      self.wanted = locking.ALL_SET
5360

    
5361
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
5362
    self.do_locking = self.do_node_query and self.op.use_locking
5363
    if self.do_locking:
5364
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5365
      self.needed_locks[locking.LEVEL_NODE] = []
5366
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5367

    
5368
  def DeclareLocks(self, level):
5369
    if level == locking.LEVEL_NODE and self.do_locking:
5370
      self._LockInstancesNodes()
5371

    
5372
  def Exec(self, feedback_fn):
5373
    """Computes the list of nodes and their attributes.
5374

5375
    """
5376
    # pylint: disable-msg=R0912
5377
    # way too many branches here
5378
    all_info = self.cfg.GetAllInstancesInfo()
5379
    if self.wanted == locking.ALL_SET:
5380
      # caller didn't specify instance names, so ordering is not important
5381
      if self.do_locking:
5382
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
5383
      else:
5384
        instance_names = all_info.keys()
5385
      instance_names = utils.NiceSort(instance_names)
5386
    else:
5387
      # caller did specify names, so we must keep the ordering
5388
      if self.do_locking:
5389
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
5390
      else:
5391
        tgt_set = all_info.keys()
5392
      missing = set(self.wanted).difference(tgt_set)
5393
      if missing:
5394
        raise errors.OpExecError("Some instances were removed before"
5395
                                 " retrieving their data: %s" % missing)
5396
      instance_names = self.wanted
5397

    
5398
    instance_list = [all_info[iname] for iname in instance_names]
5399

    
5400
    # begin data gathering
5401

    
5402
    nodes = frozenset([inst.primary_node for inst in instance_list])
5403
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
5404

    
5405
    bad_nodes = []
5406
    off_nodes = []
5407
    if self.do_node_query:
5408
      live_data = {}
5409
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
5410
      for name in nodes:
5411
        result = node_data[name]
5412
        if result.offline:
5413
          # offline nodes will be in both lists
5414
          off_nodes.append(name)
5415
        if result.fail_msg:
5416
          bad_nodes.append(name)
5417
        else:
5418
          if result.payload:
5419
            live_data.update(result.payload)
5420
          # else no instance is alive
5421
    else:
5422
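      # no live data needed (only static fields requested): use empty
      # placeholders for every instance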
      live_data = dict([(name, {}) for name in instance_names])
5423

    
5424
    # end data gathering
5425

    
5426
    HVPREFIX = "hv/"
5427
    BEPREFIX = "be/"
5428
    output = []
5429
    cluster = self.cfg.GetClusterInfo()
5430
    for instance in instance_list:
5431
      iout = []
5432
      i_hv = cluster.FillHV(instance, skip_globals=True)
5433
      i_be = cluster.FillBE(instance)
5434
      i_nicp = [cluster.SimpleFillNIC(nic.nicparams) for nic in instance.nics]
5435
      for field in self.op.output_fields:
5436
        st_match = self._FIELDS_STATIC.Matches(field)
5437
        if field in self._SIMPLE_FIELDS:
5438
          val = getattr(instance, field)
5439
        elif field == "pnode":
5440
          val = instance.primary_node
5441
        elif field == "snodes":
5442
          val = list(instance.secondary_nodes)
5443
        elif field == "admin_state":
5444
          val = instance.admin_up
5445
        elif field == "oper_state":
5446
          if instance.primary_node in bad_nodes:
5447
            val = None
5448
          else:
5449
            val = bool(live_data.get(instance.name))
5450
        elif field == "status":
5451
          if instance.primary_node in off_nodes:
5452
            val = "ERROR_nodeoffline"
5453
          elif instance.primary_node in bad_nodes:
5454
            val = "ERROR_nodedown"
5455
          else:
5456
            running = bool(live_data.get(instance.name))
5457
            if running:
5458
              if instance.admin_up:
5459
                val = "running"
5460
              else:
5461
                val = "ERROR_up"
5462
            else:
5463
              if instance.admin_up:
5464
                val = "ERROR_down"
5465
              else:
5466
                val = "ADMIN_down"
5467
        elif field == "oper_ram":
5468
          if instance.primary_node in bad_nodes:
5469
            val = None
5470
          elif instance.name in live_data:
5471
            val = live_data[instance.name].get("memory", "?")
5472
          else:
5473
            val = "-"
5474
        elif field == "oper_vcpus":
5475
          if instance.primary_node in bad_nodes:
5476
            val = None
5477
          elif instance.name in live_data:
5478
            val = live_data[instance.name].get("vcpus", "?")
5479
          else:
5480
            val = "-"
5481
        elif field == "vcpus":
5482
          val = i_be[constants.BE_VCPUS]
5483
        elif field == "disk_template":
5484
          val = instance.disk_template
5485
        elif field == "ip":
5486
          if instance.nics:
5487
            val = instance.nics[0].ip
5488
          else:
5489
            val = None
5490
        elif field == "nic_mode":
5491
          if instance.nics:
5492
            val = i_nicp[0][constants.NIC_MODE]
5493
          else:
5494
            val = None
5495
        elif field == "nic_link":
5496
          if instance.nics:
5497
            val = i_nicp[0][constants.NIC_LINK]
5498
          else:
5499
            val = None
5500
        elif field == "bridge":
5501
          if (instance.nics and
5502
              i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
5503
            val = i_nicp[0][constants.NIC_LINK]
5504
          else:
5505
            val = None
5506
        elif field == "mac":
5507
          if instance.nics:
5508
            val = instance.nics[0].mac
5509
          else:
5510
            val = None
5511
        elif field == "custom_nicparams":
5512
          val = [nic.nicparams for nic in instance.nics]
5513
        elif field == "sda_size" or field == "sdb_size":
5514
          idx = ord(field[2]) - ord('a')
5515
          try:
5516
            val = instance.FindDisk(idx).size
5517
          except errors.OpPrereqError:
5518
            val = None
5519
        elif field == "disk_usage": # total disk usage per node
5520
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
5521
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
5522
        elif field == "tags":
5523
          val = list(instance.GetTags())
5524
        elif field == "custom_hvparams":
5525
          val = instance.hvparams # not filled!
5526
        elif field == "hvparams":
5527
          val = i_hv
5528
        elif (field.startswith(HVPREFIX) and
5529
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
5530
              field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
5531
          val = i_hv.get(field[len(HVPREFIX):], None)
5532
        elif field == "custom_beparams":
5533
          val = instance.beparams
5534
        elif field == "beparams":
5535
          val = i_be
5536
        elif (field.startswith(BEPREFIX) and
5537
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
5538
          val = i_be.get(field[len(BEPREFIX):], None)
5539
        elif st_match and st_match.groups():
5540
          # matches a variable list
5541
          st_groups = st_match.groups()
5542
          if st_groups and st_groups[0] == "disk":
5543
            if st_groups[1] == "count":
5544
              val = len(instance.disks)
5545
            elif st_groups[1] == "sizes":
5546
              val = [disk.size for disk in instance.disks]
5547
            elif st_groups[1] == "size":
5548
              try:
5549
                val = instance.FindDisk(st_groups[2]).size
5550
              except errors.OpPrereqError:
5551
                val = None
5552
            else:
5553
              assert False, "Unhandled disk parameter"
5554
          elif st_groups[0] == "nic":
5555
            if st_groups[1] == "count":
5556
              val = len(instance.nics)
5557
            elif st_groups[1] == "macs":
5558
              val = [nic.mac for nic in instance.nics]
5559
            elif st_groups[1] == "ips":
5560
              val = [nic.ip for nic in instance.nics]
5561
            elif st_groups[1] == "modes":
5562
              val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
5563
            elif st_groups[1] == "links":
5564
              val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
5565
            elif st_groups[1] == "bridges":
5566
              val = []
5567
              for nicp in i_nicp:
5568
                if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
5569
                  val.append(nicp[constants.NIC_LINK])
5570
                else:
5571
                  val.append(None)
5572
            else:
5573
              # index-based item
5574
              nic_idx = int(st_groups[2])
5575
              if nic_idx >= len(instance.nics):
5576
                val = None
5577
              else:
5578
                if st_groups[1] == "mac":
5579
                  val = instance.nics[nic_idx].mac
5580
                elif st_groups[1] == "ip":
5581
                  val = instance.nics[nic_idx].ip
5582
                elif st_groups[1] == "mode":
5583
                  val = i_nicp[nic_idx][constants.NIC_MODE]
5584
                elif st_groups[1] == "link":
5585
                  val = i_nicp[nic_idx][constants.NIC_LINK]
5586
                elif st_groups[1] == "bridge":
5587
                  nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
5588
                  if nic_mode == constants.NIC_MODE_BRIDGED:
5589
                    val = i_nicp[nic_idx][constants.NIC_LINK]
5590
                  else:
5591
                    val = None
5592
                else:
5593
                  assert False, "Unhandled NIC parameter"
5594
          else:
5595
            assert False, ("Declared but unhandled variable parameter '%s'" %
5596
                           field)
5597
        else:
5598
          assert False, "Declared but unhandled parameter '%s'" % field
5599
        iout.append(val)
5600
      output.append(iout)

    return output


class LUFailoverInstance(LogicalUnit):
5606
  """Failover an instance.
5607

5608
  """
5609
  HPATH = "instance-failover"
5610
  HTYPE = constants.HTYPE_INSTANCE
5611
  _OP_PARAMS = [
5612
    _PInstanceName,
5613
    ("ignore_consistency", False, ht.TBool),
5614
    _PShutdownTimeout,
5615
    ]
5616
  REQ_BGL = False
5617

    
5618
  def ExpandNames(self):
5619
    self._ExpandAndLockInstance()
5620
    self.needed_locks[locking.LEVEL_NODE] = []
5621
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5622

    
5623
  def DeclareLocks(self, level):
5624
    if level == locking.LEVEL_NODE:
5625
      self._LockInstancesNodes()
5626

    
5627
  def BuildHooksEnv(self):
5628
    """Build hooks env.
5629

5630
    This runs on master, primary and secondary nodes of the instance.
5631

5632
    """
5633
    instance = self.instance
5634
    source_node = instance.primary_node
5635
    target_node = instance.secondary_nodes[0]
5636
    env = {
5637
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5638
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5639
      "OLD_PRIMARY": source_node,
5640
      "OLD_SECONDARY": target_node,
5641
      "NEW_PRIMARY": target_node,
5642
      "NEW_SECONDARY": source_node,
5643
      }
5644
    env.update(_BuildInstanceHookEnvByObject(self, instance))
5645
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5646
    nl_post = list(nl)
5647
    nl_post.append(source_node)
5648
    return env, nl, nl_post
5649

    
5650
  def CheckPrereq(self):
5651
    """Check prerequisites.
5652

5653
    This checks that the instance is in the cluster.
5654

5655
    """
5656
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5657
    assert self.instance is not None, \
5658
      "Cannot retrieve locked instance %s" % self.op.instance_name
5659

    
5660
    bep = self.cfg.GetClusterInfo().FillBE(instance)
5661
    if instance.disk_template not in constants.DTS_NET_MIRROR:
5662
      raise errors.OpPrereqError("Instance's disk layout is not"
5663
                                 " network mirrored, cannot failover.",
5664
                                 errors.ECODE_STATE)
5665

    
5666
    secondary_nodes = instance.secondary_nodes
5667
    if not secondary_nodes:
5668
      raise errors.ProgrammerError("no secondary node but using "
5669
                                   "a mirrored disk template")
5670

    
5671
    target_node = secondary_nodes[0]
5672
    _CheckNodeOnline(self, target_node)
5673
    _CheckNodeNotDrained(self, target_node)
5674
    if instance.admin_up:
5675
      # check memory requirements on the secondary node
5676
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5677
                           instance.name, bep[constants.BE_MEMORY],
5678
                           instance.hypervisor)
5679
    else:
5680
      self.LogInfo("Not checking memory on the secondary node as"
5681
                   " instance will not be started")
5682

    
5683
    # check bridge existence
5684
    _CheckInstanceBridgesExist(self, instance, node=target_node)
5685

    
5686
  def Exec(self, feedback_fn):
5687
    """Failover an instance.
5688

5689
    The failover is done by shutting it down on its present node and
5690
    starting it on the secondary.
5691

5692
    """
5693
    instance = self.instance
5694
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)
5695

    
5696
    source_node = instance.primary_node
5697
    target_node = instance.secondary_nodes[0]
5698

    
5699
    if instance.admin_up:
5700
      feedback_fn("* checking disk consistency between source and target")
5701
      for dev in instance.disks:
5702
        # for drbd, these are drbd over lvm
5703
        if not _CheckDiskConsistency(self, dev, target_node, False):
5704
          if not self.op.ignore_consistency:
5705
            raise errors.OpExecError("Disk %s is degraded on target node,"
5706
                                     " aborting failover." % dev.iv_name)
5707
    else:
5708
      feedback_fn("* not checking disk consistency as instance is not running")
5709

    
5710
    feedback_fn("* shutting down instance on source node")
5711
    logging.info("Shutting down instance %s on node %s",
5712
                 instance.name, source_node)
5713

    
5714
    result = self.rpc.call_instance_shutdown(source_node, instance,
5715
                                             self.op.shutdown_timeout)
5716
    msg = result.fail_msg
5717
    if msg:
5718
      if self.op.ignore_consistency or primary_node.offline:
5719
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
5720
                             " Proceeding anyway. Please make sure node"
5721
                             " %s is down. Error details: %s",
5722
                             instance.name, source_node, source_node, msg)
5723
      else:
5724
        raise errors.OpExecError("Could not shutdown instance %s on"
5725
                                 " node %s: %s" %
5726
                                 (instance.name, source_node, msg))
5727

    
5728
    feedback_fn("* deactivating the instance's disks on source node")
5729
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5730
      raise errors.OpExecError("Can't shut down the instance's disks.")
5731

    
5732
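    # Record the new primary in the cluster configuration before trying to
    # start the instance below, so that even if the start fails the config
    # already points at the node where the disks will next be activated.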
    instance.primary_node = target_node
5733
    # distribute new instance config to the other nodes
5734
    self.cfg.Update(instance, feedback_fn)
5735

    
5736
    # Only start the instance if it's marked as up
5737
    if instance.admin_up:
5738
      feedback_fn("* activating the instance's disks on target node")
5739
      logging.info("Starting instance %s on node %s",
5740
                   instance.name, target_node)
5741

    
5742
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
5743
                                           ignore_secondaries=True)
5744
      if not disks_ok:
5745
        _ShutdownInstanceDisks(self, instance)
5746
        raise errors.OpExecError("Can't activate the instance's disks")
5747

    
5748
      feedback_fn("* starting the instance on the target node")
5749
      result = self.rpc.call_instance_start(target_node, instance, None, None)
5750
      msg = result.fail_msg
5751
      if msg:
5752
        _ShutdownInstanceDisks(self, instance)
5753
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5754
                                 (instance.name, target_node, msg))
5755

    
5756

    
5757
class LUMigrateInstance(LogicalUnit):
5758
  """Migrate an instance.
5759

5760
  This is migration without shutting down, compared to the failover,
5761
  which is done with shutdown.
5762

5763
  """
5764
  HPATH = "instance-migrate"
5765
  HTYPE = constants.HTYPE_INSTANCE
5766
  _OP_PARAMS = [
5767
    _PInstanceName,
5768
    _PMigrationMode,
5769
    _PMigrationLive,
5770
    ("cleanup", False, ht.TBool),
5771
    ]
5772

    
5773
  REQ_BGL = False
5774

    
5775
  def ExpandNames(self):
5776
    self._ExpandAndLockInstance()
5777

    
5778
    self.needed_locks[locking.LEVEL_NODE] = []
5779
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5780

    
5781
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
5782
                                       self.op.cleanup)
5783
    self.tasklets = [self._migrater]
5784

    
5785
  def DeclareLocks(self, level):
5786
    if level == locking.LEVEL_NODE:
5787
      self._LockInstancesNodes()
5788

    
5789
  def BuildHooksEnv(self):
5790
    """Build hooks env.
5791

5792
    This runs on master, primary and secondary nodes of the instance.
5793

5794
    """
5795
    instance = self._migrater.instance
5796
    source_node = instance.primary_node
5797
    target_node = instance.secondary_nodes[0]
5798
    env = _BuildInstanceHookEnvByObject(self, instance)
5799
    env["MIGRATE_LIVE"] = self._migrater.live
5800
    env["MIGRATE_CLEANUP"] = self.op.cleanup
5801
    env.update({
5802
        "OLD_PRIMARY": source_node,
5803
        "OLD_SECONDARY": target_node,
5804
        "NEW_PRIMARY": target_node,
5805
        "NEW_SECONDARY": source_node,
5806
        })
5807
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5808
    nl_post = list(nl)
5809
    nl_post.append(source_node)
5810
    return env, nl, nl_post
5811

    
5812

    
5813
class LUMoveInstance(LogicalUnit):
5814
  """Move an instance by data-copying.
5815

5816
  """
5817
  HPATH = "instance-move"
5818
  HTYPE = constants.HTYPE_INSTANCE
5819
  _OP_PARAMS = [
5820
    _PInstanceName,
5821
    ("target_node", ht.NoDefault, ht.TNonEmptyString),
5822
    _PShutdownTimeout,
5823
    ]
5824
  REQ_BGL = False
5825

    
5826
  def ExpandNames(self):
5827
    self._ExpandAndLockInstance()
5828
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5829
    self.op.target_node = target_node
5830
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
5831
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5832

    
5833
  def DeclareLocks(self, level):
5834
    if level == locking.LEVEL_NODE:
5835
      self._LockInstancesNodes(primary_only=True)
5836

    
5837
  def BuildHooksEnv(self):
5838
    """Build hooks env.
5839

5840
    This runs on master, primary and secondary nodes of the instance.
5841

5842
    """
5843
    env = {
5844
      "TARGET_NODE": self.op.target_node,
5845
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5846
      }
5847
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5848
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5849
                                       self.op.target_node]
5850
    return env, nl, nl
5851

    
5852
  def CheckPrereq(self):
5853
    """Check prerequisites.
5854

5855
    This checks that the instance is in the cluster.
5856

5857
    """
5858
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5859
    assert self.instance is not None, \
5860
      "Cannot retrieve locked instance %s" % self.op.instance_name
5861

    
5862
    node = self.cfg.GetNodeInfo(self.op.target_node)
5863
    assert node is not None, \
5864
      "Cannot retrieve locked node %s" % self.op.target_node
5865

    
5866
    self.target_node = target_node = node.name
5867

    
5868
    if target_node == instance.primary_node:
5869
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
5870
                                 (instance.name, target_node),
5871
                                 errors.ECODE_STATE)
5872

    
5873
    bep = self.cfg.GetClusterInfo().FillBE(instance)
5874

    
5875
    for idx, dsk in enumerate(instance.disks):
5876
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5877
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5878
                                   " cannot copy" % idx, errors.ECODE_STATE)
5879

    
5880
    _CheckNodeOnline(self, target_node)
5881
    _CheckNodeNotDrained(self, target_node)
5882
    _CheckNodeVmCapable(self, target_node)
5883

    
5884
    if instance.admin_up:
5885
      # check memory requirements on the target node
5886
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5887
                           instance.name, bep[constants.BE_MEMORY],
5888
                           instance.hypervisor)
5889
    else:
5890
      self.LogInfo("Not checking memory on the secondary node as"
5891
                   " instance will not be started")
5892

    
5893
    # check bridge existence
5894
    _CheckInstanceBridgesExist(self, instance, node=target_node)
5895

    
5896
  def Exec(self, feedback_fn):
5897
    """Move an instance.
5898

5899
    The move is done by shutting it down on its present node, copying
5900
    the data over (slow) and starting it on the new node.
5901

5902
    """
5903
    instance = self.instance
5904

    
5905
    source_node = instance.primary_node
5906
    target_node = self.target_node
5907

    
5908
    self.LogInfo("Shutting down instance %s on source node %s",
5909
                 instance.name, source_node)
5910

    
5911
    result = self.rpc.call_instance_shutdown(source_node, instance,
5912
                                             self.op.shutdown_timeout)
5913
    msg = result.fail_msg
5914
    if msg:
5915
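      # Note: unlike the failover code, this LU does not declare
      # "ignore_consistency" in its _OP_PARAMS, so this attribute is not
      # guaranteed to be set on the opcode (apparently carried over from
      # LUFailoverInstance).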
      if self.op.ignore_consistency:
5916
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
5917
                             " Proceeding anyway. Please make sure node"
5918
                             " %s is down. Error details: %s",
5919
                             instance.name, source_node, source_node, msg)
5920
      else:
5921
        raise errors.OpExecError("Could not shutdown instance %s on"
5922
                                 " node %s: %s" %
5923
                                 (instance.name, source_node, msg))
5924

    
5925
    # create the target disks
5926
    try:
5927
      _CreateDisks(self, instance, target_node=target_node)
5928
    except errors.OpExecError:
5929
      self.LogWarning("Device creation failed, reverting...")
5930
      try:
5931
        _RemoveDisks(self, instance, target_node=target_node)
5932
      finally:
5933
        self.cfg.ReleaseDRBDMinors(instance.name)
5934
        raise
5935

    
5936
    cluster_name = self.cfg.GetClusterInfo().cluster_name
5937

    
5938
    errs = []
5939
    # activate, get path, copy the data over
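    # For every disk: assemble the newly created device on the target node to
    # learn its local path, then have the source node export (stream) the old
    # device's contents to that path; any failure aborts the loop and is
    # handled by the cleanup below.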
5940
    for idx, disk in enumerate(instance.disks):
5941
      self.LogInfo("Copying data for disk %d", idx)
5942
      result = self.rpc.call_blockdev_assemble(target_node, disk,
5943
                                               instance.name, True)
5944
      if result.fail_msg:
5945
        self.LogWarning("Can't assemble newly created disk %d: %s",
5946
                        idx, result.fail_msg)
5947
        errs.append(result.fail_msg)
5948
        break
5949
      dev_path = result.payload
5950
      result = self.rpc.call_blockdev_export(source_node, disk,
5951
                                             target_node, dev_path,
5952
                                             cluster_name)
5953
      if result.fail_msg:
5954
        self.LogWarning("Can't copy data over for disk %d: %s",
5955
                        idx, result.fail_msg)
5956
        errs.append(result.fail_msg)
5957
        break
5958

    
5959
    if errs:
5960
      self.LogWarning("Some disks failed to copy, aborting")
5961
      try:
5962
        _RemoveDisks(self, instance, target_node=target_node)
5963
      finally:
5964
        self.cfg.ReleaseDRBDMinors(instance.name)
5965
        raise errors.OpExecError("Errors during disk copy: %s" %
5966
                                 (",".join(errs),))
5967

    
5968
    instance.primary_node = target_node
5969
    self.cfg.Update(instance, feedback_fn)
5970

    
5971
    self.LogInfo("Removing the disks on the original node")
5972
    _RemoveDisks(self, instance, target_node=source_node)
5973

    
5974
    # Only start the instance if it's marked as up
5975
    if instance.admin_up:
5976
      self.LogInfo("Starting instance %s on node %s",
5977
                   instance.name, target_node)
5978

    
5979
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
5980
                                           ignore_secondaries=True)
5981
      if not disks_ok:
5982
        _ShutdownInstanceDisks(self, instance)
5983
        raise errors.OpExecError("Can't activate the instance's disks")
5984

    
5985
      result = self.rpc.call_instance_start(target_node, instance, None, None)
5986
      msg = result.fail_msg
5987
      if msg:
5988
        _ShutdownInstanceDisks(self, instance)
5989
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5990
                                 (instance.name, target_node, msg))
5991

    
5992

    
5993
class LUMigrateNode(LogicalUnit):
5994
  """Migrate all instances from a node.
5995

5996
  """
5997
  HPATH = "node-migrate"
5998
  HTYPE = constants.HTYPE_NODE
5999
  _OP_PARAMS = [
6000
    _PNodeName,
6001
    _PMigrationMode,
6002
    _PMigrationLive,
6003
    ]
6004
  REQ_BGL = False
6005

    
6006
  def ExpandNames(self):
6007
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6008

    
6009
    self.needed_locks = {
6010
      locking.LEVEL_NODE: [self.op.node_name],
6011
      }
6012

    
6013
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6014

    
6015
    # Create tasklets for migrating instances for all instances on this node
6016
    names = []
6017
    tasklets = []
6018

    
6019
    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
6020
      logging.debug("Migrating instance %s", inst.name)
6021
      names.append(inst.name)
6022

    
6023
      tasklets.append(TLMigrateInstance(self, inst.name, False))
6024

    
6025
    self.tasklets = tasklets
6026

    
6027
    # Declare instance locks
6028
    self.needed_locks[locking.LEVEL_INSTANCE] = names
6029

    
6030
  def DeclareLocks(self, level):
6031
    if level == locking.LEVEL_NODE:
6032
      self._LockInstancesNodes()
6033

    
6034
  def BuildHooksEnv(self):
6035
    """Build hooks env.
6036

6037
    This runs on the master, the primary and all the secondaries.
6038

6039
    """
6040
    env = {
6041
      "NODE_NAME": self.op.node_name,
6042
      }
6043

    
6044
    nl = [self.cfg.GetMasterNode()]
6045

    
6046
    return (env, nl, nl)
6047

    
6048

    
6049
class TLMigrateInstance(Tasklet):
6050
  """Tasklet class for instance migration.
6051

6052
  @type live: boolean
6053
  @ivar live: whether the migration will be done live or non-live;
6054
      this variable is initialized only after CheckPrereq has run
6055

6056
  """
6057
  def __init__(self, lu, instance_name, cleanup):
6058
    """Initializes this class.
6059

6060
    """
6061
    Tasklet.__init__(self, lu)
6062

    
6063
    # Parameters
6064
    self.instance_name = instance_name
6065
    self.cleanup = cleanup
6066
    self.live = False # will be overridden later
6067

    
6068
  def CheckPrereq(self):
6069
    """Check prerequisites.
6070

6071
    This checks that the instance is in the cluster.
6072

6073
    """
6074
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6075
    instance = self.cfg.GetInstanceInfo(instance_name)
6076
    assert instance is not None
6077

    
6078
    if instance.disk_template != constants.DT_DRBD8:
6079
      raise errors.OpPrereqError("Instance's disk layout is not"
6080
                                 " drbd8, cannot migrate.", errors.ECODE_STATE)
6081

    
6082
    secondary_nodes = instance.secondary_nodes
6083
    if not secondary_nodes:
6084
      raise errors.ConfigurationError("No secondary node but using"
6085
                                      " drbd8 disk template")
6086

    
6087
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
6088

    
6089
    target_node = secondary_nodes[0]
6090
    # check memory requirements on the secondary node
6091
    _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
6092
                         instance.name, i_be[constants.BE_MEMORY],
6093
                         instance.hypervisor)
6094

    
6095
    # check bridge existence
6096
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
6097

    
6098
    if not self.cleanup:
6099
      _CheckNodeNotDrained(self.lu, target_node)
6100
      result = self.rpc.call_instance_migratable(instance.primary_node,
6101
                                                 instance)
6102
      result.Raise("Can't migrate, please use failover",
6103
                   prereq=True, ecode=errors.ECODE_STATE)
6104

    
6105
    self.instance = instance
6106

    
6107
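    # Reconcile the deprecated boolean "live" parameter with "mode": the two
    # are mutually exclusive, an explicit "live" value is translated into the
    # equivalent mode, and if neither was given the hypervisor's configured
    # default migration mode is used.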
    if self.lu.op.live is not None and self.lu.op.mode is not None:
6108
      raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
6109
                                 " parameters are accepted",
6110
                                 errors.ECODE_INVAL)
6111
    if self.lu.op.live is not None:
6112
      if self.lu.op.live:
6113
        self.lu.op.mode = constants.HT_MIGRATION_LIVE
6114
      else:
6115
        self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
6116
      # reset the 'live' parameter to None so that repeated
6117
      # invocations of CheckPrereq do not raise an exception
6118
      self.lu.op.live = None
6119
    elif self.lu.op.mode is None:
6120
      # read the default value from the hypervisor
6121
      i_hv = self.cfg.GetClusterInfo().FillHV(instance, skip_globals=False)
6122
      self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
6123

    
6124
    self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
6125

    
6126
  def _WaitUntilSync(self):
6127
    """Poll with custom rpc for disk sync.
6128

6129
    This uses our own step-based rpc call.
6130

6131
    """
6132
    self.feedback_fn("* wait until resync is done")
6133
    all_done = False
6134
    while not all_done:
6135
      all_done = True
6136
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
6137
                                            self.nodes_ip,
6138
                                            self.instance.disks)
6139
      min_percent = 100
6140
      for node, nres in result.items():
6141
        nres.Raise("Cannot resync disks on node %s" % node)
6142
        node_done, node_percent = nres.payload
6143
        all_done = all_done and node_done
6144
        if node_percent is not None:
6145
          min_percent = min(min_percent, node_percent)
6146
      if not all_done:
6147
        if min_percent < 100:
6148
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
6149
        time.sleep(2)
6150

    
6151
  def _EnsureSecondary(self, node):
6152
    """Demote a node to secondary.
6153

6154
    """
6155
    self.feedback_fn("* switching node %s to secondary mode" % node)
6156

    
6157
    for dev in self.instance.disks:
6158
      self.cfg.SetDiskID(dev, node)
6159

    
6160
    result = self.rpc.call_blockdev_close(node, self.instance.name,
6161
                                          self.instance.disks)
6162
    result.Raise("Cannot change disk to secondary on node %s" % node)
6163

    
6164
  def _GoStandalone(self):
6165
    """Disconnect from the network.
6166

6167
    """
6168
    self.feedback_fn("* changing into standalone mode")
6169
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
6170
                                               self.instance.disks)
6171
    for node, nres in result.items():
6172
      nres.Raise("Cannot disconnect disks node %s" % node)
6173

    
6174
  def _GoReconnect(self, multimaster):
6175
    """Reconnect to the network.
6176

6177
    """
6178
    if multimaster:
6179
      msg = "dual-master"
6180
    else:
6181
      msg = "single-master"
6182
    self.feedback_fn("* changing disks into %s mode" % msg)
6183
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
6184
                                           self.instance.disks,
6185
                                           self.instance.name, multimaster)
6186
    for node, nres in result.items():
6187
      nres.Raise("Cannot change disks config on node %s" % node)
6188

    
6189
  def _ExecCleanup(self):
6190
    """Try to cleanup after a failed migration.
6191

6192
    The cleanup is done by:
6193
      - check that the instance is running only on one node
6194
        (and update the config if needed)
6195
      - change disks on its secondary node to secondary
6196
      - wait until disks are fully synchronized
6197
      - disconnect from the network
6198
      - change disks into single-master mode
6199
      - wait again until disks are fully synchronized
6200

6201
    """
6202
    instance = self.instance
6203
    target_node = self.target_node
6204
    source_node = self.source_node
6205

    
6206
    # check running on only one node
6207
    self.feedback_fn("* checking where the instance actually runs"
6208
                     " (if this hangs, the hypervisor might be in"
6209
                     " a bad state)")
6210
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6211
    for node, result in ins_l.items():
6212
      result.Raise("Can't contact node %s" % node)
6213

    
6214
    runningon_source = instance.name in ins_l[source_node].payload
6215
    runningon_target = instance.name in ins_l[target_node].payload
6216

    
6217
    if runningon_source and runningon_target:
6218
      raise errors.OpExecError("Instance seems to be running on two nodes,"
6219
                               " or the hypervisor is confused. You will have"
6220
                               " to ensure manually that it runs only on one"
6221
                               " and restart this operation.")
6222

    
6223
    if not (runningon_source or runningon_target):
6224
      raise errors.OpExecError("Instance does not seem to be running at all."
6225
                               " In this case, it's safer to repair by"
6226
                               " running 'gnt-instance stop' to ensure disk"
6227
                               " shutdown, and then restarting it.")
6228

    
6229
    if runningon_target:
6230
      # the migration has actually succeeded, we need to update the config
6231
      self.feedback_fn("* instance running on secondary node (%s),"
6232
                       " updating config" % target_node)
6233
      instance.primary_node = target_node
6234
      self.cfg.Update(instance, self.feedback_fn)
6235
      demoted_node = source_node
6236
    else:
6237
      self.feedback_fn("* instance confirmed to be running on its"
6238
                       " primary node (%s)" % source_node)
6239
      demoted_node = target_node
6240

    
6241
    self._EnsureSecondary(demoted_node)
6242
    try:
6243
      self._WaitUntilSync()
6244
    except errors.OpExecError:
6245
      # we ignore here errors, since if the device is standalone, it
6246
      # won't be able to sync
6247
      pass
6248
    self._GoStandalone()
6249
    self._GoReconnect(False)
6250
    self._WaitUntilSync()
6251

    
6252
    self.feedback_fn("* done")
6253

    
6254
  def _RevertDiskStatus(self):
6255
    """Try to revert the disk status after a failed migration.
6256

6257
    """
6258
    target_node = self.target_node
6259
    try:
6260
      self._EnsureSecondary(target_node)
6261
      self._GoStandalone()
6262
      self._GoReconnect(False)
6263
      self._WaitUntilSync()
6264
    except errors.OpExecError, err:
6265
      self.lu.LogWarning("Migration failed and I can't reconnect the"
6266
                         " drives: error '%s'\n"
6267
                         "Please look and recover the instance status" %
6268
                         str(err))
6269

    
6270
  def _AbortMigration(self):
6271
    """Call the hypervisor code to abort a started migration.
6272

6273
    """
6274
    instance = self.instance
6275
    target_node = self.target_node
6276
    migration_info = self.migration_info
6277

    
6278
    abort_result = self.rpc.call_finalize_migration(target_node,
6279
                                                    instance,
6280
                                                    migration_info,
6281
                                                    False)
6282
    abort_msg = abort_result.fail_msg
6283
    if abort_msg:
6284
      logging.error("Aborting migration failed on target node %s: %s",
6285
                    target_node, abort_msg)
6286
      # Don't raise an exception here, as we still have to try to revert the
6287
      # disk status, even if this step failed.
6288

    
6289
  def _ExecMigration(self):
6290
    """Migrate an instance.
6291

6292
    The migrate is done by:
6293
      - change the disks into dual-master mode
6294
      - wait until disks are fully synchronized again
6295
      - migrate the instance
6296
      - change disks on the new secondary node (the old primary) to secondary
6297
      - wait until disks are fully synchronized
6298
      - change disks into single-master mode
6299

6300
    """
6301
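    # For DRBD the disks are switched into dual-master ("multimaster") mode
    # for the duration of the live migration, so that the target node can
    # open them while the instance still runs on the source; once the
    # migration has finalized, the old primary is demoted and the disks go
    # back to single-master mode.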
    instance = self.instance
6302
    target_node = self.target_node
6303
    source_node = self.source_node
6304

    
6305
    self.feedback_fn("* checking disk consistency between source and target")
6306
    for dev in instance.disks:
6307
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6308
        raise errors.OpExecError("Disk %s is degraded or not fully"
6309
                                 " synchronized on target node,"
6310
                                 " aborting migrate." % dev.iv_name)
6311

    
6312
    # First get the migration information from the remote node
6313
    result = self.rpc.call_migration_info(source_node, instance)
6314
    msg = result.fail_msg
6315
    if msg:
6316
      log_err = ("Failed fetching source migration information from %s: %s" %
6317
                 (source_node, msg))
6318
      logging.error(log_err)
6319
      raise errors.OpExecError(log_err)
6320

    
6321
    self.migration_info = migration_info = result.payload
6322

    
6323
    # Then switch the disks to master/master mode
6324
    self._EnsureSecondary(target_node)
6325
    self._GoStandalone()
6326
    self._GoReconnect(True)
6327
    self._WaitUntilSync()
6328

    
6329
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
6330
    result = self.rpc.call_accept_instance(target_node,
6331
                                           instance,
6332
                                           migration_info,
6333
                                           self.nodes_ip[target_node])
6334

    
6335
    msg = result.fail_msg
6336
    if msg:
6337
      logging.error("Instance pre-migration failed, trying to revert"
6338
                    " disk status: %s", msg)
6339
      self.feedback_fn("Pre-migration failed, aborting")
6340
      self._AbortMigration()
6341
      self._RevertDiskStatus()
6342
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6343
                               (instance.name, msg))
6344

    
6345
    self.feedback_fn("* migrating instance to %s" % target_node)
6346
    time.sleep(10)
6347
    result = self.rpc.call_instance_migrate(source_node, instance,
6348
                                            self.nodes_ip[target_node],
6349
                                            self.live)
6350
    msg = result.fail_msg
6351
    if msg:
6352
      logging.error("Instance migration failed, trying to revert"
6353
                    " disk status: %s", msg)
6354
      self.feedback_fn("Migration failed, aborting")
6355
      self._AbortMigration()
6356
      self._RevertDiskStatus()
6357
      raise errors.OpExecError("Could not migrate instance %s: %s" %
6358
                               (instance.name, msg))
6359
    time.sleep(10)
6360

    
6361
    instance.primary_node = target_node
6362
    # distribute new instance config to the other nodes
6363
    self.cfg.Update(instance, self.feedback_fn)
6364

    
6365
    result = self.rpc.call_finalize_migration(target_node,
6366
                                              instance,
6367
                                              migration_info,
6368
                                              True)
6369
    msg = result.fail_msg
6370
    if msg:
6371
      logging.error("Instance migration succeeded, but finalization failed:"
6372
                    " %s", msg)
6373
      raise errors.OpExecError("Could not finalize instance migration: %s" %
6374
                               msg)
6375

    
6376
    self._EnsureSecondary(source_node)
6377
    self._WaitUntilSync()
6378
    self._GoStandalone()
6379
    self._GoReconnect(False)
6380
    self._WaitUntilSync()
6381

    
6382
    self.feedback_fn("* done")
6383

    
6384
  def Exec(self, feedback_fn):
6385
    """Perform the migration.
6386

6387
    """
6388
    feedback_fn("Migrating instance %s" % self.instance.name)
6389

    
6390
    self.feedback_fn = feedback_fn
6391

    
6392
    self.source_node = self.instance.primary_node
6393
    self.target_node = self.instance.secondary_nodes[0]
6394
    self.all_nodes = [self.source_node, self.target_node]
6395
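    # Both the DRBD reconfiguration calls and the hypervisor migration itself
    # are pointed at the nodes' secondary (replication) IP addresses.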
    self.nodes_ip = {
6396
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6397
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6398
      }
6399

    
6400
    if self.cleanup:
6401
      return self._ExecCleanup()
6402
    else:
6403
      return self._ExecMigration()
6404

    
6405

    
6406
def _CreateBlockDev(lu, node, instance, device, force_create,
6407
                    info, force_open):
6408
  """Create a tree of block devices on a given node.
6409

6410
  If this device type has to be created on secondaries, create it and
6411
  all its children.
6412

6413
  If not, just recurse to children keeping the same 'force' value.
6414

6415
  @param lu: the lu on whose behalf we execute
6416
  @param node: the node on which to create the device
6417
  @type instance: L{objects.Instance}
6418
  @param instance: the instance which owns the device
6419
  @type device: L{objects.Disk}
6420
  @param device: the device to create
6421
  @type force_create: boolean
6422
  @param force_create: whether to force creation of this device; this
6423
      will be changed to True whenever we find a device which has
6424
      CreateOnSecondary() attribute
6425
  @param info: the extra 'metadata' we should attach to the device
6426
      (this will be represented as a LVM tag)
6427
  @type force_open: boolean
6428
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution
6432

6433
  """
6434
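  # For example (assuming DRBD8 devices report CreateOnSecondary() as true),
  # a DRBD8 disk and, via the recursion below, its LV children are created
  # even on nodes for which the caller passed force_create=False.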
  if device.CreateOnSecondary():
6435
    force_create = True
6436

    
6437
  if device.children:
6438
    for child in device.children:
6439
      _CreateBlockDev(lu, node, instance, child, force_create,
6440
                      info, force_open)
6441

    
6442
  if not force_create:
6443
    return
6444

    
6445
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
6446

    
6447

    
6448
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6449
  """Create a single block device on a given node.
6450

6451
  This will not recurse over children of the device, so they must be
6452
  created in advance.
6453

6454
  @param lu: the lu on whose behalf we execute
6455
  @param node: the node on which to create the device
6456
  @type instance: L{objects.Instance}
6457
  @param instance: the instance which owns the device
6458
  @type device: L{objects.Disk}
6459
  @param device: the device to create
6460
  @param info: the extra 'metadata' we should attach to the device
6461
      (this will be represented as a LVM tag)
6462
  @type force_open: boolean
6463
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution
6467

6468
  """
6469
  lu.cfg.SetDiskID(device, node)
6470
  result = lu.rpc.call_blockdev_create(node, device, device.size,
6471
                                       instance.name, force_open, info)
6472
  result.Raise("Can't create block device %s on"
6473
               " node %s for instance %s" % (device, node, instance.name))
6474
  if device.physical_id is None:
6475
    device.physical_id = result.payload
6476

    
6477

    
6478
def _GenerateUniqueNames(lu, exts):
6479
  """Generate a suitable LV name.
6480

6481
  This will generate a logical volume name for the given instance.
6482

6483
  """
6484
  results = []
6485
  for val in exts:
6486
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6487
    results.append("%s%s" % (new_id, val))
6488
  return results
6489

    
6490

    
6491
def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
6492
                         p_minor, s_minor):
6493
  """Generate a drbd8 device complete with its children.
6494

6495
  """
6496
  port = lu.cfg.AllocatePort()
6497
  vgname = lu.cfg.GetVGName()
6498
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6499
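  # The resulting layout is one DRBD8 device backed by two LVs: a data LV of
  # the requested size and a 128 MiB metadata LV; its logical_id ties
  # together the two nodes, the allocated port, the per-node minors and the
  # shared secret.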
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
6500
                          logical_id=(vgname, names[0]))
6501
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6502
                          logical_id=(vgname, names[1]))
6503
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6504
                          logical_id=(primary, secondary, port,
6505
                                      p_minor, s_minor,
6506
                                      shared_secret),
6507
                          children=[dev_data, dev_meta],
6508
                          iv_name=iv_name)
6509
  return drbd_dev
6510

    
6511

    
6512
def _GenerateDiskTemplate(lu, template_name,
6513
                          instance_name, primary_node,
6514
                          secondary_nodes, disk_info,
6515
                          file_storage_dir, file_driver,
6516
                          base_index):
6517
  """Generate the entire disk layout for a given template type.
6518

6519
  """
6520
  #TODO: compute space requirements
6521

    
6522
  vgname = lu.cfg.GetVGName()
6523
  disk_count = len(disk_info)
6524
  disks = []
6525
  if template_name == constants.DT_DISKLESS:
6526
    pass
6527
  elif template_name == constants.DT_PLAIN:
6528
    if len(secondary_nodes) != 0:
6529
      raise errors.ProgrammerError("Wrong template configuration")
6530

    
6531
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6532
                                      for i in range(disk_count)])
6533
    for idx, disk in enumerate(disk_info):
6534
      disk_index = idx + base_index
6535
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
6536
                              logical_id=(vgname, names[idx]),
6537
                              iv_name="disk/%d" % disk_index,
6538
                              mode=disk["mode"])
6539
      disks.append(disk_dev)
6540
  elif template_name == constants.DT_DRBD8:
6541
    if len(secondary_nodes) != 1:
6542
      raise errors.ProgrammerError("Wrong template configuration")
6543
    remote_node = secondary_nodes[0]
6544
    minors = lu.cfg.AllocateDRBDMinor(
6545
      [primary_node, remote_node] * len(disk_info), instance_name)
6546

    
6547
    names = []
6548
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6549
                                               for i in range(disk_count)]):
6550
      names.append(lv_prefix + "_data")
6551
      names.append(lv_prefix + "_meta")
6552
    for idx, disk in enumerate(disk_info):
6553
      disk_index = idx + base_index
6554
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6555
                                      disk["size"], names[idx*2:idx*2+2],
6556
                                      "disk/%d" % disk_index,
6557
                                      minors[idx*2], minors[idx*2+1])
6558
      disk_dev.mode = disk["mode"]
6559
      disks.append(disk_dev)
6560
  elif template_name == constants.DT_FILE:
6561
    if len(secondary_nodes) != 0:
6562
      raise errors.ProgrammerError("Wrong template configuration")
6563

    
6564
    _RequireFileStorage()
6565

    
6566
    for idx, disk in enumerate(disk_info):
6567
      disk_index = idx + base_index
6568
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6569
                              iv_name="disk/%d" % disk_index,
6570
                              logical_id=(file_driver,
6571
                                          "%s/disk%d" % (file_storage_dir,
6572
                                                         disk_index)),
6573
                              mode=disk["mode"])
6574
      disks.append(disk_dev)
6575
  else:
6576
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
6577
  return disks
6578

    
6579

    
6580
def _GetInstanceInfoText(instance):
6581
  """Compute that text that should be added to the disk's metadata.
6582

6583
  """
6584
  return "originstname+%s" % instance.name
6585

    
6586

    
6587
def _CalcEta(time_taken, written, total_size):
6588
  """Calculates the ETA based on size written and total size.
6589

6590
  @param time_taken: The time taken so far
6591
  @param written: amount written so far
6592
  @param total_size: The total size of data to be written
6593
  @return: The remaining time in seconds
6594

6595
  """
6596
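  # Simple linear extrapolation; e.g. (illustrative figures) if 1024 MiB of a
  # 10240 MiB disk were written in 60 seconds, avg_time is ~0.0586 s/MiB and
  # the ETA is (10240 - 1024) * 0.0586 ~= 540 seconds.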
  avg_time = time_taken / float(written)
6597
  return (total_size - written) * avg_time
6598

    
6599

    
6600
def _WipeDisks(lu, instance):
6601
  """Wipes instance disks.
6602

6603
  @type lu: L{LogicalUnit}
6604
  @param lu: the logical unit on whose behalf we execute
6605
  @type instance: L{objects.Instance}
6606
  @param instance: the instance whose disks we should wipe
6607
  @return: the success of the wipe
6608

6609
  """
6610
  node = instance.primary_node
6611
  for idx, device in enumerate(instance.disks):
6612
    lu.LogInfo("* Wiping disk %d", idx)
6613
    logging.info("Wiping disk %d for instance %s", idx, instance.name)
6614

    
6615
    # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
6616
    # MAX_WIPE_CHUNK at max
6617
    wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
6618
                          constants.MIN_WIPE_CHUNK_PERCENT)
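    # e.g. with the (illustrative) values MIN_WIPE_CHUNK_PERCENT = 10 and
    # MAX_WIPE_CHUNK = 2048 MiB, a 10 GiB disk is wiped in 1 GiB chunks and
    # disks larger than 20 GiB in 2 GiB chunks.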
6619

    
6620
    offset = 0
6621
    size = device.size
6622
    last_output = 0
6623
    start_time = time.time()
6624

    
6625
    while offset < size:
6626
      wipe_size = min(wipe_chunk_size, size - offset)
6627
      result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
6628
      result.Raise("Could not wipe disk %d at offset %d for size %d" %
6629
                   (idx, offset, wipe_size))
6630
      now = time.time()
6631
      offset += wipe_size
6632
      if now - last_output >= 60:
6633
        eta = _CalcEta(now - start_time, offset, size)
6634
        lu.LogInfo(" - done: %.1f%% ETA: %s" %
6635
                   (offset / float(size) * 100, utils.FormatSeconds(eta)))
6636
        last_output = now
6637

    
6638

    
6639
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6640
  """Create all disks for an instance.
6641

6642
  This abstracts away some work from AddInstance.
6643

6644
  @type lu: L{LogicalUnit}
6645
  @param lu: the logical unit on whose behalf we execute
6646
  @type instance: L{objects.Instance}
6647
  @param instance: the instance whose disks we should create
6648
  @type to_skip: list
6649
  @param to_skip: list of indices to skip
6650
  @type target_node: string
6651
  @param target_node: if passed, overrides the target node for creation
6652
  @rtype: boolean
6653
  @return: the success of the creation
6654

6655
  """
6656
  info = _GetInstanceInfoText(instance)
6657
  if target_node is None:
6658
    pnode = instance.primary_node
6659
    all_nodes = instance.all_nodes
6660
  else:
6661
    pnode = target_node
6662
    all_nodes = [pnode]
6663

    
6664
  if instance.disk_template == constants.DT_FILE:
6665
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6666
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
6667

    
6668
    result.Raise("Failed to create directory '%s' on"
6669
                 " node %s" % (file_storage_dir, pnode))
6670

    
6671
  # Note: this needs to be kept in sync with adding of disks in
6672
  # LUSetInstanceParams
6673
  for idx, device in enumerate(instance.disks):
6674
    if to_skip and idx in to_skip:
6675
      continue
6676
    logging.info("Creating volume %s for instance %s",
6677
                 device.iv_name, instance.name)
6678
    #HARDCODE
6679
    for node in all_nodes:
6680
      f_create = node == pnode
6681
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
6682

    
6683

    
6684
def _RemoveDisks(lu, instance, target_node=None):
6685
  """Remove all disks for an instance.
6686

6687
  This abstracts away some work from `AddInstance()` and
6688
  `RemoveInstance()`. Note that in case some of the devices couldn't
6689
  be removed, the removal will continue with the other ones (compare
6690
  with `_CreateDisks()`).
6691

6692
  @type lu: L{LogicalUnit}
6693
  @param lu: the logical unit on whose behalf we execute
6694
  @type instance: L{objects.Instance}
6695
  @param instance: the instance whose disks we should remove
6696
  @type target_node: string
6697
  @param target_node: used to override the node on which to remove the disks
6698
  @rtype: boolean
6699
  @return: the success of the removal
6700

6701
  """
6702
  logging.info("Removing block devices for instance %s", instance.name)
6703

    
6704
  all_result = True
6705
  for device in instance.disks:
6706
    if target_node:
6707
      edata = [(target_node, device)]
6708
    else:
6709
      edata = device.ComputeNodeTree(instance.primary_node)
6710
    for node, disk in edata:
6711
      lu.cfg.SetDiskID(disk, node)
6712
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6713
      if msg:
6714
        lu.LogWarning("Could not remove block device %s on node %s,"
6715
                      " continuing anyway: %s", device.iv_name, node, msg)
6716
        all_result = False
6717

    
6718
  if instance.disk_template == constants.DT_FILE:
6719
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6720
    if target_node:
6721
      tgt = target_node
6722
    else:
6723
      tgt = instance.primary_node
6724
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6725
    if result.fail_msg:
6726
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6727
                    file_storage_dir, instance.primary_node, result.fail_msg)
6728
      all_result = False
6729

    
6730
  return all_result
6731

    
6732

    
6733
def _ComputeDiskSize(disk_template, disks):
6734
  """Compute disk size requirements in the volume group
6735

6736
  """
6737
  # Required free disk space as a function of disk and swap space
6738
  req_size_dict = {
6739
    constants.DT_DISKLESS: None,
6740
    constants.DT_PLAIN: sum(d["size"] for d in disks),
6741
    # 128 MB are added for drbd metadata for each disk
6742
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
6743
    constants.DT_FILE: None,
6744
  }
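  # e.g. for two disks of 10240 and 2048 MiB: DT_PLAIN needs 12288 MiB in the
  # volume group, DT_DRBD8 needs 12288 + 2 * 128 = 12544 MiB (DRBD metadata),
  # while diskless and file-based instances need no VG space at all.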
6745

    
6746
  if disk_template not in req_size_dict:
6747
    raise errors.ProgrammerError("Disk template '%s' size requirement"
6748
                                 " is unknown" %  disk_template)
6749

    
6750
  return req_size_dict[disk_template]
6751

    
6752

    
6753
def _CheckHVParams(lu, nodenames, hvname, hvparams):
6754
  """Hypervisor parameter validation.
6755

6756
  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.
6758

6759
  @type lu: L{LogicalUnit}
6760
  @param lu: the logical unit for which we check
6761
  @type nodenames: list
6762
  @param nodenames: the list of nodes on which we should check
6763
  @type hvname: string
6764
  @param hvname: the name of the hypervisor we should use
6765
  @type hvparams: dict
6766
  @param hvparams: the parameters which we need to check
6767
  @raise errors.OpPrereqError: if the parameters are not valid
6768

6769
  """
6770
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6771
                                                  hvname,
6772
                                                  hvparams)
6773
  for node in nodenames:
6774
    info = hvinfo[node]
6775
    if info.offline:
6776
      continue
6777
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
6778

    
6779

    
6780
def _CheckOSParams(lu, required, nodenames, osname, osparams):
6781
  """OS parameters validation.
6782

6783
  @type lu: L{LogicalUnit}
6784
  @param lu: the logical unit for which we check
6785
  @type required: boolean
6786
  @param required: whether the validation should fail if the OS is not
6787
      found
6788
  @type nodenames: list
6789
  @param nodenames: the list of nodes on which we should check
6790
  @type osname: string
6791
  @param osname: the name of the OS we should use
6792
  @type osparams: dict
6793
  @param osparams: the parameters which we need to check
6794
  @raise errors.OpPrereqError: if the parameters are not valid
6795

6796
  """
6797
  result = lu.rpc.call_os_validate(required, nodenames, osname,
6798
                                   [constants.OS_VALIDATE_PARAMETERS],
6799
                                   osparams)
6800
  for node, nres in result.items():
6801
    # we don't check for offline cases since this should be run only
6802
    # against the master node and/or an instance's nodes
6803
    nres.Raise("OS Parameters validation failed on node %s" % node)
6804
    if not nres.payload:
6805
      lu.LogInfo("OS %s not found on node %s, validation skipped",
6806
                 osname, node)
6807

    
6808

    
6809
class LUCreateInstance(LogicalUnit):
6810
  """Create an instance.
6811

6812
  """
6813
  HPATH = "instance-add"
6814
  HTYPE = constants.HTYPE_INSTANCE
6815
  _OP_PARAMS = [
6816
    _PInstanceName,
6817
    ("mode", ht.NoDefault, ht.TElemOf(constants.INSTANCE_CREATE_MODES)),
6818
    ("start", True, ht.TBool),
6819
    ("wait_for_sync", True, ht.TBool),
6820
    ("ip_check", True, ht.TBool),
6821
    ("name_check", True, ht.TBool),
6822
    ("disks", ht.NoDefault, ht.TListOf(ht.TDict)),
6823
    ("nics", ht.NoDefault, ht.TListOf(ht.TDict)),
6824
    ("hvparams", ht.EmptyDict, ht.TDict),
6825
    ("beparams", ht.EmptyDict, ht.TDict),
6826
    ("osparams", ht.EmptyDict, ht.TDict),
6827
    ("no_install", None, ht.TMaybeBool),
6828
    ("os_type", None, ht.TMaybeString),
6829
    ("force_variant", False, ht.TBool),
6830
    ("source_handshake", None, ht.TOr(ht.TList, ht.TNone)),
6831
    ("source_x509_ca", None, ht.TMaybeString),
6832
    ("source_instance_name", None, ht.TMaybeString),
6833
    ("src_node", None, ht.TMaybeString),
6834
    ("src_path", None, ht.TMaybeString),
6835
    ("pnode", None, ht.TMaybeString),
6836
    ("snode", None, ht.TMaybeString),
6837
    ("iallocator", None, ht.TMaybeString),
6838
    ("hypervisor", None, ht.TMaybeString),
6839
    ("disk_template", ht.NoDefault, _CheckDiskTemplate),
6840
    ("identify_defaults", False, ht.TBool),
6841
    ("file_driver", None, ht.TOr(ht.TNone, ht.TElemOf(constants.FILE_DRIVER))),
6842
    ("file_storage_dir", None, ht.TMaybeString),
6843
    ]
6844
  REQ_BGL = False
6845

    
6846
  def CheckArguments(self):
6847
    """Check arguments.
6848

6849
    """
6850
    # do not require name_check to ease forward/backward compatibility
6851
    # for tools
6852
    if self.op.no_install and self.op.start:
6853
      self.LogInfo("No-installation mode selected, disabling startup")
6854
      self.op.start = False
6855
    # validate/normalize the instance name
6856
    self.op.instance_name = \
6857
      netutils.Hostname.GetNormalizedName(self.op.instance_name)
6858

    
6859
    if self.op.ip_check and not self.op.name_check:
6860
      # TODO: make the ip check more flexible and not depend on the name check
6861
      raise errors.OpPrereqError("Cannot do ip check without a name check",
6862
                                 errors.ECODE_INVAL)
6863

    
6864
    # check nics' parameter names
6865
    for nic in self.op.nics:
6866
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6867

    
6868
    # check disks. parameter names and consistent adopt/no-adopt strategy
6869
    has_adopt = has_no_adopt = False
6870
    for disk in self.op.disks:
6871
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
6872
      if "adopt" in disk:
6873
        has_adopt = True
6874
      else:
6875
        has_no_adopt = True
6876
    if has_adopt and has_no_adopt:
6877
      raise errors.OpPrereqError("Either all disks are adopted or none is",
6878
                                 errors.ECODE_INVAL)
6879
    if has_adopt:
6880
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
6881
        raise errors.OpPrereqError("Disk adoption is not supported for the"
6882
                                   " '%s' disk template" %
6883
                                   self.op.disk_template,
6884
                                   errors.ECODE_INVAL)
6885
      if self.op.iallocator is not None:
        raise errors.OpPrereqError("Disk adoption not allowed with an"
                                   " iallocator script", errors.ECODE_INVAL)
      if self.op.mode == constants.INSTANCE_IMPORT:
        raise errors.OpPrereqError("Disk adoption not allowed for"
                                   " instance import", errors.ECODE_INVAL)

    self.adopt_disks = has_adopt

    # instance name verification
    if self.op.name_check:
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
      self.op.instance_name = self.hostname1.name
      # used in CheckPrereq for ip ping check
      self.check_ip = self.hostname1.ip
    else:
      self.check_ip = None

    # file storage checks
    if (self.op.file_driver and
        not self.op.file_driver in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver, errors.ECODE_INVAL)

    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
      raise errors.OpPrereqError("File storage directory path not absolute",
                                 errors.ECODE_INVAL)

    ### Node/iallocator related checks
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")

    if self.op.pnode is not None:
      if self.op.disk_template in constants.DTS_NET_MIRROR:
        if self.op.snode is None:
          raise errors.OpPrereqError("The networked disk templates need"
                                     " a mirror node", errors.ECODE_INVAL)
      elif self.op.snode:
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
                        " template")
        self.op.snode = None

    self._cds = _GetClusterDomainSecret()

    if self.op.mode == constants.INSTANCE_IMPORT:
      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      # works again!
      self.op.force_variant = True

      if self.op.no_install:
        self.LogInfo("No-installation mode has no effect during import")

    elif self.op.mode == constants.INSTANCE_CREATE:
      if self.op.os_type is None:
        raise errors.OpPrereqError("No guest OS specified",
                                   errors.ECODE_INVAL)
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
                                   " installation" % self.op.os_type,
                                   errors.ECODE_STATE)
      if self.op.disk_template is None:
        raise errors.OpPrereqError("No disk template specified",
                                   errors.ECODE_INVAL)

    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      # Check handshake to ensure both clusters have the same domain secret
      src_handshake = self.op.source_handshake
      if not src_handshake:
        raise errors.OpPrereqError("Missing source handshake",
                                   errors.ECODE_INVAL)

      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
                                                           src_handshake)
      if errmsg:
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
                                   errors.ECODE_INVAL)

      # Load and check source CA
      self.source_x509_ca_pem = self.op.source_x509_ca
      if not self.source_x509_ca_pem:
        raise errors.OpPrereqError("Missing source X509 CA",
                                   errors.ECODE_INVAL)

      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
                                                    self._cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
                                   errors.ECODE_INVAL)

      self.source_x509_ca = cert

      src_instance_name = self.op.source_instance_name
      if not src_instance_name:
        raise errors.OpPrereqError("Missing source instance name",
                                   errors.ECODE_INVAL)

      self.source_instance_name = \
          netutils.GetHostname(name=src_instance_name).name

    else:
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
                                 self.op.mode, errors.ECODE_INVAL)

  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    """
    self.needed_locks = {}

    instance_name = self.op.instance_name
    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name, errors.ECODE_EXISTS)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    if self.op.iallocator:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path

      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from an absolute"
                                     " path requires a source node option.",
                                     errors.ECODE_INVAL)
      else:
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          self.op.src_path = src_path = \
            utils.PathJoin(constants.EXPORT_DIR, src_path)

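  # Rough sketch of the allocator interaction used below (the authoritative
  # request format is defined by the IAllocator class, not here): the request
  # carries roughly
  #   {"name": op.instance_name, "disk_template": ..., "os": op.os_type,
  #    "vcpus": be_full[BE_VCPUS], "mem_size": be_full[BE_MEMORY],
  #    "disks": self.disks, "nics": [n.ToDict() for n in self.nics]}
  # and on success the script returns a list of node names whose length must
  # equal ial.required_nodes (a second node is used as the mirror secondary).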
  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    nics = [n.ToDict() for n in self.nics]
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     tags=[],
                     os=self.op.os_type,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     mem_size=self.be_full[constants.BE_MEMORY],
                     disks=self.disks,
                     nics=nics,
                     hypervisor=self.op.hypervisor,
                     )

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.result),
                                  ial.required_nodes), errors.ECODE_FAULT)
    self.op.pnode = ial.result[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.result))
    if ial.required_nodes == 2:
      self.op.snode = ial.result[1]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      memory=self.be_full[constants.BE_MEMORY],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d["size"], d["mode"]) for d in self.disks],
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor_name=self.op.hypervisor,
    ))

    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
          self.secondaries)
    return env, nl, nl

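  # The export data read below is a plain INI-style file; an illustrative
  # sketch only (the real section names are the constants.INISECT_* values):
  #
  #   [<INISECT_EXP>]   version, os
  #   [<INISECT_INS>]   name, disk_template, disk_count, disk0_size,
  #                     disk0_dump, nic_count, nic0_mac, nic0_ip, ...
  #   [<INISECT_HYP>], [<INISECT_BEP>], [<INISECT_OSP>]
  #                     per-category parameters used as defaults
  #
  # _ReadExportInfo locates and loads this file; _ReadExportParams then uses
  # it to fill in opcode values the user did not specify.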
  def _ReadExportInfo(self):
    """Reads the export information from disk.

    It will override the opcode source node and path with the actual
    information, if these two were not specified before.

    @return: the export information

    """
    assert self.op.mode == constants.INSTANCE_IMPORT

    src_node = self.op.src_node
    src_path = self.op.src_path

    if src_node is None:
      locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
      exp_list = self.rpc.call_export_list(locked_nodes)
      found = False
      for node in exp_list:
        if exp_list[node].fail_msg:
          continue
        if src_path in exp_list[node].payload:
          found = True
          self.op.src_node = src_node = node
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
                                                       src_path)
          break
      if not found:
        raise errors.OpPrereqError("No export found for relative path %s" %
                                    src_path, errors.ECODE_INVAL)

    _CheckNodeOnline(self, src_node)
    result = self.rpc.call_export_info(src_node, src_path)
    result.Raise("No export or invalid export found in dir %s" % src_path)

    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
    if not export_info.has_section(constants.INISECT_EXP):
      raise errors.ProgrammerError("Corrupted export config",
                                   errors.ECODE_ENVIRON)

    ei_version = export_info.get(constants.INISECT_EXP, "version")
    if (int(ei_version) != constants.EXPORT_VERSION):
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                 (ei_version, constants.EXPORT_VERSION),
                                 errors.ECODE_ENVIRON)
    return export_info

  def _ReadExportParams(self, einfo):
    """Use export parameters as defaults.

    In case the opcode doesn't specify (as in override) some instance
    parameters, then try to use them from the export information, if
    that declares them.

    """
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")

    if self.op.disk_template is None:
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
        self.op.disk_template = einfo.get(constants.INISECT_INS,
                                          "disk_template")
      else:
        raise errors.OpPrereqError("No disk template specified and the export"
                                   " is missing the disk_template information",
                                   errors.ECODE_INVAL)

    if not self.op.disks:
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
        disks = []
        # TODO: import the disk iv_name too
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
          disks.append({"size": disk_sz})
        self.op.disks = disks
      else:
        raise errors.OpPrereqError("No disk info specified and the export"
                                   " is missing the disk information",
                                   errors.ECODE_INVAL)

    if (not self.op.nics and
        einfo.has_option(constants.INISECT_INS, "nic_count")):
      nics = []
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
        ndict = {}
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
          ndict[name] = v
        nics.append(ndict)
      self.op.nics = nics

    if (self.op.hypervisor is None and
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
    if einfo.has_section(constants.INISECT_HYP):
      # use the export parameters but do not override the ones
      # specified by the user
      for name, value in einfo.items(constants.INISECT_HYP):
        if name not in self.op.hvparams:
          self.op.hvparams[name] = value

    if einfo.has_section(constants.INISECT_BEP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_BEP):
        if name not in self.op.beparams:
          self.op.beparams[name] = value
    else:
      # try to read the parameters old style, from the main section
      for name in constants.BES_PARAMETERS:
        if (name not in self.op.beparams and
            einfo.has_option(constants.INISECT_INS, name)):
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)

    if einfo.has_section(constants.INISECT_OSP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_OSP):
        if name not in self.op.osparams:
          self.op.osparams[name] = value

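  # Example of the "identify_defaults" behaviour implemented below: if the
  # submitted beparams contain e.g. {"memory": 128} and the cluster default
  # for memory is also 128, the key is dropped from self.op.beparams, so the
  # stored instance keeps following the cluster default instead of pinning
  # the value.  (Values are illustrative only.)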
  def _RevertToDefaults(self, cluster):
    """Revert the instance parameters to the default values.

    """
    # hvparams
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
    for name in self.op.hvparams.keys():
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
        del self.op.hvparams[name]
    # beparams
    be_defs = cluster.SimpleFillBE({})
    for name in self.op.beparams.keys():
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
        del self.op.beparams[name]
    # nic params
    nic_defs = cluster.SimpleFillNIC({})
    for nic in self.op.nics:
      for name in constants.NICS_PARAMETERS:
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
          del nic[name]
    # osparams
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
    for name in self.op.osparams.keys():
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
        del self.op.osparams[name]

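  # CheckPrereq below roughly performs, in order: read the export data
  # (import mode only), validate the disk template and hypervisor against
  # the cluster, fill and syntax-check hv/be/os parameters, build the NIC
  # and disk specifications, optionally ping-check the instance IP, run the
  # iallocator if requested, and finally verify the chosen nodes (state,
  # free disk/memory, OS availability, bridges, adopted LVs).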
  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.mode == constants.INSTANCE_IMPORT:
      export_info = self._ReadExportInfo()
      self._ReadExportParams(export_info)

    _CheckDiskTemplate(self.op.disk_template)

    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances", errors.ECODE_STATE)

    if self.op.hypervisor is None:
      self.op.hypervisor = self.cfg.GetHypervisorType()

    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 " cluster (%s)" % (self.op.hypervisor,
                                  ",".join(enabled_hvs)),
                                 errors.ECODE_STATE)

    # check hypervisor parameter syntax (locally)
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
                                      self.op.hvparams)
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)
    self.hv_full = filled_hvp
    # check that we don't specify global parameters on an instance
    _CheckGlobalHvParams(self.op.hvparams)

    # fill and remember the beparams dict
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
    self.be_full = cluster.SimpleFillBE(self.op.beparams)

    # build os parameters
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)

    # now that hvp/bep are in final format, let's reset to defaults,
    # if told to do so
    if self.op.identify_defaults:
      self._RevertToDefaults(cluster)

7308
    self.nics = []
7309
    for idx, nic in enumerate(self.op.nics):
7310
      nic_mode_req = nic.get("mode", None)
7311
      nic_mode = nic_mode_req
7312
      if nic_mode is None:
7313
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
7314

    
7315
      # in routed mode, for the first nic, the default ip is 'auto'
7316
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
7317
        default_ip_mode = constants.VALUE_AUTO
7318
      else:
7319
        default_ip_mode = constants.VALUE_NONE
7320

    
7321
      # ip validity checks
7322
      ip = nic.get("ip", default_ip_mode)
7323
      if ip is None or ip.lower() == constants.VALUE_NONE:
7324
        nic_ip = None
7325
      elif ip.lower() == constants.VALUE_AUTO:
7326
        if not self.op.name_check:
7327
          raise errors.OpPrereqError("IP address set to auto but name checks"
7328
                                     " have been skipped",
7329
                                     errors.ECODE_INVAL)
7330
        nic_ip = self.hostname1.ip
7331
      else:
7332
        if not netutils.IPAddress.IsValid(ip):
7333
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
7334
                                     errors.ECODE_INVAL)
7335
        nic_ip = ip
7336

    
7337
      # TODO: check the ip address for uniqueness
7338
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7339
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
7340
                                   errors.ECODE_INVAL)
7341

    
7342
      # MAC address verification
7343
      mac = nic.get("mac", constants.VALUE_AUTO)
7344
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7345
        mac = utils.NormalizeAndValidateMac(mac)
7346

    
7347
        try:
7348
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
7349
        except errors.ReservationError:
7350
          raise errors.OpPrereqError("MAC address %s already in use"
7351
                                     " in cluster" % mac,
7352
                                     errors.ECODE_NOTUNIQUE)
7353

    
7354
      # bridge verification
7355
      bridge = nic.get("bridge", None)
7356
      link = nic.get("link", None)
7357
      if bridge and link:
7358
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7359
                                   " at the same time", errors.ECODE_INVAL)
7360
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
7361
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
7362
                                   errors.ECODE_INVAL)
7363
      elif bridge:
7364
        link = bridge
7365

    
7366
      nicparams = {}
7367
      if nic_mode_req:
7368
        nicparams[constants.NIC_MODE] = nic_mode_req
7369
      if link:
7370
        nicparams[constants.NIC_LINK] = link
7371

    
7372
      check_params = cluster.SimpleFillNIC(nicparams)
7373
      objects.NIC.CheckParameterSyntax(check_params)
7374
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
7375

    
7376
    # disk checks/pre-build
7377
    self.disks = []
7378
    for disk in self.op.disks:
7379
      mode = disk.get("mode", constants.DISK_RDWR)
7380
      if mode not in constants.DISK_ACCESS_SET:
7381
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
7382
                                   mode, errors.ECODE_INVAL)
7383
      size = disk.get("size", None)
7384
      if size is None:
7385
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
7386
      try:
7387
        size = int(size)
7388
      except (TypeError, ValueError):
7389
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
7390
                                   errors.ECODE_INVAL)
7391
      new_disk = {"size": size, "mode": mode}
7392
      if "adopt" in disk:
7393
        new_disk["adopt"] = disk["adopt"]
7394
      self.disks.append(new_disk)
7395

    
7396
    if self.op.mode == constants.INSTANCE_IMPORT:
7397

    
7398
      # Check that the new instance doesn't have less disks than the export
7399
      instance_disks = len(self.disks)
7400
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7401
      if instance_disks < export_disks:
7402
        raise errors.OpPrereqError("Not enough disks to import."
7403
                                   " (instance: %d, export: %d)" %
7404
                                   (instance_disks, export_disks),
7405
                                   errors.ECODE_INVAL)
7406

    
7407
      disk_images = []
7408
      for idx in range(export_disks):
7409
        option = 'disk%d_dump' % idx
7410
        if export_info.has_option(constants.INISECT_INS, option):
7411
          # FIXME: are the old os-es, disk sizes, etc. useful?
7412
          export_name = export_info.get(constants.INISECT_INS, option)
7413
          image = utils.PathJoin(self.op.src_path, export_name)
7414
          disk_images.append(image)
7415
        else:
7416
          disk_images.append(False)
7417

    
7418
      self.src_images = disk_images
7419

    
7420
      old_name = export_info.get(constants.INISECT_INS, 'name')
7421
      try:
7422
        exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
7423
      except (TypeError, ValueError), err:
7424
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
7425
                                   " an integer: %s" % str(err),
7426
                                   errors.ECODE_STATE)
7427
      if self.op.instance_name == old_name:
7428
        for idx, nic in enumerate(self.nics):
7429
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
7430
            nic_mac_ini = 'nic%d_mac' % idx
7431
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
7432

    
7433
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
7434

    
7435
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
7436
    if self.op.ip_check:
7437
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
7438
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
7439
                                   (self.check_ip, self.op.instance_name),
7440
                                   errors.ECODE_NOTUNIQUE)
7441

    
7442
    #### mac address generation
7443
    # By generating here the mac address both the allocator and the hooks get
7444
    # the real final mac address rather than the 'auto' or 'generate' value.
7445
    # There is a race condition between the generation and the instance object
7446
    # creation, which means that we know the mac is valid now, but we're not
7447
    # sure it will be when we actually add the instance. If things go bad
7448
    # adding the instance will abort because of a duplicate mac, and the
7449
    # creation job will fail.
7450
    for nic in self.nics:
7451
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7452
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
7453

    
7454
    #### allocator run
7455

    
7456
    if self.op.iallocator is not None:
7457
      self._RunAllocator()
7458

    
7459
    #### node related checks
7460

    
7461
    # check primary node
7462
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
7463
    assert self.pnode is not None, \
7464
      "Cannot retrieve locked node %s" % self.op.pnode
7465
    if pnode.offline:
7466
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
7467
                                 pnode.name, errors.ECODE_STATE)
7468
    if pnode.drained:
7469
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
7470
                                 pnode.name, errors.ECODE_STATE)
7471
    if not pnode.vm_capable:
7472
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
7473
                                 " '%s'" % pnode.name, errors.ECODE_STATE)
7474

    
7475
    self.secondaries = []
7476

    
7477
    # mirror node verification
7478
    if self.op.disk_template in constants.DTS_NET_MIRROR:
7479
      if self.op.snode == pnode.name:
7480
        raise errors.OpPrereqError("The secondary node cannot be the"
7481
                                   " primary node.", errors.ECODE_INVAL)
7482
      _CheckNodeOnline(self, self.op.snode)
7483
      _CheckNodeNotDrained(self, self.op.snode)
7484
      _CheckNodeVmCapable(self, self.op.snode)
7485
      self.secondaries.append(self.op.snode)
7486

    
7487
    nodenames = [pnode.name] + self.secondaries
7488

    
7489
    req_size = _ComputeDiskSize(self.op.disk_template,
7490
                                self.disks)
7491

    
7492
    # Check lv size requirements, if not adopting
7493
    if req_size is not None and not self.adopt_disks:
7494
      _CheckNodesFreeDisk(self, nodenames, req_size)
7495

    
7496
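    # Adoption sanity checks.  As used below, the lv_list RPC payload maps
    # each LV name to a tuple whose first field is the LV size and whose
    # third field flags an LV that is currently online (in use); only
    # existing, unused LVs may be adopted.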
    if self.adopt_disks: # instead, we must check the adoption data
      all_lvs = set([i["adopt"] for i in self.disks])
      if len(all_lvs) != len(self.disks):
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
                                   errors.ECODE_INVAL)
      for lv_name in all_lvs:
        try:
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("LV named %s used by another instance" %
                                     lv_name, errors.ECODE_NOTUNIQUE)

      node_lvs = self.rpc.call_lv_list([pnode.name],
                                       self.cfg.GetVGName())[pnode.name]
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
      node_lvs = node_lvs.payload
      delta = all_lvs.difference(node_lvs.keys())
      if delta:
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
      if online_lvs:
        raise errors.OpPrereqError("Online logical volumes found, cannot"
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
                                   errors.ECODE_STATE)
      # update the size of disk based on what is found
      for dsk in self.disks:
        dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
    # check OS parameters (remotely)
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    # memory check on primary node
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MEMORY],
                           self.op.hypervisor)

    self.dry_run_result = list(nodenames)

  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    if constants.ENABLE_FILE_STORAGE:
      # this is needed because os.path.join does not accept None arguments
      if self.op.file_storage_dir is None:
        string_file_storage_dir = ""
      else:
        string_file_storage_dir = self.op.file_storage_dir

      # build the full file storage dir path
      file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
                                        string_file_storage_dir, instance)
    else:
      file_storage_dir = ""

    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  file_storage_dir,
                                  self.op.file_driver,
                                  0)

    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_up=False,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            osparams=self.op.osparams,
                            )

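    # For adopted disks the generated Disk objects already carry fresh,
    # unique LV names in logical_id (a (vg_name, lv_name) tuple for LV-based
    # disks); the block below renames the pre-existing LVs on the primary
    # node from their current names to those generated names, instead of
    # creating new storage.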
    if self.adopt_disks:
      # rename LVs to the newly-generated names; we need to construct
      # 'fake' LV disks with the old data, plus the new unique_id
      tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
      rename_to = []
      for t_dsk, a_dsk in zip(tmp_disks, self.disks):
        rename_to.append(t_dsk.logical_id)
        t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
        self.cfg.SetDiskID(t_dsk, pnode_name)
      result = self.rpc.call_blockdev_rename(pnode_name,
                                             zip(tmp_disks, rename_to))
      result.Raise("Failed to rename adopted LVs")
    else:
      feedback_fn("* creating instance disks...")
      try:
        _CreateDisks(self, iobj)
      except errors.OpExecError:
        self.LogWarning("Device creation failed, reverting...")
        try:
          _RemoveDisks(self, iobj)
        finally:
          self.cfg.ReleaseDRBDMinors(instance)
          raise

      if self.cfg.GetClusterInfo().prealloc_wipe_disks:
        feedback_fn("* wiping instance disks...")
        try:
          _WipeDisks(self, iobj)
        except errors.OpExecError:
          self.LogWarning("Device wiping failed, reverting...")
          try:
            _RemoveDisks(self, iobj)
          finally:
            self.cfg.ReleaseDRBDMinors(instance)
            raise

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj, self.proc.GetECId())

    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]
    # Unlock all the nodes
    if self.op.mode == constants.INSTANCE_IMPORT:
      nodes_keep = [self.op.src_node]
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
                       if node != self.op.src_node]
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
    else:
      self.context.glm.release(locking.LEVEL_NODE)
      del self.acquired_locks[locking.LEVEL_NODE]

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      time.sleep(15)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

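    # OS initialization: for a plain creation the OS create scripts run on
    # the primary node; for a local import each dump file is streamed to the
    # corresponding new disk via DiskTransfer; for a remote import the disks
    # are received over the network (RemoteImport, authenticated with the
    # source X509 CA and the cluster domain secret) and the rename script is
    # run afterwards.  Adopted or diskless instances skip this entirely.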
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
      if self.op.mode == constants.INSTANCE_CREATE:
        if not self.op.no_install:
          feedback_fn("* running the instance OS create scripts...")
          # FIXME: pass debug option from opcode to backend
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
                                                 self.op.debug_level)
          result.Raise("Could not add os for instance %s"
                       " on node %s" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")

        transfers = []

        for idx, image in enumerate(self.src_images):
          if not image:
            continue

          # FIXME: pass debug option from opcode to backend
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
                                             constants.IEIO_FILE, (image, ),
                                             constants.IEIO_SCRIPT,
                                             (iobj.disks[idx], idx),
                                             None)
          transfers.append(dt)

        import_result = \
          masterd.instance.TransferInstanceData(self, feedback_fn,
                                                self.op.src_node, pnode_name,
                                                self.pnode.secondary_ip,
                                                iobj, transfers)
        if not compat.all(import_result):
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
        feedback_fn("* preparing remote import...")
        connect_timeout = constants.RIE_CONNECT_TIMEOUT
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

        disk_results = masterd.instance.RemoteImport(self, feedback_fn, iobj,
                                                     self.source_x509_ca,
                                                     self._cds, timeouts)
        if not compat.all(disk_results):
          # TODO: Should the instance still be started, even if some disks
          # failed to import (valid for local imports, too)?
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

        # Run rename script on newly imported instance
        assert iobj.name == instance
        feedback_fn("Running rename script for %s" % instance)
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
                                                   self.source_instance_name,
                                                   self.op.debug_level)
        if result.fail_msg:
          self.LogWarning("Failed to run rename script for %s on node"
                          " %s: %s" % (instance, pnode_name, result.fail_msg))

      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    if self.op.start:
      iobj.admin_up = True
      self.cfg.Update(iobj, feedback_fn)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)


class LUConnectConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  _OP_PARAMS = [
    _PInstanceName
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      if instance.admin_up:
        state = "ERROR_down"
      else:
        state = "ADMIN_down"
      raise errors.OpExecError("Instance %s is not running (state %s)" %
                               (instance.name, state))

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    hyper = hypervisor.GetHypervisor(instance.hypervisor)
    cluster = self.cfg.GetClusterInfo()
    # beparams and hvparams are passed separately, to avoid editing the
    # instance and then saving the defaults in the instance itself.
    hvparams = cluster.FillHV(instance)
    beparams = cluster.FillBE(instance)
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)

    # build ssh cmdline
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)


class LUReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("mode", ht.NoDefault, ht.TElemOf(constants.REPLACE_MODES)),
    ("disks", ht.EmptyList, ht.TListOf(ht.TPositiveInt)),
    ("remote_node", None, ht.TMaybeString),
    ("iallocator", None, ht.TMaybeString),
    ("early_release", False, ht.TBool),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.iallocator is not None:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    elif self.op.remote_node is not None:
      remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      self.op.remote_node = remote_node

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks, False, self.op.early_release)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    # If we're not already locking all nodes in the set we have to declare the
    # instance's primary/secondary nodes.
    if (level == locking.LEVEL_NODE and
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    nl = [
      self.cfg.GetMasterNode(),
      instance.primary_node,
      ]
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)
    return env, nl, nl


class TLReplaceDisks(Tasklet):
  """Replaces disks for an instance.

  Note: Locking is not within the scope of this class.

  """
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
               disks, delay_iallocator, early_release):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.mode = mode
    self.iallocator_name = iallocator_name
    self.remote_node = remote_node
    self.disks = disks
    self.delay_iallocator = delay_iallocator
    self.early_release = early_release

    # Runtime data
    self.instance = None
    self.new_node = None
    self.target_node = None
    self.other_node = None
    self.remote_node_info = None
    self.node_secondary_ip = None

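  # Valid argument combinations enforced by CheckArguments below (sketch):
  #
  #   mode                remote_node / iallocator
  #   REPLACE_DISK_CHG    exactly one of the two must be given
  #   any other mode      neither may be given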
  @staticmethod
  def CheckArguments(mode, remote_node, iallocator):
    """Helper function for users of this class.

    """
    # check for valid parameter combination
    if mode == constants.REPLACE_DISK_CHG:
      if remote_node is None and iallocator is None:
        raise errors.OpPrereqError("When changing the secondary either an"
                                   " iallocator script must be used or the"
                                   " new node given", errors.ECODE_INVAL)

      if remote_node is not None and iallocator is not None:
        raise errors.OpPrereqError("Give either the iallocator or the new"
                                   " secondary, not both", errors.ECODE_INVAL)

    elif remote_node is not None or iallocator is not None:
      # Not replacing the secondary
      raise errors.OpPrereqError("The iallocator and new node options can"
                                 " only be used when changing the"
                                 " secondary node", errors.ECODE_INVAL)

  @staticmethod
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
    """Compute a new secondary node using an IAllocator.

    """
    ial = IAllocator(lu.cfg, lu.rpc,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     name=instance_name,
                     relocate_from=relocate_from)

    ial.Run(iallocator_name)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
                                 " %s" % (iallocator_name, ial.info),
                                 errors.ECODE_NORES)

    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (iallocator_name,
                                  len(ial.result), ial.required_nodes),
                                 errors.ECODE_FAULT)

    remote_node_name = ial.result[0]

    lu.LogInfo("Selected new secondary for instance '%s': %s",
               instance_name, remote_node_name)

    return remote_node_name

  def _FindFaultyDisks(self, node_name):
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
                                    node_name, True)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.instance_name

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
                                 " instances", errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("The instance has a strange layout,"
                                 " expected one secondary but found %d" %
                                 len(instance.secondary_nodes),
                                 errors.ECODE_FAULT)

    if not self.delay_iallocator:
      self._CheckPrereq2()

  def _CheckPrereq2(self):
    """Check prerequisites, second part.

    This function should always be part of CheckPrereq. It was separated and is
    now called from Exec because during node evacuation iallocator was only
    called with an unmodified cluster model, not taking planned changes into
    account.

    """
    instance = self.instance
    secondary_node = instance.secondary_nodes[0]

    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is not None:
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node
    else:
      self.remote_node_info = None

    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance.", errors.ECODE_INVAL)

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance.",
                                 errors.ECODE_INVAL)

    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 errors.ECODE_INVAL)

    if self.mode == constants.REPLACE_DISK_AUTO:
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

      if faulty_primary:
        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)
        _CheckNodeVmCapable(self.lu, remote_node)

        old_node_info = self.cfg.GetNodeInfo(secondary_node)
        assert old_node_info is not None
        if old_node_info.offline and not self.early_release:
          # doesn't make sense to delay the release
          self.early_release = True
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
                          " early-release mode", secondary_node)

      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)

      # If not specified all disks should be replaced
      if not self.disks:
        self.disks = range(len(self.instance.disks))

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    # Check whether disks are valid
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    node_2nd_ip = {}

    for node_name in [self.target_node, self.other_node, self.new_node]:
      if node_name is not None:
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip

    self.node_secondary_ip = node_2nd_ip

  def Exec(self, feedback_fn):
    """Execute disk replacement.

    This dispatches the disk replacement to the appropriate handler.

    """
    if self.delay_iallocator:
      self._CheckPrereq2()

    if not self.disks:
      feedback_fn("No disks need replacement")
      return

    feedback_fn("Replacing disk(s) %s for %s" %
                (utils.CommaJoin(self.disks), self.instance.name))

    activate_disks = (not self.instance.admin_up)

    # Activate the instance disks if we're replacing them on a down instance
    if activate_disks:
      _StartInstanceDisks(self.lu, self.instance, True)

    try:
      # Should we replace the secondary node?
      if self.new_node is not None:
        fn = self._ExecDrbd8Secondary
      else:
        fn = self._ExecDrbd8DiskOnly

      return fn(feedback_fn)

    finally:
      # Deactivate the instance disks if we're replacing them on a
      # down instance
      if activate_disks:
        _SafeShutdownInstanceDisks(self.lu, self.instance)

  def _CheckVolumeGroup(self, nodes):
    self.lu.LogInfo("Checking volume groups")

    vgname = self.cfg.GetVGName()

    # Make sure volume group exists on all involved nodes
    results = self.rpc.call_vg_list(nodes)
    if not results:
      raise errors.OpExecError("Can't list volume groups on the nodes")

    for node in nodes:
      res = results[node]
      res.Raise("Error checking node %s" % node)
      if vgname not in res.payload:
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
                                 (vgname, node))

  def _CheckDisksExistence(self, nodes):
    # Check disk existence
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        msg = result.fail_msg
        if msg or not result.payload:
          if not msg:
            msg = "disk not found"
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
                                   (idx, node, msg))

  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
                      (idx, node_name))

      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
                                   ldisk=ldisk):
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
                                 " replace disks for instance %s" %
                                 (node_name, self.instance.name))

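  # _CreateNewStorage allocates, for every disk index being replaced, a pair
  # of fresh LVs on the given node: a data LV of the same size as the DRBD
  # device and a small metadata LV (size 128, as hard-coded below), named via
  # _GenerateUniqueNames with ".diskN_data"/".diskN_meta" suffixes.  The
  # returned iv_names dict maps each disk's iv_name to (dev, old_lvs,
  # new_lvs) and drives the later detach/rename/attach steps.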
  def _CreateNewStorage(self, node_name):
8188
    vgname = self.cfg.GetVGName()
8189
    iv_names = {}
8190

    
8191
    for idx, dev in enumerate(self.instance.disks):
8192
      if idx not in self.disks:
8193
        continue
8194

    
8195
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
8196

    
8197
      self.cfg.SetDiskID(dev, node_name)
8198

    
8199
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
8200
      names = _GenerateUniqueNames(self.lu, lv_names)
8201

    
8202
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
8203
                             logical_id=(vgname, names[0]))
8204
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
8205
                             logical_id=(vgname, names[1]))
8206

    
8207
      new_lvs = [lv_data, lv_meta]
8208
      old_lvs = dev.children
8209
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
8210

    
8211
      # we pass force_create=True to force the LVM creation
8212
      for new_lv in new_lvs:
8213
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
8214
                        _GetInstanceInfoText(self.instance), False)
8215

    
8216
    return iv_names
8217

    
8218
  def _CheckDevices(self, node_name, iv_names):
8219
    for name, (dev, _, _) in iv_names.iteritems():
8220
      self.cfg.SetDiskID(dev, node_name)
8221

    
8222
      result = self.rpc.call_blockdev_find(node_name, dev)
8223

    
8224
      msg = result.fail_msg
8225
      if msg or not result.payload:
8226
        if not msg:
8227
          msg = "disk not found"
8228
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
8229
                                 (name, msg))
8230

    
8231
      if result.payload.is_degraded:
8232
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
8233

    
8234
  def _RemoveOldStorage(self, node_name, iv_names):
8235
    for name, (_, old_lvs, _) in iv_names.iteritems():
8236
      self.lu.LogInfo("Remove logical volumes for %s" % name)
8237

    
8238
      for lv in old_lvs:
8239
        self.cfg.SetDiskID(lv, node_name)
8240

    
8241
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
8242
        if msg:
8243
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
8244
                             hint="remove unused LVs manually")
8245

    
8246
  def _ReleaseNodeLock(self, node_name):
    """Releases the lock for the given node (or list of nodes)."""
    self.lu.context.glm.release(locking.LEVEL_NODE, node_name)

  def _ExecDrbd8DiskOnly(self, feedback_fn):
8251
    """Replace a disk on the primary or secondary for DRBD 8.
8252

8253
    The algorithm for replace is quite complicated:
8254

8255
      1. for each disk to be replaced:
8256

8257
        1. create new LVs on the target node with unique names
8258
        1. detach old LVs from the drbd device
8259
        1. rename old LVs to name_replaced.<time_t>
8260
        1. rename new LVs to old LVs
8261
        1. attach the new LVs (with the old names now) to the drbd device
8262

8263
      1. wait for sync across all devices
8264

8265
      1. for each modified disk:
8266

8267
        1. remove old LVs (which have the name name_replaced.<time_t>)
8268

8269
    Failures are not very well handled.
8270

8271
    """
8272
    steps_total = 6
8273

    
8274
    # Step: check device activation
8275
    self.lu.LogStep(1, steps_total, "Check device existence")
8276
    self._CheckDisksExistence([self.other_node, self.target_node])
8277
    self._CheckVolumeGroup([self.target_node, self.other_node])
8278

    
8279
    # Step: check other node consistency
8280
    self.lu.LogStep(2, steps_total, "Check peer consistency")
8281
    self._CheckDisksConsistency(self.other_node,
8282
                                self.other_node == self.instance.primary_node,
8283
                                False)
8284

    
8285
    # Step: create new storage
8286
    self.lu.LogStep(3, steps_total, "Allocate new storage")
8287
    iv_names = self._CreateNewStorage(self.target_node)
8288

    
8289
    # Step: for each lv, detach+rename*2+attach
8290
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8291
    for dev, old_lvs, new_lvs in iv_names.itervalues():
8292
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
8293

    
8294
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
8295
                                                     old_lvs)
8296
      result.Raise("Can't detach drbd from local storage on node"
8297
                   " %s for device %s" % (self.target_node, dev.iv_name))
8298
      #dev.children = []
8299
      #cfg.Update(instance)
8300

    
8301
      # ok, we created the new LVs, so now we know we have the needed
8302
      # storage; as such, we proceed on the target node to rename
8303
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
8304
      # using the assumption that logical_id == physical_id (which in
8305
      # turn is the unique_id on that node)
8306

    
8307
      # FIXME(iustin): use a better name for the replaced LVs
8308
      temp_suffix = int(time.time())
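      # for LVs, physical_id is (vg_name, lv_name); ren_fn keeps the VG and
      # appends "_replaced-<timestamp>" to the LV name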
8309
      ren_fn = lambda d, suff: (d.physical_id[0],
8310
                                d.physical_id[1] + "_replaced-%s" % suff)
8311

    
8312
      # Build the rename list based on what LVs exist on the node
8313
      rename_old_to_new = []
8314
      for to_ren in old_lvs:
8315
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
8316
        if not result.fail_msg and result.payload:
8317
          # device exists
8318
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
8319

    
8320
      self.lu.LogInfo("Renaming the old LVs on the target node")
8321
      result = self.rpc.call_blockdev_rename(self.target_node,
8322
                                             rename_old_to_new)
8323
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
8324

    
8325
      # Now we rename the new LVs to the old LVs
8326
      self.lu.LogInfo("Renaming the new LVs on the target node")
8327
      rename_new_to_old = [(new, old.physical_id)
8328
                           for old, new in zip(old_lvs, new_lvs)]
8329
      result = self.rpc.call_blockdev_rename(self.target_node,
8330
                                             rename_new_to_old)
8331
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
8332

    
8333
      for old, new in zip(old_lvs, new_lvs):
8334
        new.logical_id = old.logical_id
8335
        self.cfg.SetDiskID(new, self.target_node)
8336

    
8337
      for disk in old_lvs:
8338
        disk.logical_id = ren_fn(disk, temp_suffix)
8339
        self.cfg.SetDiskID(disk, self.target_node)
8340

    
8341
      # Now that the new lvs have the old name, we can add them to the device
8342
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
8343
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
8344
                                                  new_lvs)
8345
      msg = result.fail_msg
8346
      if msg:
8347
        for new_lv in new_lvs:
8348
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
8349
                                               new_lv).fail_msg
8350
          if msg2:
8351
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
8352
                               hint=("cleanup manually the unused logical"
8353
                                     "volumes"))
8354
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
8355

    
8356
      dev.children = new_lvs
8357

    
8358
      self.cfg.Update(self.instance, feedback_fn)
8359

    
8360
    cstep = 5
8361
    if self.early_release:
8362
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
8363
      cstep += 1
8364
      self._RemoveOldStorage(self.target_node, iv_names)
8365
      # WARNING: we release both node locks here, do not do other RPCs
8366
      # than WaitForSync to the primary node
8367
      self._ReleaseNodeLock([self.target_node, self.other_node])
8368

    
8369
    # Wait for sync
8370
    # This can fail as the old devices are degraded and _WaitForSync
8371
    # does a combined result over all disks, so we don't check its return value
8372
    self.lu.LogStep(cstep, steps_total, "Sync devices")
8373
    cstep += 1
8374
    _WaitForSync(self.lu, self.instance)
8375

    
8376
    # Check all devices manually
8377
    self._CheckDevices(self.instance.primary_node, iv_names)
8378

    
8379
    # Step: remove old storage
8380
    if not self.early_release:
8381
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
8382
      cstep += 1
8383
      self._RemoveOldStorage(self.target_node, iv_names)
8384

    
8385
  def _ExecDrbd8Secondary(self, feedback_fn):
8386
    """Replace the secondary node for DRBD 8.
8387

8388
    The algorithm for replace is quite complicated:
8389
      - for all disks of the instance:
8390
        - create new LVs on the new node with same names
8391
        - shutdown the drbd device on the old secondary
8392
        - disconnect the drbd network on the primary
8393
        - create the drbd device on the new secondary
8394
        - network attach the drbd on the primary, using an artifice:
8395
          the drbd code for Attach() will connect to the network if it
8396
          finds a device which is connected to the good local disks but
8397
          not network enabled
8398
      - wait for sync across all devices
8399
      - remove all disks from the old secondary
8400

8401
    Failures are not very well handled.
8402

8403
    """
8404
    steps_total = 6
8405

    
8406
    # Step: check device activation
8407
    self.lu.LogStep(1, steps_total, "Check device existence")
8408
    self._CheckDisksExistence([self.instance.primary_node])
8409
    self._CheckVolumeGroup([self.instance.primary_node])
8410

    
8411
    # Step: check other node consistency
8412
    self.lu.LogStep(2, steps_total, "Check peer consistency")
8413
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
8414

    
8415
    # Step: create new storage
8416
    self.lu.LogStep(3, steps_total, "Allocate new storage")
8417
    for idx, dev in enumerate(self.instance.disks):
8418
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
8419
                      (self.new_node, idx))
8420
      # we pass force_create=True to force LVM creation
8421
      for new_lv in dev.children:
8422
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
8423
                        _GetInstanceInfoText(self.instance), False)
8424

    
8425
    # Step 4: drbd minors and drbd setup changes
8426
    # after this, we must manually remove the drbd minors on both the
8427
    # error and the success paths
8428
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8429
    minors = self.cfg.AllocateDRBDMinor([self.new_node
8430
                                         for dev in self.instance.disks],
8431
                                        self.instance.name)
8432
    logging.debug("Allocated minors %r", minors)
8433

    
8434
    iv_names = {}
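    # here iv_names is keyed by disk index and stores (dev, old_children,
    # new_net_id), where new_net_id is the networked logical_id the disk
    # will get once it points at the new secondary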
8435
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
8436
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
8437
                      (self.new_node, idx))
8438
      # create new devices on new_node; note that we create two IDs:
8439
      # one without port, so the drbd will be activated without
8440
      # networking information on the new node at this stage, and one
8441
      # with network, for the later activation in step 4
8442
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
8443
      if self.instance.primary_node == o_node1:
8444
        p_minor = o_minor1
8445
      else:
8446
        assert self.instance.primary_node == o_node2, "Three-node instance?"
8447
        p_minor = o_minor2
8448

    
8449
      new_alone_id = (self.instance.primary_node, self.new_node, None,
8450
                      p_minor, new_minor, o_secret)
8451
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
8452
                    p_minor, new_minor, o_secret)
8453

    
8454
      iv_names[idx] = (dev, dev.children, new_net_id)
8455
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
8456
                    new_net_id)
8457
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
8458
                              logical_id=new_alone_id,
8459
                              children=dev.children,
8460
                              size=dev.size)
8461
      try:
8462
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
8463
                              _GetInstanceInfoText(self.instance), False)
8464
      except errors.GenericError:
8465
        self.cfg.ReleaseDRBDMinors(self.instance.name)
8466
        raise
8467

    
8468
    # We have new devices, shutdown the drbd on the old secondary
8469
    for idx, dev in enumerate(self.instance.disks):
8470
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
8471
      self.cfg.SetDiskID(dev, self.target_node)
8472
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
8473
      if msg:
8474
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
8475
                           "node: %s" % (idx, msg),
8476
                           hint=("Please cleanup this device manually as"
8477
                                 " soon as possible"))
8478

    
8479
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
8480
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
8481
                                               self.node_secondary_ip,
8482
                                               self.instance.disks)\
8483
                                              [self.instance.primary_node]
8484

    
8485
    msg = result.fail_msg
8486
    if msg:
8487
      # detaches didn't succeed (unlikely)
8488
      self.cfg.ReleaseDRBDMinors(self.instance.name)
8489
      raise errors.OpExecError("Can't detach the disks from the network on"
8490
                               " old node: %s" % (msg,))
8491

    
8492
    # if we managed to detach at least one, we update all the disks of
8493
    # the instance to point to the new secondary
8494
    self.lu.LogInfo("Updating instance configuration")
8495
    for dev, _, new_logical_id in iv_names.itervalues():
8496
      dev.logical_id = new_logical_id
8497
      self.cfg.SetDiskID(dev, self.instance.primary_node)
8498

    
8499
    self.cfg.Update(self.instance, feedback_fn)
8500

    
8501
    # and now perform the drbd attach
8502
    self.lu.LogInfo("Attaching primary drbds to new secondary"
8503
                    " (standalone => connected)")
8504
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8505
                                            self.new_node],
8506
                                           self.node_secondary_ip,
8507
                                           self.instance.disks,
8508
                                           self.instance.name,
8509
                                           False)
8510
    for to_node, to_result in result.items():
8511
      msg = to_result.fail_msg
8512
      if msg:
8513
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8514
                           to_node, msg,
8515
                           hint=("please do a gnt-instance info to see the"
8516
                                 " status of disks"))
8517
    cstep = 5
8518
    if self.early_release:
8519
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
8520
      cstep += 1
8521
      self._RemoveOldStorage(self.target_node, iv_names)
8522
      # WARNING: we release all node locks here, do not do other RPCs
8523
      # than WaitForSync to the primary node
8524
      self._ReleaseNodeLock([self.instance.primary_node,
8525
                             self.target_node,
8526
                             self.new_node])
8527

    
8528
    # Wait for sync
8529
    # This can fail as the old devices are degraded and _WaitForSync
8530
    # does a combined result over all disks, so we don't check its return value
8531
    self.lu.LogStep(cstep, steps_total, "Sync devices")
8532
    cstep += 1
8533
    _WaitForSync(self.lu, self.instance)
8534

    
8535
    # Check all devices manually
8536
    self._CheckDevices(self.instance.primary_node, iv_names)
8537

    
8538
    # Step: remove old storage
8539
    if not self.early_release:
8540
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
8541
      self._RemoveOldStorage(self.target_node, iv_names)
8542

    
8543

    
8544
class LURepairNodeStorage(NoHooksLU):
8545
  """Repairs the volume group on a node.
8546

8547
  """
8548
  _OP_PARAMS = [
8549
    _PNodeName,
8550
    ("storage_type", ht.NoDefault, _CheckStorageType),
8551
    ("name", ht.NoDefault, ht.TNonEmptyString),
8552
    ("ignore_consistency", False, ht.TBool),
8553
    ]
8554
  REQ_BGL = False
8555

    
8556
  def CheckArguments(self):
8557
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8558

    
8559
    storage_type = self.op.storage_type
8560

    
8561
    if (constants.SO_FIX_CONSISTENCY not in
8562
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8563
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
8564
                                 " repaired" % storage_type,
8565
                                 errors.ECODE_INVAL)
8566

    
8567
  def ExpandNames(self):
8568
    self.needed_locks = {
8569
      locking.LEVEL_NODE: [self.op.node_name],
8570
      }
8571

    
8572
  def _CheckFaultyDisks(self, instance, node_name):
8573
    """Ensure faulty disks abort the opcode or at least warn."""
8574
    try:
8575
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8576
                                  node_name, True):
8577
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8578
                                   " node '%s'" % (instance.name, node_name),
8579
                                   errors.ECODE_STATE)
8580
    except errors.OpPrereqError, err:
8581
      if self.op.ignore_consistency:
8582
        self.proc.LogWarning(str(err.args[0]))
8583
      else:
8584
        raise
8585

    
8586
  def CheckPrereq(self):
8587
    """Check prerequisites.
8588

8589
    """
8590
    # Check whether any instance on this node has faulty disks
8591
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8592
      if not inst.admin_up:
8593
        continue
8594
      check_nodes = set(inst.all_nodes)
8595
      check_nodes.discard(self.op.node_name)
8596
      for inst_node_name in check_nodes:
8597
        self._CheckFaultyDisks(inst, inst_node_name)
8598

    
8599
  def Exec(self, feedback_fn):
8600
    feedback_fn("Repairing storage unit '%s' on %s ..." %
8601
                (self.op.name, self.op.node_name))
8602

    
8603
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8604
    result = self.rpc.call_storage_execute(self.op.node_name,
8605
                                           self.op.storage_type, st_args,
8606
                                           self.op.name,
8607
                                           constants.SO_FIX_CONSISTENCY)
8608
    result.Raise("Failed to repair storage unit '%s' on %s" %
8609
                 (self.op.name, self.op.node_name))
8610

    
8611

    
8612
class LUNodeEvacuationStrategy(NoHooksLU):
8613
  """Computes the node evacuation strategy.
8614

8615
  """
8616
  _OP_PARAMS = [
8617
    ("nodes", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
8618
    ("remote_node", None, ht.TMaybeString),
8619
    ("iallocator", None, ht.TMaybeString),
8620
    ]
8621
  REQ_BGL = False
8622

    
8623
  def CheckArguments(self):
8624
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
8625

    
8626
  def ExpandNames(self):
8627
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
8628
    self.needed_locks = locks = {}
8629
    if self.op.remote_node is None:
8630
      locks[locking.LEVEL_NODE] = locking.ALL_SET
8631
    else:
8632
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8633
      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
8634

    
8635
  def Exec(self, feedback_fn):
8636
    if self.op.remote_node is not None:
8637
      instances = []
8638
      for node in self.op.nodes:
8639
        instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
8640
      result = []
8641
      for i in instances:
8642
        if i.primary_node == self.op.remote_node:
8643
          raise errors.OpPrereqError("Node %s is the primary node of"
8644
                                     " instance %s, cannot use it as"
8645
                                     " secondary" %
8646
                                     (self.op.remote_node, i.name),
8647
                                     errors.ECODE_INVAL)
8648
        result.append([i.name, self.op.remote_node])
8649
    else:
8650
      ial = IAllocator(self.cfg, self.rpc,
8651
                       mode=constants.IALLOCATOR_MODE_MEVAC,
8652
                       evac_nodes=self.op.nodes)
8653
      ial.Run(self.op.iallocator, validate=True)
8654
      if not ial.success:
8655
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
8656
                                 errors.ECODE_NORES)
8657
      result = ial.result
8658
    return result
8659

    
8660

    
8661
class LUGrowDisk(LogicalUnit):
8662
  """Grow a disk of an instance.
8663

8664
  """
8665
  HPATH = "disk-grow"
8666
  HTYPE = constants.HTYPE_INSTANCE
8667
  _OP_PARAMS = [
8668
    _PInstanceName,
8669
    ("disk", ht.NoDefault, ht.TInt),
8670
    ("amount", ht.NoDefault, ht.TInt),
8671
    ("wait_for_sync", True, ht.TBool),
8672
    ]
8673
  REQ_BGL = False
8674

    
8675
  def ExpandNames(self):
8676
    self._ExpandAndLockInstance()
8677
    self.needed_locks[locking.LEVEL_NODE] = []
8678
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8679

    
8680
  def DeclareLocks(self, level):
8681
    if level == locking.LEVEL_NODE:
8682
      self._LockInstancesNodes()
8683

    
8684
  def BuildHooksEnv(self):
8685
    """Build hooks env.
8686

8687
    This runs on the master, the primary and all the secondaries.
8688

8689
    """
8690
    env = {
8691
      "DISK": self.op.disk,
8692
      "AMOUNT": self.op.amount,
8693
      }
8694
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8695
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8696
    return env, nl, nl
8697

    
8698
  def CheckPrereq(self):
8699
    """Check prerequisites.
8700

8701
    This checks that the instance is in the cluster.
8702

8703
    """
8704
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8705
    assert instance is not None, \
8706
      "Cannot retrieve locked instance %s" % self.op.instance_name
8707
    nodenames = list(instance.all_nodes)
8708
    for node in nodenames:
8709
      _CheckNodeOnline(self, node)
8710

    
8711
    self.instance = instance
8712

    
8713
    if instance.disk_template not in constants.DTS_GROWABLE:
8714
      raise errors.OpPrereqError("Instance's disk layout does not support"
8715
                                 " growing.", errors.ECODE_INVAL)
8716

    
8717
    self.disk = instance.FindDisk(self.op.disk)
8718

    
8719
    if instance.disk_template != constants.DT_FILE:
8720
      # TODO: check the free disk space for file, once that feature is
      # supported
8722
      _CheckNodesFreeDisk(self, nodenames, self.op.amount)
8723

    
8724
  def Exec(self, feedback_fn):
8725
    """Execute disk grow.
8726

8727
    """
8728
    instance = self.instance
8729
    disk = self.disk
8730

    
8731
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8732
    if not disks_ok:
8733
      raise errors.OpExecError("Cannot activate block device to grow")
8734

    
8735
    for node in instance.all_nodes:
8736
      self.cfg.SetDiskID(disk, node)
8737
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8738
      result.Raise("Grow request failed to node %s" % node)
8739

    
8740
      # TODO: Rewrite code to work properly
8741
      # DRBD goes into sync mode for a short amount of time after executing the
8742
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8743
      # calling "resize" in sync mode fails. Sleeping for a short amount of
8744
      # time is a work-around.
8745
      time.sleep(5)
8746

    
8747
    disk.RecordGrow(self.op.amount)
8748
    self.cfg.Update(instance, feedback_fn)
8749
    if self.op.wait_for_sync:
8750
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
8751
      if disk_abort:
8752
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8753
                             " status.\nPlease check the instance.")
8754
      if not instance.admin_up:
8755
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8756
    elif not instance.admin_up:
8757
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
8758
                           " not supposed to be running because no wait for"
8759
                           " sync mode was requested.")
8760

    
8761

    
8762
class LUQueryInstanceData(NoHooksLU):
8763
  """Query runtime instance data.
8764

8765
  """
8766
  _OP_PARAMS = [
8767
    ("instances", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
8768
    ("static", False, ht.TBool),
8769
    ]
8770
  REQ_BGL = False
8771

    
8772
  def ExpandNames(self):
8773
    self.needed_locks = {}
8774
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
8775

    
8776
    if self.op.instances:
8777
      self.wanted_names = []
8778
      for name in self.op.instances:
8779
        full_name = _ExpandInstanceName(self.cfg, name)
8780
        self.wanted_names.append(full_name)
8781
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8782
    else:
8783
      self.wanted_names = None
8784
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8785

    
8786
    self.needed_locks[locking.LEVEL_NODE] = []
8787
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8788

    
8789
  def DeclareLocks(self, level):
8790
    if level == locking.LEVEL_NODE:
8791
      self._LockInstancesNodes()
8792

    
8793
  def CheckPrereq(self):
8794
    """Check prerequisites.
8795

8796
    This only checks the optional instance list against the existing names.
8797

8798
    """
8799
    if self.wanted_names is None:
8800
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8801

    
8802
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
8803
                             in self.wanted_names]
8804

    
8805
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
8806
    """Returns the status of a block device
8807

8808
    """
8809
    if self.op.static or not node:
8810
      return None
8811

    
8812
    self.cfg.SetDiskID(dev, node)
8813

    
8814
    result = self.rpc.call_blockdev_find(node, dev)
8815
    if result.offline:
8816
      return None
8817

    
8818
    result.Raise("Can't compute disk status for %s" % instance_name)
8819

    
8820
    status = result.payload
8821
    if status is None:
8822
      return None
8823

    
8824
    return (status.dev_path, status.major, status.minor,
8825
            status.sync_percent, status.estimated_time,
8826
            status.is_degraded, status.ldisk_status)
8827

    
8828
  def _ComputeDiskStatus(self, instance, snode, dev):
8829
    """Compute block device status.
8830

8831
    """
8832
    if dev.dev_type in constants.LDS_DRBD:
8833
      # we change the snode then (otherwise we use the one passed in)
8834
      if dev.logical_id[0] == instance.primary_node:
8835
        snode = dev.logical_id[1]
8836
      else:
8837
        snode = dev.logical_id[0]
8838

    
8839
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8840
                                              instance.name, dev)
8841
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8842

    
8843
    if dev.children:
8844
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
8845
                      for child in dev.children]
8846
    else:
8847
      dev_children = []
8848

    
8849
    data = {
8850
      "iv_name": dev.iv_name,
8851
      "dev_type": dev.dev_type,
8852
      "logical_id": dev.logical_id,
8853
      "physical_id": dev.physical_id,
8854
      "pstatus": dev_pstatus,
8855
      "sstatus": dev_sstatus,
8856
      "children": dev_children,
8857
      "mode": dev.mode,
8858
      "size": dev.size,
8859
      }
8860

    
8861
    return data
8862

    
8863
  def Exec(self, feedback_fn):
8864
    """Gather and return data"""
8865
    result = {}
8866

    
8867
    cluster = self.cfg.GetClusterInfo()
8868

    
8869
    for instance in self.wanted_instances:
8870
      if not self.op.static:
8871
        remote_info = self.rpc.call_instance_info(instance.primary_node,
8872
                                                  instance.name,
8873
                                                  instance.hypervisor)
8874
        remote_info.Raise("Error checking node %s" % instance.primary_node)
8875
        remote_info = remote_info.payload
8876
        if remote_info and "state" in remote_info:
8877
          remote_state = "up"
8878
        else:
8879
          remote_state = "down"
8880
      else:
8881
        remote_state = None
8882
      if instance.admin_up:
8883
        config_state = "up"
8884
      else:
8885
        config_state = "down"
8886

    
8887
      disks = [self._ComputeDiskStatus(instance, None, device)
8888
               for device in instance.disks]
8889

    
8890
      idict = {
8891
        "name": instance.name,
8892
        "config_state": config_state,
8893
        "run_state": remote_state,
8894
        "pnode": instance.primary_node,
8895
        "snodes": instance.secondary_nodes,
8896
        "os": instance.os,
8897
        # this happens to be the same format used for hooks
8898
        "nics": _NICListToTuple(self, instance.nics),
8899
        "disk_template": instance.disk_template,
8900
        "disks": disks,
8901
        "hypervisor": instance.hypervisor,
8902
        "network_port": instance.network_port,
8903
        "hv_instance": instance.hvparams,
8904
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
8905
        "be_instance": instance.beparams,
8906
        "be_actual": cluster.FillBE(instance),
8907
        "os_instance": instance.osparams,
8908
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
8909
        "serial_no": instance.serial_no,
8910
        "mtime": instance.mtime,
8911
        "ctime": instance.ctime,
8912
        "uuid": instance.uuid,
8913
        }
8914

    
8915
      result[instance.name] = idict
8916

    
8917
    return result
8918

    
8919

    
8920
class LUSetInstanceParams(LogicalUnit):
8921
  """Modifies an instances's parameters.
8922

8923
  """
8924
  HPATH = "instance-modify"
8925
  HTYPE = constants.HTYPE_INSTANCE
8926
  _OP_PARAMS = [
8927
    _PInstanceName,
8928
    ("nics", ht.EmptyList, ht.TList),
8929
    ("disks", ht.EmptyList, ht.TList),
8930
    ("beparams", ht.EmptyDict, ht.TDict),
8931
    ("hvparams", ht.EmptyDict, ht.TDict),
8932
    ("disk_template", None, ht.TMaybeString),
8933
    ("remote_node", None, ht.TMaybeString),
8934
    ("os_name", None, ht.TMaybeString),
8935
    ("force_variant", False, ht.TBool),
8936
    ("osparams", None, ht.TOr(ht.TDict, ht.TNone)),
8937
    _PForce,
8938
    ]
8939
  REQ_BGL = False
8940

    
8941
  def CheckArguments(self):
8942
    if not (self.op.nics or self.op.disks or self.op.disk_template or
8943
            self.op.hvparams or self.op.beparams or self.op.os_name):
8944
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8945

    
8946
    if self.op.hvparams:
8947
      _CheckGlobalHvParams(self.op.hvparams)
8948

    
8949
    # Disk validation
8950
    disk_addremove = 0
8951
    for disk_op, disk_dict in self.op.disks:
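      # disk_op is either DDM_ADD, DDM_REMOVE or the integer index of an
      # existing disk to modify; disk_dict carries the parameters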
8952
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
8953
      if disk_op == constants.DDM_REMOVE:
8954
        disk_addremove += 1
8955
        continue
8956
      elif disk_op == constants.DDM_ADD:
8957
        disk_addremove += 1
8958
      else:
8959
        if not isinstance(disk_op, int):
8960
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8961
        if not isinstance(disk_dict, dict):
8962
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8963
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8964

    
8965
      if disk_op == constants.DDM_ADD:
8966
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8967
        if mode not in constants.DISK_ACCESS_SET:
8968
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8969
                                     errors.ECODE_INVAL)
8970
        size = disk_dict.get('size', None)
8971
        if size is None:
8972
          raise errors.OpPrereqError("Required disk parameter size missing",
8973
                                     errors.ECODE_INVAL)
8974
        try:
8975
          size = int(size)
8976
        except (TypeError, ValueError), err:
8977
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8978
                                     str(err), errors.ECODE_INVAL)
8979
        disk_dict['size'] = size
8980
      else:
8981
        # modification of disk
8982
        if 'size' in disk_dict:
8983
          raise errors.OpPrereqError("Disk size change not possible, use"
8984
                                     " grow-disk", errors.ECODE_INVAL)
8985

    
8986
    if disk_addremove > 1:
8987
      raise errors.OpPrereqError("Only one disk add or remove operation"
8988
                                 " supported at a time", errors.ECODE_INVAL)
8989

    
8990
    if self.op.disks and self.op.disk_template is not None:
8991
      raise errors.OpPrereqError("Disk template conversion and other disk"
8992
                                 " changes not supported at the same time",
8993
                                 errors.ECODE_INVAL)
8994

    
8995
    if self.op.disk_template:
8996
      _CheckDiskTemplate(self.op.disk_template)
8997
      if (self.op.disk_template in constants.DTS_NET_MIRROR and
8998
          self.op.remote_node is None):
8999
        raise errors.OpPrereqError("Changing the disk template to a mirrored"
9000
                                   " one requires specifying a secondary node",
9001
                                   errors.ECODE_INVAL)
9002

    
9003
    # NIC validation
9004
    nic_addremove = 0
9005
    for nic_op, nic_dict in self.op.nics:
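      # same convention as for disks: nic_op is DDM_ADD, DDM_REMOVE or an
      # index into the instance's existing NICs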
9006
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
9007
      if nic_op == constants.DDM_REMOVE:
9008
        nic_addremove += 1
9009
        continue
9010
      elif nic_op == constants.DDM_ADD:
9011
        nic_addremove += 1
9012
      else:
9013
        if not isinstance(nic_op, int):
9014
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
9015
        if not isinstance(nic_dict, dict):
9016
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
9017
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9018

    
9019
      # nic_dict should be a dict
9020
      nic_ip = nic_dict.get('ip', None)
9021
      if nic_ip is not None:
9022
        if nic_ip.lower() == constants.VALUE_NONE:
9023
          nic_dict['ip'] = None
9024
        else:
9025
          if not netutils.IPAddress.IsValid(nic_ip):
9026
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
9027
                                       errors.ECODE_INVAL)
9028

    
9029
      nic_bridge = nic_dict.get('bridge', None)
9030
      nic_link = nic_dict.get('link', None)
9031
      if nic_bridge and nic_link:
9032
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
9033
                                   " at the same time", errors.ECODE_INVAL)
9034
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
9035
        nic_dict['bridge'] = None
9036
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
9037
        nic_dict['link'] = None
9038

    
9039
      if nic_op == constants.DDM_ADD:
9040
        nic_mac = nic_dict.get('mac', None)
9041
        if nic_mac is None:
9042
          nic_dict['mac'] = constants.VALUE_AUTO
9043

    
9044
      if 'mac' in nic_dict:
9045
        nic_mac = nic_dict['mac']
9046
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9047
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
9048

    
9049
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
9050
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
9051
                                     " modifying an existing nic",
9052
                                     errors.ECODE_INVAL)
9053

    
9054
    if nic_addremove > 1:
9055
      raise errors.OpPrereqError("Only one NIC add or remove operation"
9056
                                 " supported at a time", errors.ECODE_INVAL)
9057

    
9058
  def ExpandNames(self):
9059
    self._ExpandAndLockInstance()
9060
    self.needed_locks[locking.LEVEL_NODE] = []
9061
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9062

    
9063
  def DeclareLocks(self, level):
9064
    if level == locking.LEVEL_NODE:
9065
      self._LockInstancesNodes()
9066
      if self.op.disk_template and self.op.remote_node:
9067
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9068
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
9069

    
9070
  def BuildHooksEnv(self):
9071
    """Build hooks env.
9072

9073
    This runs on the master, primary and secondaries.
9074

9075
    """
9076
    args = dict()
9077
    if constants.BE_MEMORY in self.be_new:
9078
      args['memory'] = self.be_new[constants.BE_MEMORY]
9079
    if constants.BE_VCPUS in self.be_new:
9080
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
9081
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
9082
    # information at all.
9083
    if self.op.nics:
9084
      args['nics'] = []
9085
      nic_override = dict(self.op.nics)
9086
      for idx, nic in enumerate(self.instance.nics):
9087
        if idx in nic_override:
9088
          this_nic_override = nic_override[idx]
9089
        else:
9090
          this_nic_override = {}
9091
        if 'ip' in this_nic_override:
9092
          ip = this_nic_override['ip']
9093
        else:
9094
          ip = nic.ip
9095
        if 'mac' in this_nic_override:
9096
          mac = this_nic_override['mac']
9097
        else:
9098
          mac = nic.mac
9099
        if idx in self.nic_pnew:
9100
          nicparams = self.nic_pnew[idx]
9101
        else:
9102
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
9103
        mode = nicparams[constants.NIC_MODE]
9104
        link = nicparams[constants.NIC_LINK]
9105
        args['nics'].append((ip, mac, mode, link))
9106
      if constants.DDM_ADD in nic_override:
9107
        ip = nic_override[constants.DDM_ADD].get('ip', None)
9108
        mac = nic_override[constants.DDM_ADD]['mac']
9109
        nicparams = self.nic_pnew[constants.DDM_ADD]
9110
        mode = nicparams[constants.NIC_MODE]
9111
        link = nicparams[constants.NIC_LINK]
9112
        args['nics'].append((ip, mac, mode, link))
9113
      elif constants.DDM_REMOVE in nic_override:
9114
        del args['nics'][-1]
9115

    
9116
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
9117
    if self.op.disk_template:
9118
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
9119
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9120
    return env, nl, nl
9121

    
9122
  def CheckPrereq(self):
9123
    """Check prerequisites.
9124

9125
    This only checks the instance list against the existing names.
9126

9127
    """
9128
    # checking the new params on the primary/secondary nodes
9129

    
9130
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9131
    cluster = self.cluster = self.cfg.GetClusterInfo()
9132
    assert self.instance is not None, \
9133
      "Cannot retrieve locked instance %s" % self.op.instance_name
9134
    pnode = instance.primary_node
9135
    nodelist = list(instance.all_nodes)
9136

    
9137
    # OS change
9138
    if self.op.os_name and not self.op.force:
9139
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
9140
                      self.op.force_variant)
9141
      instance_os = self.op.os_name
9142
    else:
9143
      instance_os = instance.os
9144

    
9145
    if self.op.disk_template:
9146
      if instance.disk_template == self.op.disk_template:
9147
        raise errors.OpPrereqError("Instance already has disk template %s" %
9148
                                   instance.disk_template, errors.ECODE_INVAL)
9149

    
9150
      if (instance.disk_template,
9151
          self.op.disk_template) not in self._DISK_CONVERSIONS:
9152
        raise errors.OpPrereqError("Unsupported disk template conversion from"
9153
                                   " %s to %s" % (instance.disk_template,
9154
                                                  self.op.disk_template),
9155
                                   errors.ECODE_INVAL)
9156
      _CheckInstanceDown(self, instance, "cannot change disk template")
9157
      if self.op.disk_template in constants.DTS_NET_MIRROR:
9158
        if self.op.remote_node == pnode:
9159
          raise errors.OpPrereqError("Given new secondary node %s is the same"
9160
                                     " as the primary node of the instance" %
9161
                                     self.op.remote_node, errors.ECODE_STATE)
9162
        _CheckNodeOnline(self, self.op.remote_node)
9163
        _CheckNodeNotDrained(self, self.op.remote_node)
9164
        disks = [{"size": d.size} for d in instance.disks]
9165
        required = _ComputeDiskSize(self.op.disk_template, disks)
9166
        _CheckNodesFreeDisk(self, [self.op.remote_node], required)
9167

    
9168
    # hvparams processing
9169
    if self.op.hvparams:
9170
      hv_type = instance.hypervisor
9171
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
9172
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
9173
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
9174

    
9175
      # local check
9176
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
9177
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
9178
      self.hv_new = hv_new # the new actual values
9179
      self.hv_inst = i_hvdict # the new dict (without defaults)
9180
    else:
9181
      self.hv_new = self.hv_inst = {}
9182

    
9183
    # beparams processing
9184
    if self.op.beparams:
9185
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
9186
                                   use_none=True)
9187
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
9188
      be_new = cluster.SimpleFillBE(i_bedict)
9189
      self.be_new = be_new # the new actual values
9190
      self.be_inst = i_bedict # the new dict (without defaults)
9191
    else:
9192
      self.be_new = self.be_inst = {}
9193

    
9194
    # osparams processing
9195
    if self.op.osparams:
9196
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
9197
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
9198
      self.os_inst = i_osdict # the new dict (without defaults)
9199
    else:
9200
      self.os_inst = {}
9201

    
9202
    self.warn = []
9203

    
9204
    if constants.BE_MEMORY in self.op.beparams and not self.op.force:
9205
      mem_check_list = [pnode]
9206
      if be_new[constants.BE_AUTO_BALANCE]:
9207
        # either we changed auto_balance to yes or it was from before
9208
        mem_check_list.extend(instance.secondary_nodes)
9209
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
9210
                                                  instance.hypervisor)
9211
      nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
9212
                                         instance.hypervisor)
9213
      pninfo = nodeinfo[pnode]
9214
      msg = pninfo.fail_msg
9215
      if msg:
9216
        # Assume the primary node is unreachable and go ahead
9217
        self.warn.append("Can't get info from primary node %s: %s" %
9218
                         (pnode,  msg))
9219
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
9220
        self.warn.append("Node data from primary node %s doesn't contain"
9221
                         " free memory information" % pnode)
9222
      elif instance_info.fail_msg:
9223
        self.warn.append("Can't get instance runtime information: %s" %
9224
                        instance_info.fail_msg)
9225
      else:
9226
        if instance_info.payload:
9227
          current_mem = int(instance_info.payload['memory'])
9228
        else:
9229
          # Assume instance not running
9230
          # (there is a slight race condition here, but it's not very probable,
9231
          # and we have no other way to check)
9232
          current_mem = 0
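        # shortfall: requested memory minus what the instance already uses
        # minus the node's free memory; a positive value means the new
        # beparams cannot be satisfied on the primary node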
9233
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
9234
                    pninfo.payload['memory_free'])
9235
        if miss_mem > 0:
9236
          raise errors.OpPrereqError("This change will prevent the instance"
9237
                                     " from starting, due to %d MB of memory"
9238
                                     " missing on its primary node" % miss_mem,
9239
                                     errors.ECODE_NORES)
9240

    
9241
      if be_new[constants.BE_AUTO_BALANCE]:
9242
        for node, nres in nodeinfo.items():
9243
          if node not in instance.secondary_nodes:
9244
            continue
9245
          msg = nres.fail_msg
9246
          if msg:
9247
            self.warn.append("Can't get info from secondary node %s: %s" %
9248
                             (node, msg))
9249
          elif not isinstance(nres.payload.get('memory_free', None), int):
9250
            self.warn.append("Secondary node %s didn't return free"
9251
                             " memory information" % node)
9252
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
9253
            self.warn.append("Not enough memory to failover instance to"
9254
                             " secondary node %s" % node)
9255

    
9256
    # NIC processing
9257
    self.nic_pnew = {}
9258
    self.nic_pinst = {}
9259
    for nic_op, nic_dict in self.op.nics:
9260
      if nic_op == constants.DDM_REMOVE:
9261
        if not instance.nics:
9262
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
9263
                                     errors.ECODE_INVAL)
9264
        continue
9265
      if nic_op != constants.DDM_ADD:
9266
        # an existing nic
9267
        if not instance.nics:
9268
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
9269
                                     " no NICs" % nic_op,
9270
                                     errors.ECODE_INVAL)
9271
        if nic_op < 0 or nic_op >= len(instance.nics):
9272
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
9273
                                     " are 0 to %d" %
9274
                                     (nic_op, len(instance.nics) - 1),
9275
                                     errors.ECODE_INVAL)
9276
        old_nic_params = instance.nics[nic_op].nicparams
9277
        old_nic_ip = instance.nics[nic_op].ip
9278
      else:
9279
        old_nic_params = {}
9280
        old_nic_ip = None
9281

    
9282
      update_params_dict = dict([(key, nic_dict[key])
9283
                                 for key in constants.NICS_PARAMETERS
9284
                                 if key in nic_dict])
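      # (only keys listed in NICS_PARAMETERS are nic parameters proper;
      # "ip" and "mac" are handled separately further down)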
9285

    
9286
      if 'bridge' in nic_dict:
9287
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
9288

    
9289
      new_nic_params = _GetUpdatedParams(old_nic_params,
9290
                                         update_params_dict)
9291
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
9292
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
9293
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
9294
      self.nic_pinst[nic_op] = new_nic_params
9295
      self.nic_pnew[nic_op] = new_filled_nic_params
9296
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
9297

    
9298
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
9299
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
9300
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
9301
        if msg:
9302
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
9303
          if self.op.force:
9304
            self.warn.append(msg)
9305
          else:
9306
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
9307
      if new_nic_mode == constants.NIC_MODE_ROUTED:
9308
        if 'ip' in nic_dict:
9309
          nic_ip = nic_dict['ip']
9310
        else:
9311
          nic_ip = old_nic_ip
9312
        if nic_ip is None:
9313
          raise errors.OpPrereqError('Cannot set the nic ip to None'
9314
                                     ' on a routed nic', errors.ECODE_INVAL)
9315
      if 'mac' in nic_dict:
9316
        nic_mac = nic_dict['mac']
9317
        if nic_mac is None:
9318
          raise errors.OpPrereqError('Cannot set the nic mac to None',
9319
                                     errors.ECODE_INVAL)
9320
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9321
          # otherwise generate the mac
9322
          nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
9323
        else:
9324
          # or validate/reserve the current one
9325
          try:
9326
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
9327
          except errors.ReservationError:
9328
            raise errors.OpPrereqError("MAC address %s already in use"
9329
                                       " in cluster" % nic_mac,
9330
                                       errors.ECODE_NOTUNIQUE)
9331

    
9332
    # DISK processing
9333
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
9334
      raise errors.OpPrereqError("Disk operations not supported for"
9335
                                 " diskless instances",
9336
                                 errors.ECODE_INVAL)
9337
    for disk_op, _ in self.op.disks:
9338
      if disk_op == constants.DDM_REMOVE:
9339
        if len(instance.disks) == 1:
9340
          raise errors.OpPrereqError("Cannot remove the last disk of"
9341
                                     " an instance", errors.ECODE_INVAL)
9342
        _CheckInstanceDown(self, instance, "cannot remove disks")
9343

    
9344
      if (disk_op == constants.DDM_ADD and
          len(instance.disks) >= constants.MAX_DISKS):
9346
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
9347
                                   " add more" % constants.MAX_DISKS,
9348
                                   errors.ECODE_STATE)
9349
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
9350
        # an existing disk
9351
        if disk_op < 0 or disk_op >= len(instance.disks):
9352
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
9353
                                     " are 0 to %d" %
9354
                                     (disk_op, len(instance.disks)),
9355
                                     errors.ECODE_INVAL)
9356

    
9357
    return
9358

    
9359
  def _ConvertPlainToDrbd(self, feedback_fn):
9360
    """Converts an instance from plain to drbd.
9361

9362
    """
9363
    feedback_fn("Converting template to drbd")
9364
    instance = self.instance
9365
    pnode = instance.primary_node
9366
    snode = self.op.remote_node
9367

    
9368
    # create a fake disk info for _GenerateDiskTemplate
9369
    disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
9370
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
9371
                                      instance.name, pnode, [snode],
9372
                                      disk_info, None, None, 0)
9373
    info = _GetInstanceInfoText(instance)
9374
    feedback_fn("Creating aditional volumes...")
9375
    # first, create the missing data and meta devices
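    # (on the primary only the meta LV is new, since the existing plain LV
    # is renamed and reused as the data LV; the secondary needs both LVs)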
9376
    for disk in new_disks:
9377
      # unfortunately this is... not too nice
9378
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
9379
                            info, True)
9380
      for child in disk.children:
9381
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
9382
    # at this stage, all new LVs have been created, we can rename the
9383
    # old ones
9384
    feedback_fn("Renaming original volumes...")
9385
    rename_list = [(o, n.children[0].logical_id)
9386
                   for (o, n) in zip(instance.disks, new_disks)]
9387
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
9388
    result.Raise("Failed to rename original LVs")
9389

    
9390
    feedback_fn("Initializing DRBD devices...")
9391
    # all child devices are in place, we can now create the DRBD devices
9392
    for disk in new_disks:
9393
      for node in [pnode, snode]:
9394
        f_create = node == pnode
9395
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
9396

    
9397
    # at this point, the instance has been modified
9398
    instance.disk_template = constants.DT_DRBD8
9399
    instance.disks = new_disks
9400
    self.cfg.Update(instance, feedback_fn)
9401

    
9402
    # disks are created, waiting for sync
9403
    disk_abort = not _WaitForSync(self, instance)
9404
    if disk_abort:
9405
      raise errors.OpExecError("There are some degraded disks for"
9406
                               " this instance, please cleanup manually")
9407

    
9408
  def _ConvertDrbdToPlain(self, feedback_fn):
9409
    """Converts an instance from drbd to plain.
9410

9411
    """
9412
    instance = self.instance
9413
    assert len(instance.secondary_nodes) == 1
9414
    pnode = instance.primary_node
9415
    snode = instance.secondary_nodes[0]
9416
    feedback_fn("Converting template to plain")
9417

    
9418
    old_disks = instance.disks
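    # for DRBD8 disks children[0] is the data LV and children[1] the meta
    # LV, so keeping children[0] leaves us with plain LVs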
9419
    new_disks = [d.children[0] for d in old_disks]
9420

    
9421
    # copy over size and mode
9422
    for parent, child in zip(old_disks, new_disks):
9423
      child.size = parent.size
9424
      child.mode = parent.mode
9425

    
9426
    # update instance structure
9427
    instance.disks = new_disks
9428
    instance.disk_template = constants.DT_PLAIN
9429
    self.cfg.Update(instance, feedback_fn)
9430

    
9431
    feedback_fn("Removing volumes on the secondary node...")
9432
    for disk in old_disks:
9433
      self.cfg.SetDiskID(disk, snode)
9434
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
9435
      if msg:
9436
        self.LogWarning("Could not remove block device %s on node %s,"
9437
                        " continuing anyway: %s", disk.iv_name, snode, msg)
9438

    
9439
    feedback_fn("Removing unneeded volumes on the primary node...")
9440
    for idx, disk in enumerate(old_disks):
9441
      meta = disk.children[1]
9442
      self.cfg.SetDiskID(meta, pnode)
9443
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
9444
      if msg:
9445
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
9446
                        " continuing anyway: %s", idx, pnode, msg)
9447

    
9448

    
9449
  def Exec(self, feedback_fn):
9450
    """Modifies an instance.
9451

9452
    All parameters take effect only at the next restart of the instance.
9453

9454
    """
9455
    # Process here the warnings from CheckPrereq, as we don't have a
9456
    # feedback_fn there.
9457
    for warn in self.warn:
9458
      feedback_fn("WARNING: %s" % warn)
9459

    
9460
    result = []
9461
    instance = self.instance
9462
    # disk changes
9463
    for disk_op, disk_dict in self.op.disks:
9464
      if disk_op == constants.DDM_REMOVE:
9465
        # remove the last disk
9466
        device = instance.disks.pop()
9467
        device_idx = len(instance.disks)
9468
        for node, disk in device.ComputeNodeTree(instance.primary_node):
9469
          self.cfg.SetDiskID(disk, node)
9470
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
9471
          if msg:
9472
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
9473
                            " continuing anyway", device_idx, node, msg)
9474
        result.append(("disk/%d" % device_idx, "remove"))
9475
      elif disk_op == constants.DDM_ADD:
9476
        # add a new disk
9477
        if instance.disk_template == constants.DT_FILE:
9478
          file_driver, file_path = instance.disks[0].logical_id
9479
          file_path = os.path.dirname(file_path)
9480
        else:
9481
          file_driver = file_path = None
9482
        disk_idx_base = len(instance.disks)
9483
        new_disk = _GenerateDiskTemplate(self,
9484
                                         instance.disk_template,
9485
                                         instance.name, instance.primary_node,
9486
                                         instance.secondary_nodes,
9487
                                         [disk_dict],
9488
                                         file_path,
9489
                                         file_driver,
9490
                                         disk_idx_base)[0]
9491
        instance.disks.append(new_disk)
9492
        info = _GetInstanceInfoText(instance)
9493

    
9494
        logging.info("Creating volume %s for instance %s",
9495
                     new_disk.iv_name, instance.name)
9496
        # Note: this needs to be kept in sync with _CreateDisks
9497
        # HARDCODE
9498
        for node in instance.all_nodes:
9499
          f_create = node == instance.primary_node
9500
          try:
9501
            _CreateBlockDev(self, node, instance, new_disk,
9502
                            f_create, info, f_create)
9503
          except errors.OpExecError, err:
9504
            self.LogWarning("Failed to create volume %s (%s) on"
9505
                            " node %s: %s",
9506
                            new_disk.iv_name, new_disk, node, err)
9507
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
9508
                       (new_disk.size, new_disk.mode)))
9509
      else:
9510
        # change a given disk
9511
        instance.disks[disk_op].mode = disk_dict['mode']
9512
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
9513

    
9514
    if self.op.disk_template:
9515
      r_shut = _ShutdownInstanceDisks(self, instance)
9516
      if not r_shut:
9517
        raise errors.OpExecError("Cannot shutdow instance disks, unable to"
9518
                                 " proceed with disk template conversion")
9519
      mode = (instance.disk_template, self.op.disk_template)
9520
      try:
9521
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
9522
      except:
9523
        self.cfg.ReleaseDRBDMinors(instance.name)
9524
        raise
9525
      result.append(("disk_template", self.op.disk_template))
9526

    
9527
    # NIC changes
9528
    for nic_op, nic_dict in self.op.nics:
9529
      if nic_op == constants.DDM_REMOVE:
9530
        # remove the last nic
9531
        del instance.nics[-1]
9532
        result.append(("nic.%d" % len(instance.nics), "remove"))
9533
      elif nic_op == constants.DDM_ADD:
9534
        # mac and bridge should be set by now
9535
        mac = nic_dict['mac']
9536
        ip = nic_dict.get('ip', None)
9537
        nicparams = self.nic_pinst[constants.DDM_ADD]
9538
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9539
        instance.nics.append(new_nic)
9540
        result.append(("nic.%d" % (len(instance.nics) - 1),
9541
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
9542
                       (new_nic.mac, new_nic.ip,
9543
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
9544
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
9545
                       )))
9546
      else:
9547
        for key in 'mac', 'ip':
9548
          if key in nic_dict:
9549
            setattr(instance.nics[nic_op], key, nic_dict[key])
9550
        if nic_op in self.nic_pinst:
9551
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9552
        for key, val in nic_dict.iteritems():
9553
          result.append(("nic.%s/%d" % (key, nic_op), val))
9554

    
9555
    # hvparams changes
9556
    if self.op.hvparams:
9557
      instance.hvparams = self.hv_inst
9558
      for key, val in self.op.hvparams.iteritems():
9559
        result.append(("hv/%s" % key, val))
9560

    
9561
    # beparams changes
9562
    if self.op.beparams:
9563
      instance.beparams = self.be_inst
9564
      for key, val in self.op.beparams.iteritems():
9565
        result.append(("be/%s" % key, val))
9566

    
9567
    # OS change
9568
    if self.op.os_name:
9569
      instance.os = self.op.os_name
9570

    
9571
    # osparams changes
9572
    if self.op.osparams:
9573
      instance.osparams = self.os_inst
9574
      for key, val in self.op.osparams.iteritems():
9575
        result.append(("os/%s" % key, val))
9576

    
9577
    self.cfg.Update(instance, feedback_fn)
9578

    
9579
    return result
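    # The value returned above is a list of (parameter, change) pairs that the
    # caller presents to the user; an illustrative (made-up) example:
    #
    #   [("disk/1", "add:size=10240,mode=rw"),
    #    ("nic.ip/0", "198.51.100.10"),
    #    ("be/memory", 512),
    #    ("disk_template", "drbd")]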
9580

    
9581
  _DISK_CONVERSIONS = {
9582
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
9583
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
9584
    }
9585

    
9586

    
9587
class LUQueryExports(NoHooksLU):
9588
  """Query the exports list
9589

9590
  """
9591
  _OP_PARAMS = [
9592
    ("nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
9593
    ("use_locking", False, ht.TBool),
9594
    ]
9595
  REQ_BGL = False
9596

    
9597
  def ExpandNames(self):
9598
    self.needed_locks = {}
9599
    self.share_locks[locking.LEVEL_NODE] = 1
9600
    if not self.op.nodes:
9601
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9602
    else:
9603
      self.needed_locks[locking.LEVEL_NODE] = \
9604
        _GetWantedNodes(self, self.op.nodes)
9605

    
9606
  def Exec(self, feedback_fn):
9607
    """Compute the list of all the exported system images.
9608

9609
    @rtype: dict
9610
    @return: a dictionary with the structure node->(export-list)
9611
        where export-list is a list of the instances exported on
9612
        that node.
9613

9614
    """
9615
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
9616
    rpcresult = self.rpc.call_export_list(self.nodes)
9617
    result = {}
9618
    for node in rpcresult:
9619
      if rpcresult[node].fail_msg:
9620
        result[node] = False
9621
      else:
9622
        result[node] = rpcresult[node].payload
9623

    
9624
    return result
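    # Illustrative shape of the returned dictionary (host names made up);
    # a node whose RPC failed is mapped to False instead of a list:
    #
    #   {
    #     "node1.example.com": ["instance1.example.com"],
    #     "node2.example.com": [],
    #     "node3.example.com": False,
    #   }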
9625

    
9626

    
9627
class LUPrepareExport(NoHooksLU):
9628
  """Prepares an instance for an export and returns useful information.
9629

9630
  """
9631
  _OP_PARAMS = [
9632
    _PInstanceName,
9633
    ("mode", ht.NoDefault, ht.TElemOf(constants.EXPORT_MODES)),
9634
    ]
9635
  REQ_BGL = False
9636

    
9637
  def ExpandNames(self):
9638
    self._ExpandAndLockInstance()
9639

    
9640
  def CheckPrereq(self):
9641
    """Check prerequisites.
9642

9643
    """
9644
    instance_name = self.op.instance_name
9645

    
9646
    self.instance = self.cfg.GetInstanceInfo(instance_name)
9647
    assert self.instance is not None, \
9648
          "Cannot retrieve locked instance %s" % self.op.instance_name
9649
    _CheckNodeOnline(self, self.instance.primary_node)
9650

    
9651
    self._cds = _GetClusterDomainSecret()
9652

    
9653
  def Exec(self, feedback_fn):
9654
    """Prepares an instance for an export.
9655

9656
    """
9657
    instance = self.instance
9658

    
9659
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
9660
      salt = utils.GenerateSecret(8)
9661

    
9662
      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
9663
      result = self.rpc.call_x509_cert_create(instance.primary_node,
9664
                                              constants.RIE_CERT_VALIDITY)
9665
      result.Raise("Can't create X509 key and certificate on %s" % result.node)
9666

    
9667
      (name, cert_pem) = result.payload
9668

    
9669
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
9670
                                             cert_pem)
9671

    
9672
      return {
9673
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
9674
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
9675
                          salt),
9676
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
9677
        }
9678

    
9679
    return None
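    # For remote mode the dictionary built above therefore looks roughly like
    # this (contents abbreviated):
    #
    #   {
    #     "handshake": <handshake value derived from the cluster domain secret>,
    #     "x509_key_name": (name, hmac_of_name_with_salt, salt),
    #     "x509_ca": <signed PEM dump of the newly created certificate>,
    #   }
    #
    # while local exports need no preparation and get None.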
9680

    
9681

    
9682
class LUExportInstance(LogicalUnit):
9683
  """Export an instance to an image in the cluster.
9684

9685
  """
9686
  HPATH = "instance-export"
9687
  HTYPE = constants.HTYPE_INSTANCE
9688
  _OP_PARAMS = [
9689
    _PInstanceName,
9690
    ("target_node", ht.NoDefault, ht.TOr(ht.TNonEmptyString, ht.TList)),
9691
    ("shutdown", True, ht.TBool),
9692
    _PShutdownTimeout,
9693
    ("remove_instance", False, ht.TBool),
9694
    ("ignore_remove_failures", False, ht.TBool),
9695
    ("mode", constants.EXPORT_MODE_LOCAL, ht.TElemOf(constants.EXPORT_MODES)),
9696
    ("x509_key_name", None, ht.TOr(ht.TList, ht.TNone)),
9697
    ("destination_x509_ca", None, ht.TMaybeString),
9698
    ]
9699
  REQ_BGL = False
9700

    
9701
  def CheckArguments(self):
9702
    """Check the arguments.
9703

9704
    """
9705
    self.x509_key_name = self.op.x509_key_name
9706
    self.dest_x509_ca_pem = self.op.destination_x509_ca
9707

    
9708
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
9709
      if not self.x509_key_name:
9710
        raise errors.OpPrereqError("Missing X509 key name for encryption",
9711
                                   errors.ECODE_INVAL)
9712

    
9713
      if not self.dest_x509_ca_pem:
9714
        raise errors.OpPrereqError("Missing destination X509 CA",
9715
                                   errors.ECODE_INVAL)
9716

    
9717
  def ExpandNames(self):
9718
    self._ExpandAndLockInstance()
9719

    
9720
    # Lock all nodes for local exports
9721
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9722
      # FIXME: lock only instance primary and destination node
9723
      #
9724
      # Sad but true, for now we have to lock all nodes, as we don't know where
9725
      # the previous export might be, and in this LU we search for it and
9726
      # remove it from its current node. In the future we could fix this by:
9727
      #  - making a tasklet to search (share-lock all), then create the
9728
      #    new one, then one to remove, after
9729
      #  - removing the removal operation altogether
9730
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9731

    
9732
  def DeclareLocks(self, level):
9733
    """Last minute lock declaration."""
9734
    # All nodes are locked anyway, so nothing to do here.
9735

    
9736
  def BuildHooksEnv(self):
9737
    """Build hooks env.
9738

9739
    This will run on the master, primary node and target node.
9740

9741
    """
9742
    env = {
9743
      "EXPORT_MODE": self.op.mode,
9744
      "EXPORT_NODE": self.op.target_node,
9745
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9746
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9747
      # TODO: Generic function for boolean env variables
9748
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
9749
      }
9750

    
9751
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9752

    
9753
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9754

    
9755
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9756
      nl.append(self.op.target_node)
9757

    
9758
    return env, nl, nl
9759

    
9760
  def CheckPrereq(self):
9761
    """Check prerequisites.
9762

9763
    This checks that the instance and node names are valid.
9764

9765
    """
9766
    instance_name = self.op.instance_name
9767

    
9768
    self.instance = self.cfg.GetInstanceInfo(instance_name)
9769
    assert self.instance is not None, \
9770
          "Cannot retrieve locked instance %s" % self.op.instance_name
9771
    _CheckNodeOnline(self, self.instance.primary_node)
9772

    
9773
    if (self.op.remove_instance and self.instance.admin_up and
9774
        not self.op.shutdown):
9775
      raise errors.OpPrereqError("Can not remove instance without shutting it"
9776
                                 " down before")
9777

    
9778
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9779
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9780
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9781
      assert self.dst_node is not None
9782

    
9783
      _CheckNodeOnline(self, self.dst_node.name)
9784
      _CheckNodeNotDrained(self, self.dst_node.name)
9785

    
9786
      self._cds = None
9787
      self.dest_disk_info = None
9788
      self.dest_x509_ca = None
9789

    
9790
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9791
      self.dst_node = None
9792

    
9793
      if len(self.op.target_node) != len(self.instance.disks):
9794
        raise errors.OpPrereqError(("Received destination information for %s"
9795
                                    " disks, but instance %s has %s disks") %
9796
                                   (len(self.op.target_node), instance_name,
9797
                                    len(self.instance.disks)),
9798
                                   errors.ECODE_INVAL)
9799

    
9800
      cds = _GetClusterDomainSecret()
9801

    
9802
      # Check X509 key name
9803
      try:
9804
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9805
      except (TypeError, ValueError), err:
9806
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9807

    
9808
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9809
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9810
                                   errors.ECODE_INVAL)
9811

    
9812
      # Load and verify CA
9813
      try:
9814
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9815
      except OpenSSL.crypto.Error, err:
9816
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9817
                                   (err, ), errors.ECODE_INVAL)
9818

    
9819
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9820
      if errcode is not None:
9821
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
9822
                                   (msg, ), errors.ECODE_INVAL)
9823

    
9824
      self.dest_x509_ca = cert
9825

    
9826
      # Verify target information
9827
      disk_info = []
9828
      for idx, disk_data in enumerate(self.op.target_node):
9829
        try:
9830
          (host, port, magic) = \
9831
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9832
        except errors.GenericError, err:
9833
          raise errors.OpPrereqError("Target info for disk %s: %s" %
9834
                                     (idx, err), errors.ECODE_INVAL)
9835

    
9836
        disk_info.append((host, port, magic))
9837

    
9838
      assert len(disk_info) == len(self.op.target_node)
9839
      self.dest_disk_info = disk_info
9840

    
9841
    else:
9842
      raise errors.ProgrammerError("Unhandled export mode %r" %
9843
                                   self.op.mode)
9844

    
9845
    # instance disk type verification
9846
    # TODO: Implement export support for file-based disks
9847
    for disk in self.instance.disks:
9848
      if disk.dev_type == constants.LD_FILE:
9849
        raise errors.OpPrereqError("Export not supported for instances with"
9850
                                   " file-based disks", errors.ECODE_INVAL)
9851

    
9852
  def _CleanupExports(self, feedback_fn):
9853
    """Removes exports of current instance from all other nodes.
9854

9855
    If an instance in a cluster with nodes A..D was exported to node C, its
9856
    exports will be removed from the nodes A, B and D.
9857

9858
    """
9859
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
9860

    
9861
    nodelist = self.cfg.GetNodeList()
9862
    nodelist.remove(self.dst_node.name)
9863

    
9864
    # On one-node clusters nodelist will be empty after the removal; if we
9865
    # proceeded, the backup would be removed because OpQueryExports
9866
    # substitutes an empty list with the full cluster node list.
9867
    iname = self.instance.name
9868
    if nodelist:
9869
      feedback_fn("Removing old exports for instance %s" % iname)
9870
      exportlist = self.rpc.call_export_list(nodelist)
9871
      for node in exportlist:
9872
        if exportlist[node].fail_msg:
9873
          continue
9874
        if iname in exportlist[node].payload:
9875
          msg = self.rpc.call_export_remove(node, iname).fail_msg
9876
          if msg:
9877
            self.LogWarning("Could not remove older export for instance %s"
9878
                            " on node %s: %s", iname, node, msg)
9879

    
9880
  def Exec(self, feedback_fn):
9881
    """Export an instance to an image in the cluster.
9882

9883
    """
9884
    assert self.op.mode in constants.EXPORT_MODES
9885

    
9886
    instance = self.instance
9887
    src_node = instance.primary_node
9888

    
9889
    if self.op.shutdown:
9890
      # shutdown the instance, but not the disks
9891
      feedback_fn("Shutting down instance %s" % instance.name)
9892
      result = self.rpc.call_instance_shutdown(src_node, instance,
9893
                                               self.op.shutdown_timeout)
9894
      # TODO: Maybe ignore failures if ignore_remove_failures is set
9895
      result.Raise("Could not shutdown instance %s on"
9896
                   " node %s" % (instance.name, src_node))
9897

    
9898
    # set the disks ID correctly since call_instance_start needs the
9899
    # correct drbd minor to create the symlinks
9900
    for disk in instance.disks:
9901
      self.cfg.SetDiskID(disk, src_node)
9902

    
9903
    activate_disks = (not instance.admin_up)
9904

    
9905
    if activate_disks:
9906
      # Activate the instance disks if we're exporting a stopped instance
9907
      feedback_fn("Activating disks for %s" % instance.name)
9908
      _StartInstanceDisks(self, instance, None)
9909

    
9910
    try:
9911
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
9912
                                                     instance)
9913

    
9914
      helper.CreateSnapshots()
9915
      try:
9916
        if (self.op.shutdown and instance.admin_up and
9917
            not self.op.remove_instance):
9918
          assert not activate_disks
9919
          feedback_fn("Starting instance %s" % instance.name)
9920
          result = self.rpc.call_instance_start(src_node, instance, None, None)
9921
          msg = result.fail_msg
9922
          if msg:
9923
            feedback_fn("Failed to start instance: %s" % msg)
9924
            _ShutdownInstanceDisks(self, instance)
9925
            raise errors.OpExecError("Could not start instance: %s" % msg)
9926

    
9927
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
9928
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
9929
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9930
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
9931
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9932

    
9933
          (key_name, _, _) = self.x509_key_name
9934

    
9935
          dest_ca_pem = \
9936
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
9937
                                            self.dest_x509_ca)
9938

    
9939
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
9940
                                                     key_name, dest_ca_pem,
9941
                                                     timeouts)
9942
      finally:
9943
        helper.Cleanup()
9944

    
9945
      # Check for backwards compatibility
9946
      assert len(dresults) == len(instance.disks)
9947
      assert compat.all(isinstance(i, bool) for i in dresults), \
9948
             "Not all results are boolean: %r" % dresults
9949

    
9950
    finally:
9951
      if activate_disks:
9952
        feedback_fn("Deactivating disks for %s" % instance.name)
9953
        _ShutdownInstanceDisks(self, instance)
9954

    
9955
    if not (compat.all(dresults) and fin_resu):
9956
      failures = []
9957
      if not fin_resu:
9958
        failures.append("export finalization")
9959
      if not compat.all(dresults):
9960
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
9961
                               if not dsk)
9962
        failures.append("disk export: disk(s) %s" % fdsk)
9963

    
9964
      raise errors.OpExecError("Export failed, errors in %s" %
9965
                               utils.CommaJoin(failures))
9966

    
9967
    # At this point the export was successful; we can clean up and finish
9968

    
9969
    # Remove instance if requested
9970
    if self.op.remove_instance:
9971
      feedback_fn("Removing instance %s" % instance.name)
9972
      _RemoveInstance(self, feedback_fn, instance,
9973
                      self.op.ignore_remove_failures)
9974

    
9975
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9976
      self._CleanupExports(feedback_fn)
9977

    
9978
    return fin_resu, dresults
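    # fin_resu is a single boolean for the finalization step and dresults has
    # one boolean per instance disk; e.g. (illustrative) a fully successful
    # export of a two-disk instance returns:
    #
    #   (True, [True, True])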
9979

    
9980

    
9981
class LURemoveExport(NoHooksLU):
9982
  """Remove exports related to the named instance.
9983

9984
  """
9985
  _OP_PARAMS = [
9986
    _PInstanceName,
9987
    ]
9988
  REQ_BGL = False
9989

    
9990
  def ExpandNames(self):
9991
    self.needed_locks = {}
9992
    # We need all nodes to be locked in order for RemoveExport to work, but we
9993
    # don't need to lock the instance itself, as nothing will happen to it (and
9994
    # we can remove exports also for a removed instance)
9995
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9996

    
9997
  def Exec(self, feedback_fn):
9998
    """Remove any export.
9999

10000
    """
10001
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
10002
    # If the instance was not found we'll try with the name that was passed in.
10003
    # This will only work if it was an FQDN, though.
10004
    fqdn_warn = False
10005
    if not instance_name:
10006
      fqdn_warn = True
10007
      instance_name = self.op.instance_name
10008

    
10009
    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
10010
    exportlist = self.rpc.call_export_list(locked_nodes)
10011
    found = False
10012
    for node in exportlist:
10013
      msg = exportlist[node].fail_msg
10014
      if msg:
10015
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
10016
        continue
10017
      if instance_name in exportlist[node].payload:
10018
        found = True
10019
        result = self.rpc.call_export_remove(node, instance_name)
10020
        msg = result.fail_msg
10021
        if msg:
10022
          logging.error("Could not remove export for instance %s"
10023
                        " on node %s: %s", instance_name, node, msg)
10024

    
10025
    if fqdn_warn and not found:
10026
      feedback_fn("Export not found. If trying to remove an export belonging"
10027
                  " to a deleted instance please use its Fully Qualified"
10028
                  " Domain Name.")
10029

    
10030

    
10031
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
10032
  """Generic tags LU.
10033

10034
  This is an abstract class which is the parent of all the other tags LUs.
10035

10036
  """
10037

    
10038
  def ExpandNames(self):
10039
    self.needed_locks = {}
10040
    if self.op.kind == constants.TAG_NODE:
10041
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
10042
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
10043
    elif self.op.kind == constants.TAG_INSTANCE:
10044
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
10045
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
10046

    
10047
    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
10048
    # not possible to acquire the BGL based on opcode parameters)
10049

    
10050
  def CheckPrereq(self):
10051
    """Check prerequisites.
10052

10053
    """
10054
    if self.op.kind == constants.TAG_CLUSTER:
10055
      self.target = self.cfg.GetClusterInfo()
10056
    elif self.op.kind == constants.TAG_NODE:
10057
      self.target = self.cfg.GetNodeInfo(self.op.name)
10058
    elif self.op.kind == constants.TAG_INSTANCE:
10059
      self.target = self.cfg.GetInstanceInfo(self.op.name)
10060
    else:
10061
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
10062
                                 str(self.op.kind), errors.ECODE_INVAL)
10063

    
10064

    
10065
class LUGetTags(TagsLU):
10066
  """Returns the tags of a given object.
10067

10068
  """
10069
  _OP_PARAMS = [
10070
    ("kind", ht.NoDefault, ht.TElemOf(constants.VALID_TAG_TYPES)),
10071
    # Name is only meaningful for nodes and instances
10072
    ("name", ht.NoDefault, ht.TMaybeString),
10073
    ]
10074
  REQ_BGL = False
10075

    
10076
  def ExpandNames(self):
10077
    TagsLU.ExpandNames(self)
10078

    
10079
    # Share locks as this is only a read operation
10080
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
10081

    
10082
  def Exec(self, feedback_fn):
10083
    """Returns the tag list.
10084

10085
    """
10086
    return list(self.target.GetTags())
10087

    
10088

    
10089
class LUSearchTags(NoHooksLU):
10090
  """Searches the tags for a given pattern.
10091

10092
  """
10093
  _OP_PARAMS = [
10094
    ("pattern", ht.NoDefault, ht.TNonEmptyString),
10095
    ]
10096
  REQ_BGL = False
10097

    
10098
  def ExpandNames(self):
10099
    self.needed_locks = {}
10100

    
10101
  def CheckPrereq(self):
10102
    """Check prerequisites.
10103

10104
    This checks the pattern passed for validity by compiling it.
10105

10106
    """
10107
    try:
10108
      self.re = re.compile(self.op.pattern)
10109
    except re.error, err:
10110
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
10111
                                 (self.op.pattern, err), errors.ECODE_INVAL)
10112

    
10113
  def Exec(self, feedback_fn):
10114
    """Returns the tag list.
10115

10116
    """
10117
    cfg = self.cfg
10118
    tgts = [("/cluster", cfg.GetClusterInfo())]
10119
    ilist = cfg.GetAllInstancesInfo().values()
10120
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
10121
    nlist = cfg.GetAllNodesInfo().values()
10122
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
10123
    results = []
10124
    for path, target in tgts:
10125
      for tag in target.GetTags():
10126
        if self.re.search(tag):
10127
          results.append((path, tag))
10128
    return results
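    # Illustrative result for a pattern such as "^web" (paths and tags are
    # made up); each entry is a (path, matching tag) pair:
    #
    #   [("/cluster", "webfarm"),
    #    ("/instances/instance1.example.com", "webserver")]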
10129

    
10130

    
10131
class LUAddTags(TagsLU):
10132
  """Sets a tag on a given object.
10133

10134
  """
10135
  _OP_PARAMS = [
10136
    ("kind", ht.NoDefault, ht.TElemOf(constants.VALID_TAG_TYPES)),
10137
    # Name is only meaningful for nodes and instances
10138
    ("name", ht.NoDefault, ht.TMaybeString),
10139
    ("tags", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
10140
    ]
10141
  REQ_BGL = False
10142

    
10143
  def CheckPrereq(self):
10144
    """Check prerequisites.
10145

10146
    This checks the type and length of the tag name and value.
10147

10148
    """
10149
    TagsLU.CheckPrereq(self)
10150
    for tag in self.op.tags:
10151
      objects.TaggableObject.ValidateTag(tag)
10152

    
10153
  def Exec(self, feedback_fn):
10154
    """Sets the tag.
10155

10156
    """
10157
    try:
10158
      for tag in self.op.tags:
10159
        self.target.AddTag(tag)
10160
    except errors.TagError, err:
10161
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
10162
    self.cfg.Update(self.target, feedback_fn)
10163

    
10164

    
10165
class LUDelTags(TagsLU):
10166
  """Delete a list of tags from a given object.
10167

10168
  """
10169
  _OP_PARAMS = [
10170
    ("kind", ht.NoDefault, ht.TElemOf(constants.VALID_TAG_TYPES)),
10171
    # Name is only meaningful for nodes and instances
10172
    ("name", ht.NoDefault, ht.TMaybeString),
10173
    ("tags", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
10174
    ]
10175
  REQ_BGL = False
10176

    
10177
  def CheckPrereq(self):
10178
    """Check prerequisites.
10179

10180
    This checks that we have the given tag.
10181

10182
    """
10183
    TagsLU.CheckPrereq(self)
10184
    for tag in self.op.tags:
10185
      objects.TaggableObject.ValidateTag(tag)
10186
    del_tags = frozenset(self.op.tags)
10187
    cur_tags = self.target.GetTags()
10188

    
10189
    diff_tags = del_tags - cur_tags
10190
    if diff_tags:
10191
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
10192
      raise errors.OpPrereqError("Tag(s) %s not found" %
10193
                                 (utils.CommaJoin(diff_names), ),
10194
                                 errors.ECODE_NOENT)
10195

    
10196
  def Exec(self, feedback_fn):
10197
    """Remove the tag from the object.
10198

10199
    """
10200
    for tag in self.op.tags:
10201
      self.target.RemoveTag(tag)
10202
    self.cfg.Update(self.target, feedback_fn)
10203

    
10204

    
10205
class LUTestDelay(NoHooksLU):
10206
  """Sleep for a specified amount of time.
10207

10208
  This LU sleeps on the master and/or nodes for a specified amount of
10209
  time.
10210

10211
  """
10212
  _OP_PARAMS = [
10213
    ("duration", ht.NoDefault, ht.TFloat),
10214
    ("on_master", True, ht.TBool),
10215
    ("on_nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
10216
    ("repeat", 0, ht.TPositiveInt)
10217
    ]
10218
  REQ_BGL = False
10219

    
10220
  def ExpandNames(self):
10221
    """Expand names and set required locks.
10222

10223
    This expands the node list, if any.
10224

10225
    """
10226
    self.needed_locks = {}
10227
    if self.op.on_nodes:
10228
      # _GetWantedNodes can be used here, but is not always appropriate to use
10229
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
10230
      # more information.
10231
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
10232
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
10233

    
10234
  def _TestDelay(self):
10235
    """Do the actual sleep.
10236

10237
    """
10238
    if self.op.on_master:
10239
      if not utils.TestDelay(self.op.duration):
10240
        raise errors.OpExecError("Error during master delay test")
10241
    if self.op.on_nodes:
10242
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
10243
      for node, node_result in result.items():
10244
        node_result.Raise("Failure during rpc call to node %s" % node)
10245

    
10246
  def Exec(self, feedback_fn):
10247
    """Execute the test delay opcode, with the wanted repetitions.
10248

10249
    """
10250
    if self.op.repeat == 0:
10251
      self._TestDelay()
10252
    else:
10253
      top_value = self.op.repeat - 1
10254
      for i in range(self.op.repeat):
10255
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
10256
        self._TestDelay()
10257

    
10258

    
10259
class LUTestJobqueue(NoHooksLU):
10260
  """Utility LU to test some aspects of the job queue.
10261

10262
  """
10263
  _OP_PARAMS = [
10264
    ("notify_waitlock", False, ht.TBool),
10265
    ("notify_exec", False, ht.TBool),
10266
    ("log_messages", ht.EmptyList, ht.TListOf(ht.TString)),
10267
    ("fail", False, ht.TBool),
10268
    ]
10269
  REQ_BGL = False
10270

    
10271
  # Must be lower than default timeout for WaitForJobChange to see whether it
10272
  # notices changed jobs
10273
  _CLIENT_CONNECT_TIMEOUT = 20.0
10274
  _CLIENT_CONFIRM_TIMEOUT = 60.0
10275

    
10276
  @classmethod
10277
  def _NotifyUsingSocket(cls, cb, errcls):
10278
    """Opens a Unix socket and waits for another program to connect.
10279

10280
    @type cb: callable
10281
    @param cb: Callback to send socket name to client
10282
    @type errcls: class
10283
    @param errcls: Exception class to use for errors
10284

10285
    """
10286
    # Using a temporary directory as there's no easy way to create temporary
10287
    # sockets without writing a custom loop around tempfile.mktemp and
10288
    # socket.bind
10289
    tmpdir = tempfile.mkdtemp()
10290
    try:
10291
      tmpsock = utils.PathJoin(tmpdir, "sock")
10292

    
10293
      logging.debug("Creating temporary socket at %s", tmpsock)
10294
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
10295
      try:
10296
        sock.bind(tmpsock)
10297
        sock.listen(1)
10298

    
10299
        # Send details to client
10300
        cb(tmpsock)
10301

    
10302
        # Wait for client to connect before continuing
10303
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
10304
        try:
10305
          (conn, _) = sock.accept()
10306
        except socket.error, err:
10307
          raise errcls("Client didn't connect in time (%s)" % err)
10308
      finally:
10309
        sock.close()
10310
    finally:
10311
      # Remove as soon as client is connected
10312
      shutil.rmtree(tmpdir)
10313

    
10314
    # Wait for client to close
10315
    try:
10316
      try:
10317
        # pylint: disable-msg=E1101
10318
        # Instance of '_socketobject' has no ... member
10319
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
10320
        conn.recv(1)
10321
      except socket.error, err:
10322
        raise errcls("Client failed to confirm notification (%s)" % err)
10323
    finally:
10324
      conn.close()
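    # A cooperating client is expected to behave roughly as follows
    # (illustrative sketch; sockname is the path passed via the callback):
    #
    #   sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    #   sock.connect(sockname)        # unblocks the accept() above
    #   ...                           # act on the notification
    #   sock.close()                  # unblocks the conn.recv(1) above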
10325

    
10326
  def _SendNotification(self, test, arg, sockname):
10327
    """Sends a notification to the client.
10328

10329
    @type test: string
10330
    @param test: Test name
10331
    @param arg: Test argument (depends on test)
10332
    @type sockname: string
10333
    @param sockname: Socket path
10334

10335
    """
10336
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
10337

    
10338
  def _Notify(self, prereq, test, arg):
10339
    """Notifies the client of a test.
10340

10341
    @type prereq: bool
10342
    @param prereq: Whether this is a prereq-phase test
10343
    @type test: string
10344
    @param test: Test name
10345
    @param arg: Test argument (depends on test)
10346

10347
    """
10348
    if prereq:
10349
      errcls = errors.OpPrereqError
10350
    else:
10351
      errcls = errors.OpExecError
10352

    
10353
    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
10354
                                                  test, arg),
10355
                                   errcls)
10356

    
10357
  def CheckArguments(self):
10358
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
10359
    self.expandnames_calls = 0
10360

    
10361
  def ExpandNames(self):
10362
    checkargs_calls = getattr(self, "checkargs_calls", 0)
10363
    if checkargs_calls < 1:
10364
      raise errors.ProgrammerError("CheckArguments was not called")
10365

    
10366
    self.expandnames_calls += 1
10367

    
10368
    if self.op.notify_waitlock:
10369
      self._Notify(True, constants.JQT_EXPANDNAMES, None)
10370

    
10371
    self.LogInfo("Expanding names")
10372

    
10373
    # Get lock on master node (just to get a lock, not for a particular reason)
10374
    self.needed_locks = {
10375
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
10376
      }
10377

    
10378
  def Exec(self, feedback_fn):
10379
    if self.expandnames_calls < 1:
10380
      raise errors.ProgrammerError("ExpandNames was not called")
10381

    
10382
    if self.op.notify_exec:
10383
      self._Notify(False, constants.JQT_EXEC, None)
10384

    
10385
    self.LogInfo("Executing")
10386

    
10387
    if self.op.log_messages:
10388
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
10389
      for idx, msg in enumerate(self.op.log_messages):
10390
        self.LogInfo("Sending log message %s", idx + 1)
10391
        feedback_fn(constants.JQT_MSGPREFIX + msg)
10392
        # Report how many test messages have been sent
10393
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)
10394

    
10395
    if self.op.fail:
10396
      raise errors.OpExecError("Opcode failure was requested")
10397

    
10398
    return True
10399

    
10400

    
10401
class IAllocator(object):
10402
  """IAllocator framework.
10403

10404
  An IAllocator instance has four sets of attributes:
10405
    - cfg that is needed to query the cluster
10406
    - input data (all members of the relevant _*_KEYS attribute are required)
10407
    - four buffer attributes (in|out_data|text), that represent the
10408
      input (to the external script) in text and data structure format,
10409
      and the output from it, again in two formats
10410
    - the result variables from the script (success, info, nodes) for
10411
      easy usage
10412

10413
  """
10414
  # pylint: disable-msg=R0902
10415
  # lots of instance attributes
10416
  _ALLO_KEYS = [
10417
    "name", "mem_size", "disks", "disk_template",
10418
    "os", "tags", "nics", "vcpus", "hypervisor",
10419
    ]
10420
  _RELO_KEYS = [
10421
    "name", "relocate_from",
10422
    ]
10423
  _EVAC_KEYS = [
10424
    "evac_nodes",
10425
    ]
10426

    
10427
  def __init__(self, cfg, rpc, mode, **kwargs):
10428
    self.cfg = cfg
10429
    self.rpc = rpc
10430
    # init buffer variables
10431
    self.in_text = self.out_text = self.in_data = self.out_data = None
10432
    # init all input fields so that pylint is happy
10433
    self.mode = mode
10434
    self.mem_size = self.disks = self.disk_template = None
10435
    self.os = self.tags = self.nics = self.vcpus = None
10436
    self.hypervisor = None
10437
    self.relocate_from = None
10438
    self.name = None
10439
    self.evac_nodes = None
10440
    # computed fields
10441
    self.required_nodes = None
10442
    # init result fields
10443
    self.success = self.info = self.result = None
10444
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10445
      keyset = self._ALLO_KEYS
10446
      fn = self._AddNewInstance
10447
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10448
      keyset = self._RELO_KEYS
10449
      fn = self._AddRelocateInstance
10450
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10451
      keyset = self._EVAC_KEYS
10452
      fn = self._AddEvacuateNodes
10453
    else:
10454
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
10455
                                   " IAllocator" % self.mode)
10456
    for key in kwargs:
10457
      if key not in keyset:
10458
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
10459
                                     " IAllocator" % key)
10460
      setattr(self, key, kwargs[key])
10461

    
10462
    for key in keyset:
10463
      if key not in kwargs:
10464
        raise errors.ProgrammerError("Missing input parameter '%s' to"
10465
                                     " IAllocator" % key)
10466
    self._BuildInputData(fn)
10467

    
10468
  def _ComputeClusterData(self):
10469
    """Compute the generic allocator input data.
10470

10471
    This is the data that is independent of the actual operation.
10472

10473
    """
10474
    cfg = self.cfg
10475
    cluster_info = cfg.GetClusterInfo()
10476
    # cluster data
10477
    data = {
10478
      "version": constants.IALLOCATOR_VERSION,
10479
      "cluster_name": cfg.GetClusterName(),
10480
      "cluster_tags": list(cluster_info.GetTags()),
10481
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
10482
      # we don't have job IDs
10483
      }
10484
    iinfo = cfg.GetAllInstancesInfo().values()
10485
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
10486

    
10487
    # node data
10488
    node_list = cfg.GetNodeList()
10489

    
10490
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10491
      hypervisor_name = self.hypervisor
10492
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10493
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
10494
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10495
      hypervisor_name = cluster_info.enabled_hypervisors[0]
10496

    
10497
    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
10498
                                        hypervisor_name)
10499
    node_iinfo = \
10500
      self.rpc.call_all_instances_info(node_list,
10501
                                       cluster_info.enabled_hypervisors)
10502

    
10503
    data["nodegroups"] = self._ComputeNodeGroupData(cfg)
10504

    
10505
    data["nodes"] = self._ComputeNodeData(cfg, node_data, node_iinfo, i_list)
10506

    
10507
    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
10508

    
10509
    self.in_data = data
10510

    
10511
  @staticmethod
10512
  def _ComputeNodeGroupData(cfg):
10513
    """Compute node groups data.
10514

10515
    """
10516
    ng = {}
10517
    for guuid, gdata in cfg.GetAllNodeGroupsInfo().items():
10518
      ng[guuid] = { "name": gdata.name }
10519
    return ng
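    # Illustrative mapping returned above (UUID and group name made up):
    #
    #   {
    #     "f4e06e0d-528a-4963-a5ad-10f3e114232d": {"name": "default"},
    #   }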
10520

    
10521
  @staticmethod
10522
  def _ComputeNodeData(cfg, node_data, node_iinfo, i_list):
10523
    """Compute global node data.
10524

10525
    """
10526
    node_results = {}
10527
    for nname, nresult in node_data.items():
10528
      # first fill in static (config-based) values
10529
      ninfo = cfg.GetNodeInfo(nname)
10530
      pnr = {
10531
        "tags": list(ninfo.GetTags()),
10532
        "primary_ip": ninfo.primary_ip,
10533
        "secondary_ip": ninfo.secondary_ip,
10534
        "offline": ninfo.offline,
10535
        "drained": ninfo.drained,
10536
        "master_candidate": ninfo.master_candidate,
10537
        "group": ninfo.group,
10538
        "master_capable": ninfo.master_capable,
10539
        "vm_capable": ninfo.vm_capable,
10540
        }
10541

    
10542
      if not (ninfo.offline or ninfo.drained):
10543
        nresult.Raise("Can't get data for node %s" % nname)
10544
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
10545
                                nname)
10546
        remote_info = nresult.payload
10547

    
10548
        for attr in ['memory_total', 'memory_free', 'memory_dom0',
10549
                     'vg_size', 'vg_free', 'cpu_total']:
10550
          if attr not in remote_info:
10551
            raise errors.OpExecError("Node '%s' didn't return attribute"
10552
                                     " '%s'" % (nname, attr))
10553
          if not isinstance(remote_info[attr], int):
10554
            raise errors.OpExecError("Node '%s' returned invalid value"
10555
                                     " for '%s': %s" %
10556
                                     (nname, attr, remote_info[attr]))
10557
        # compute memory used by primary instances
10558
        i_p_mem = i_p_up_mem = 0
10559
        for iinfo, beinfo in i_list:
10560
          if iinfo.primary_node == nname:
10561
            i_p_mem += beinfo[constants.BE_MEMORY]
10562
            if iinfo.name not in node_iinfo[nname].payload:
10563
              i_used_mem = 0
10564
            else:
10565
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
10566
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
10567
            remote_info['memory_free'] -= max(0, i_mem_diff)
10568

    
10569
            if iinfo.admin_up:
10570
              i_p_up_mem += beinfo[constants.BE_MEMORY]
10571

    
10572
        # compute memory used by instances
10573
        pnr_dyn = {
10574
          "total_memory": remote_info['memory_total'],
10575
          "reserved_memory": remote_info['memory_dom0'],
10576
          "free_memory": remote_info['memory_free'],
10577
          "total_disk": remote_info['vg_size'],
10578
          "free_disk": remote_info['vg_free'],
10579
          "total_cpus": remote_info['cpu_total'],
10580
          "i_pri_memory": i_p_mem,
10581
          "i_pri_up_memory": i_p_up_mem,
10582
          }
10583
        pnr.update(pnr_dyn)
10584

    
10585
      node_results[nname] = pnr
10586

    
10587
    return node_results
10588

    
10589
  @staticmethod
10590
  def _ComputeInstanceData(cluster_info, i_list):
10591
    """Compute global instance data.
10592

10593
    """
10594
    instance_data = {}
10595
    for iinfo, beinfo in i_list:
10596
      nic_data = []
10597
      for nic in iinfo.nics:
10598
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
10599
        nic_dict = {"mac": nic.mac,
10600
                    "ip": nic.ip,
10601
                    "mode": filled_params[constants.NIC_MODE],
10602
                    "link": filled_params[constants.NIC_LINK],
10603
                   }
10604
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
10605
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
10606
        nic_data.append(nic_dict)
10607
      pir = {
10608
        "tags": list(iinfo.GetTags()),
10609
        "admin_up": iinfo.admin_up,
10610
        "vcpus": beinfo[constants.BE_VCPUS],
10611
        "memory": beinfo[constants.BE_MEMORY],
10612
        "os": iinfo.os,
10613
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
10614
        "nics": nic_data,
10615
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
10616
        "disk_template": iinfo.disk_template,
10617
        "hypervisor": iinfo.hypervisor,
10618
        }
10619
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
10620
                                                 pir["disks"])
10621
      instance_data[iinfo.name] = pir
10622

    
10623
    return instance_data
10624

    
10625
  def _AddNewInstance(self):
10626
    """Add new instance data to allocator structure.
10627

10628
    This in combination with _AllocatorGetClusterData will create the
10629
    correct structure needed as input for the allocator.
10630

10631
    The checks for the completeness of the opcode must have already been
10632
    done.
10633

10634
    """
10635
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)
10636

    
10637
    if self.disk_template in constants.DTS_NET_MIRROR:
10638
      self.required_nodes = 2
10639
    else:
10640
      self.required_nodes = 1
10641
    request = {
10642
      "name": self.name,
10643
      "disk_template": self.disk_template,
10644
      "tags": self.tags,
10645
      "os": self.os,
10646
      "vcpus": self.vcpus,
10647
      "memory": self.mem_size,
10648
      "disks": self.disks,
10649
      "disk_space_total": disk_space,
10650
      "nics": self.nics,
10651
      "required_nodes": self.required_nodes,
10652
      }
10653
    return request
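    # Illustrative allocation request as assembled above (all values made up;
    # disk_space_total comes from _ComputeDiskSize):
    #
    #   {
    #     "name": "instance1.example.com",
    #     "disk_template": "drbd",
    #     "tags": [],
    #     "os": "debootstrap+default",
    #     "vcpus": 1,
    #     "memory": 512,
    #     "disks": [{"size": 10240, "mode": "w"}],
    #     "disk_space_total": 10368,
    #     "nics": [{"mac": "auto", "ip": None, "bridge": None}],
    #     "required_nodes": 2,
    #   }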
10654

    
10655
  def _AddRelocateInstance(self):
10656
    """Add relocate instance data to allocator structure.
10657

10658
    This in combination with _IAllocatorGetClusterData will create the
10659
    correct structure needed as input for the allocator.
10660

10661
    The checks for the completeness of the opcode must have already been
10662
    done.
10663

10664
    """
10665
    instance = self.cfg.GetInstanceInfo(self.name)
10666
    if instance is None:
10667
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
10668
                                   " IAllocator" % self.name)
10669

    
10670
    if instance.disk_template not in constants.DTS_NET_MIRROR:
10671
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
10672
                                 errors.ECODE_INVAL)
10673

    
10674
    if len(instance.secondary_nodes) != 1:
10675
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
10676
                                 errors.ECODE_STATE)
10677

    
10678
    self.required_nodes = 1
10679
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
10680
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
10681

    
10682
    request = {
10683
      "name": self.name,
10684
      "disk_space_total": disk_space,
10685
      "required_nodes": self.required_nodes,
10686
      "relocate_from": self.relocate_from,
10687
      }
10688
    return request
10689

    
10690
  def _AddEvacuateNodes(self):
10691
    """Add evacuate nodes data to allocator structure.
10692

10693
    """
10694
    request = {
10695
      "evac_nodes": self.evac_nodes
10696
      }
10697
    return request
10698

    
10699
  def _BuildInputData(self, fn):
10700
    """Build input data structures.
10701

10702
    """
10703
    self._ComputeClusterData()
10704

    
10705
    request = fn()
10706
    request["type"] = self.mode
10707
    self.in_data["request"] = request
10708

    
10709
    self.in_text = serializer.Dump(self.in_data)
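    # The serialized document handed to the iallocator script therefore has
    # this top-level structure (keys as filled in by _ComputeClusterData and
    # the per-mode request builders):
    #
    #   {
    #     "version": ...,
    #     "cluster_name": ...,
    #     "cluster_tags": [...],
    #     "enabled_hypervisors": [...],
    #     "nodegroups": {...},
    #     "nodes": {...},
    #     "instances": {...},
    #     "request": {..., "type": <mode>},
    #   }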
10710

    
10711
  def Run(self, name, validate=True, call_fn=None):
10712
    """Run an instance allocator and return the results.
10713

10714
    """
10715
    if call_fn is None:
10716
      call_fn = self.rpc.call_iallocator_runner
10717

    
10718
    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
10719
    result.Raise("Failure while running the iallocator script")
10720

    
10721
    self.out_text = result.payload
10722
    if validate:
10723
      self._ValidateResult()
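    # Typical callers drive the class roughly like this (illustrative sketch;
    # the opcode field names used here are assumptions):
    #
    #   ial = IAllocator(self.cfg, self.rpc,
    #                    mode=constants.IALLOCATOR_MODE_RELOC,
    #                    name=instance.name,
    #                    relocate_from=[old_secondary])
    #   ial.Run(self.op.iallocator)
    #   if not ial.success:
    #     raise errors.OpPrereqError("Cannot compute nodes using iallocator:"
    #                                " %s" % ial.info)
    #   new_nodes = ial.result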
10724

    
10725
  def _ValidateResult(self):
10726
    """Process the allocator results.
10727

10728
    This will process and if successful save the result in
10729
    self.out_data and the other parameters.
10730

10731
    """
10732
    try:
10733
      rdict = serializer.Load(self.out_text)
10734
    except Exception, err:
10735
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
10736

    
10737
    if not isinstance(rdict, dict):
10738
      raise errors.OpExecError("Can't parse iallocator results: not a dict")
10739

    
10740
    # TODO: remove backwards compatibility in later versions
10741
    if "nodes" in rdict and "result" not in rdict:
10742
      rdict["result"] = rdict["nodes"]
10743
      del rdict["nodes"]
10744

    
10745
    for key in "success", "info", "result":
10746
      if key not in rdict:
10747
        raise errors.OpExecError("Can't parse iallocator results:"
10748
                                 " missing key '%s'" % key)
10749
      setattr(self, key, rdict[key])
10750

    
10751
    if not isinstance(rdict["result"], list):
10752
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
10753
                               " is not a list")
10754
    self.out_data = rdict
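    # A minimal well-formed reply from an allocator script, as accepted above
    # (node names made up); "result" must always be a list:
    #
    #   {
    #     "success": true,
    #     "info": "allocation successful",
    #     "result": ["node1.example.com", "node2.example.com"],
    #   }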
10755

    
10756

    
10757
class LUTestAllocator(NoHooksLU):
10758
  """Run allocator tests.
10759

10760
  This LU runs the allocator tests.
10761

10762
  """
10763
  _OP_PARAMS = [
10764
    ("direction", ht.NoDefault,
10765
     ht.TElemOf(constants.VALID_IALLOCATOR_DIRECTIONS)),
10766
    ("mode", ht.NoDefault, ht.TElemOf(constants.VALID_IALLOCATOR_MODES)),
10767
    ("name", ht.NoDefault, ht.TNonEmptyString),
10768
    ("nics", ht.NoDefault, ht.TOr(ht.TNone, ht.TListOf(
10769
      ht.TDictOf(ht.TElemOf(["mac", "ip", "bridge"]),
10770
               ht.TOr(ht.TNone, ht.TNonEmptyString))))),
10771
    ("disks", ht.NoDefault, ht.TOr(ht.TNone, ht.TList)),
10772
    ("hypervisor", None, ht.TMaybeString),
10773
    ("allocator", None, ht.TMaybeString),
10774
    ("tags", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
10775
    ("mem_size", None, ht.TOr(ht.TNone, ht.TPositiveInt)),
10776
    ("vcpus", None, ht.TOr(ht.TNone, ht.TPositiveInt)),
10777
    ("os", None, ht.TMaybeString),
10778
    ("disk_template", None, ht.TMaybeString),
10779
    ("evac_nodes", None, ht.TOr(ht.TNone, ht.TListOf(ht.TNonEmptyString))),
10780
    ]
10781

    
10782
  def CheckPrereq(self):
10783
    """Check prerequisites.
10784

10785
    This checks the opcode parameters depending on the test direction and mode.
10786

10787
    """
10788
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
10789
      for attr in ["mem_size", "disks", "disk_template",
10790
                   "os", "tags", "nics", "vcpus"]:
10791
        if not hasattr(self.op, attr):
10792
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
10793
                                     attr, errors.ECODE_INVAL)
10794
      iname = self.cfg.ExpandInstanceName(self.op.name)
10795
      if iname is not None:
10796
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
10797
                                   iname, errors.ECODE_EXISTS)
10798
      if not isinstance(self.op.nics, list):
10799
        raise errors.OpPrereqError("Invalid parameter 'nics'",
10800
                                   errors.ECODE_INVAL)
10801
      if not isinstance(self.op.disks, list):
10802
        raise errors.OpPrereqError("Invalid parameter 'disks'",
10803
                                   errors.ECODE_INVAL)
10804
      for row in self.op.disks:
10805
        if (not isinstance(row, dict) or
10806
            "size" not in row or
10807
            not isinstance(row["size"], int) or
10808
            "mode" not in row or
10809
            row["mode"] not in ['r', 'w']):
10810
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
10811
                                     " parameter", errors.ECODE_INVAL)
10812
      if self.op.hypervisor is None:
10813
        self.op.hypervisor = self.cfg.GetHypervisorType()
10814
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
10815
      fname = _ExpandInstanceName(self.cfg, self.op.name)
10816
      self.op.name = fname
10817
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
10818
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
10819
      if not hasattr(self.op, "evac_nodes"):
10820
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
10821
                                   " opcode input", errors.ECODE_INVAL)
10822
    else:
10823
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
10824
                                 self.op.mode, errors.ECODE_INVAL)
10825

    
10826
    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
10827
      if self.op.allocator is None:
10828
        raise errors.OpPrereqError("Missing allocator name",
10829
                                   errors.ECODE_INVAL)
10830
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
10831
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
10832
                                 self.op.direction, errors.ECODE_INVAL)
10833

    
10834
  def Exec(self, feedback_fn):
10835
    """Run the allocator test.
10836

10837
    """
10838
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
10839
      ial = IAllocator(self.cfg, self.rpc,
10840
                       mode=self.op.mode,
10841
                       name=self.op.name,
10842
                       mem_size=self.op.mem_size,
10843
                       disks=self.op.disks,
10844
                       disk_template=self.op.disk_template,
10845
                       os=self.op.os,
10846
                       tags=self.op.tags,
10847
                       nics=self.op.nics,
10848
                       vcpus=self.op.vcpus,
10849
                       hypervisor=self.op.hypervisor,
10850
                       )
10851
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
10852
      ial = IAllocator(self.cfg, self.rpc,
10853
                       mode=self.op.mode,
10854
                       name=self.op.name,
10855
                       relocate_from=list(self.relocate_from),
10856
                       )
10857
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
10858
      ial = IAllocator(self.cfg, self.rpc,
10859
                       mode=self.op.mode,
10860
                       evac_nodes=self.op.evac_nodes)
10861
    else:
10862
      raise errors.ProgrammerError("Uncatched mode %s in"
10863
                                   " LUTestAllocator.Exec", self.op.mode)
10864

    
10865
    if self.op.direction == constants.IALLOCATOR_DIR_IN:
10866
      result = ial.in_text
10867
    else:
10868
      ial.Run(self.op.allocator, validate=False)
10869
      result = ial.out_text
10870
    return result