1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008, 2009, 2010 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable-msg=W0201,C0302
25

    
26
# W0201 since most LU attributes are defined in CheckPrereq or similar
27
# functions
28

    
29
# C0302: since we have waaaay too many lines in this module
30

    
31
import os
32
import os.path
33
import time
34
import re
35
import platform
36
import logging
37
import copy
38
import OpenSSL
39
import socket
40
import tempfile
41
import shutil
42

    
43
from ganeti import ssh
44
from ganeti import utils
45
from ganeti import errors
46
from ganeti import hypervisor
47
from ganeti import locking
48
from ganeti import constants
49
from ganeti import objects
50
from ganeti import serializer
51
from ganeti import ssconf
52
from ganeti import uidpool
53
from ganeti import compat
54
from ganeti import masterd
55
from ganeti import netutils
56
from ganeti import ht
57

    
58
import ganeti.masterd.instance # pylint: disable-msg=W0611
59

    
60
# Common opcode attributes
61

    
62
#: output fields for a query operation
63
_POutputFields = ("output_fields", ht.NoDefault, ht.TListOf(ht.TNonEmptyString))
64

    
65

    
66
#: the shutdown timeout
67
_PShutdownTimeout = ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT,
68
                     ht.TPositiveInt)
69

    
70
#: the force parameter
71
_PForce = ("force", False, ht.TBool)
72

    
73
#: a required instance name (for single-instance LUs)
74
_PInstanceName = ("instance_name", ht.NoDefault, ht.TNonEmptyString)
75

    
76
#: Whether to ignore offline nodes
77
_PIgnoreOfflineNodes = ("ignore_offline_nodes", False, ht.TBool)
78

    
79
#: a required node name (for single-node LUs)
80
_PNodeName = ("node_name", ht.NoDefault, ht.TNonEmptyString)
81

    
82
#: the migration type (live/non-live)
83
_PMigrationMode = ("mode", None,
84
                   ht.TOr(ht.TNone, ht.TElemOf(constants.HT_MIGRATION_MODES)))
85

    
86
#: the obsolete 'live' mode (boolean)
87
_PMigrationLive = ("live", None, ht.TMaybeBool)
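
# Each _P* tuple above is a (name, default, check) triple that individual LUs
# list in their _OP_PARAMS; a hypothetical sketch (the third entry below is
# illustrative only):
#
#   _OP_PARAMS = [
#     _PInstanceName,
#     _PForce,
#     ("custom_flag", False, ht.TBool),
#     ]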
88

    
89

    
90
# End types
91
class LogicalUnit(object):
92
  """Logical Unit base class.
93

94
  Subclasses must follow these rules:
95
    - implement ExpandNames
96
    - implement CheckPrereq (except when tasklets are used)
97
    - implement Exec (except when tasklets are used)
98
    - implement BuildHooksEnv
99
    - redefine HPATH and HTYPE
100
    - optionally redefine their run requirements:
101
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
102

103
  Note that all commands require root permissions.
104

105
  @ivar dry_run_result: the value (if any) that will be returned to the caller
106
      in dry-run mode (signalled by opcode dry_run parameter)
107
  @cvar _OP_PARAMS: a list of opcode attributes, their default values
108
      they should get if not already defined, and types they must match
109

110
  """
111
  HPATH = None
112
  HTYPE = None
113
  _OP_PARAMS = []
114
  REQ_BGL = True
115

    
116
  def __init__(self, processor, op, context, rpc):
117
    """Constructor for LogicalUnit.
118

119
    This needs to be overridden in derived classes in order to check op
120
    validity.
121

122
    """
123
    self.proc = processor
124
    self.op = op
125
    self.cfg = context.cfg
126
    self.context = context
127
    self.rpc = rpc
128
    # Dicts used to declare locking needs to mcpu
129
    self.needed_locks = None
130
    self.acquired_locks = {}
131
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
132
    self.add_locks = {}
133
    self.remove_locks = {}
134
    # Used to force good behavior when calling helper functions
135
    self.recalculate_locks = {}
136
    self.__ssh = None
137
    # logging
138
    self.Log = processor.Log # pylint: disable-msg=C0103
139
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
140
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
141
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
142
    # support for dry-run
143
    self.dry_run_result = None
144
    # support for generic debug attribute
145
    if (not hasattr(self.op, "debug_level") or
146
        not isinstance(self.op.debug_level, int)):
147
      self.op.debug_level = 0
148

    
149
    # Tasklets
150
    self.tasklets = None
151

    
152
    # The new kind-of-type-system
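    # For each declared parameter: install the default value when the
    # attribute is missing (calling it first if the default is a callable
    # factory), then run the associated check, raising OpPrereqError for
    # missing required values or failed validation.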
153
    op_id = self.op.OP_ID
154
    for attr_name, aval, test in self._OP_PARAMS:
155
      if not hasattr(op, attr_name):
156
        if aval == ht.NoDefault:
157
          raise errors.OpPrereqError("Required parameter '%s.%s' missing" %
158
                                     (op_id, attr_name), errors.ECODE_INVAL)
159
        else:
160
          if callable(aval):
161
            dval = aval()
162
          else:
163
            dval = aval
164
          setattr(self.op, attr_name, dval)
165
      attr_val = getattr(op, attr_name)
166
      if test == ht.NoType:
167
        # no tests here
168
        continue
169
      if not callable(test):
170
        raise errors.ProgrammerError("Validation for parameter '%s.%s' failed,"
171
                                     " given type is not a proper type (%s)" %
172
                                     (op_id, attr_name, test))
173
      if not test(attr_val):
174
        logging.error("OpCode %s, parameter %s, has invalid type %s/value %s",
175
                      self.op.OP_ID, attr_name, type(attr_val), attr_val)
176
        raise errors.OpPrereqError("Parameter '%s.%s' fails validation" %
177
                                   (op_id, attr_name), errors.ECODE_INVAL)
178

    
179
    self.CheckArguments()
180

    
181
  def __GetSSH(self):
182
    """Returns the SshRunner object
183

184
    """
185
    if not self.__ssh:
186
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
187
    return self.__ssh
188

    
189
  ssh = property(fget=__GetSSH)
190

    
191
  def CheckArguments(self):
192
    """Check syntactic validity for the opcode arguments.
193

194
    This method is for doing a simple syntactic check and ensure
195
    validity of opcode parameters, without any cluster-related
196
    checks. While the same can be accomplished in ExpandNames and/or
197
    CheckPrereq, doing these separate is better because:
198

199
      - ExpandNames is left as purely a lock-related function
200
      - CheckPrereq is run after we have acquired locks (and possibly
201
        waited for them)
202

203
    The function is allowed to change the self.op attribute so that
204
    later methods no longer need to worry about missing parameters.
205

206
    """
207
    pass
208

    
209
  def ExpandNames(self):
210
    """Expand names for this LU.
211

212
    This method is called before starting to execute the opcode, and it should
213
    update all the parameters of the opcode to their canonical form (e.g. a
214
    short node name must be fully expanded after this method has successfully
215
    completed). This way locking, hooks, logging, etc. can work correctly.
216

217
    LUs which implement this method must also populate the self.needed_locks
218
    member, as a dict with lock levels as keys, and a list of needed lock names
219
    as values. Rules:
220

221
      - use an empty dict if you don't need any lock
222
      - if you don't need any lock at a particular level omit that level
223
      - don't put anything for the BGL level
224
      - if you want all locks at a level use locking.ALL_SET as a value
225

226
    If you need to share locks (rather than acquire them exclusively) at one
227
    level you can modify self.share_locks, setting a true value (usually 1) for
228
    that level. By default locks are not shared.
229

230
    This function can also define a list of tasklets, which then will be
231
    executed in order instead of the usual LU-level CheckPrereq and Exec
232
    functions, if those are not defined by the LU.
233

234
    Examples::
235

236
      # Acquire all nodes and one instance
237
      self.needed_locks = {
238
        locking.LEVEL_NODE: locking.ALL_SET,
239
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
240
      }
241
      # Acquire just two nodes
242
      self.needed_locks = {
243
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
244
      }
245
      # Acquire no locks
246
      self.needed_locks = {} # No, you can't leave it to the default value None
247

248
    """
249
    # The implementation of this method is mandatory only if the new LU is
250
    # concurrent, so that old LUs don't need to be changed all at the same
251
    # time.
252
    if self.REQ_BGL:
253
      self.needed_locks = {} # Exclusive LUs don't need locks.
254
    else:
255
      raise NotImplementedError
256

    
257
  def DeclareLocks(self, level):
258
    """Declare LU locking needs for a level
259

260
    While most LUs can just declare their locking needs at ExpandNames time,
261
    sometimes there's the need to calculate some locks after having acquired
262
    the ones before. This function is called just before acquiring locks at a
263
    particular level, but after acquiring the ones at lower levels, and permits
264
    such calculations. It can be used to modify self.needed_locks, and by
265
    default it does nothing.
266

267
    This function is only called if you have something already set in
268
    self.needed_locks for the level.
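
    A typical implementation sketch (mirroring the usage shown for
    L{_LockInstancesNodes} later in this module)::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()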
269

270
    @param level: Locking level which is going to be locked
271
    @type level: member of ganeti.locking.LEVELS
272

273
    """
274

    
275
  def CheckPrereq(self):
276
    """Check prerequisites for this LU.
277

278
    This method should check that the prerequisites for the execution
279
    of this LU are fulfilled. It can do internode communication, but
280
    it should be idempotent - no cluster or system changes are
281
    allowed.
282

283
    The method should raise errors.OpPrereqError in case something is
284
    not fulfilled. Its return value is ignored.
285

286
    This method should also update all the parameters of the opcode to
287
    their canonical form if it hasn't been done by ExpandNames before.
288

289
    """
290
    if self.tasklets is not None:
291
      for (idx, tl) in enumerate(self.tasklets):
292
        logging.debug("Checking prerequisites for tasklet %s/%s",
293
                      idx + 1, len(self.tasklets))
294
        tl.CheckPrereq()
295
    else:
296
      pass
297

    
298
  def Exec(self, feedback_fn):
299
    """Execute the LU.
300

301
    This method should implement the actual work. It should raise
302
    errors.OpExecError for failures that are somewhat dealt with in
303
    code, or expected.
304

305
    """
306
    if self.tasklets is not None:
307
      for (idx, tl) in enumerate(self.tasklets):
308
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
309
        tl.Exec(feedback_fn)
310
    else:
311
      raise NotImplementedError
312

    
313
  def BuildHooksEnv(self):
314
    """Build hooks environment for this LU.
315

316
    This method should return a three-element tuple consisting of: a dict
317
    containing the environment that will be used for running the
318
    specific hook for this LU, a list of node names on which the hook
319
    should run before the execution, and a list of node names on which
320
    the hook should run after the execution.
321

322
    The keys of the dict must not have 'GANETI_' prefixed as this will
323
    be handled in the hooks runner. Also note additional keys will be
324
    added by the hooks runner. If the LU doesn't define any
325
    environment, an empty dict (and not None) should be returned.
326

327
    If there are no nodes to return, use an empty list (and not None).
328

329
    Note that if the HPATH for a LU class is None, this function will
330
    not be called.
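
    A sketch of a typical return value (mirroring L{LUPostInitCluster};
    values are illustrative)::

      env = {"OP_TARGET": self.cfg.GetClusterName()}
      return env, [], [self.cfg.GetMasterNode()]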
331

332
    """
333
    raise NotImplementedError
334

    
335
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
336
    """Notify the LU about the results of its hooks.
337

338
    This method is called every time a hooks phase is executed, and notifies
339
    the Logical Unit about the hooks' result. The LU can then use it to alter
340
    its result based on the hooks.  By default the method does nothing and the
341
    previous result is passed back unchanged but any LU can define it if it
342
    wants to use the local cluster hook-scripts somehow.
343

344
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
345
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
346
    @param hook_results: the results of the multi-node hooks rpc call
347
    @param feedback_fn: function used to send feedback back to the caller
348
    @param lu_result: the previous Exec result this LU had, or None
349
        in the PRE phase
350
    @return: the new Exec result, based on the previous result
351
        and hook results
352

353
    """
354
    # API must be kept, thus we ignore the "unused argument" and "could
355
    # be a function" pylint warnings
356
    # pylint: disable-msg=W0613,R0201
357
    return lu_result
358

    
359
  def _ExpandAndLockInstance(self):
360
    """Helper function to expand and lock an instance.
361

362
    Many LUs that work on an instance take its name in self.op.instance_name
363
    and need to expand it and then declare the expanded name for locking. This
364
    function does it, and then updates self.op.instance_name to the expanded
365
    name. It also initializes needed_locks as a dict, if this hasn't been done
366
    before.
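
    A typical usage sketch from an LU's ExpandNames (as done by several LUs
    in this module)::

      def ExpandNames(self):
        self._ExpandAndLockInstance()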
367

368
    """
369
    if self.needed_locks is None:
370
      self.needed_locks = {}
371
    else:
372
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
373
        "_ExpandAndLockInstance called with instance-level locks set"
374
    self.op.instance_name = _ExpandInstanceName(self.cfg,
375
                                                self.op.instance_name)
376
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
377

    
378
  def _LockInstancesNodes(self, primary_only=False):
379
    """Helper function to declare instances' nodes for locking.
380

381
    This function should be called after locking one or more instances to lock
382
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
383
    with all primary or secondary nodes for instances already locked and
384
    present in self.needed_locks[locking.LEVEL_INSTANCE].
385

386
    It should be called from DeclareLocks, and for safety only works if
387
    self.recalculate_locks[locking.LEVEL_NODE] is set.
388

389
    In the future it may grow parameters to just lock some instance's nodes, or
390
    to just lock primaries or secondary nodes, if needed.
391

392
    It should be called from DeclareLocks in a way similar to::
393

394
      if level == locking.LEVEL_NODE:
395
        self._LockInstancesNodes()
396

397
    @type primary_only: boolean
398
    @param primary_only: only lock primary nodes of locked instances
399

400
    """
401
    assert locking.LEVEL_NODE in self.recalculate_locks, \
402
      "_LockInstancesNodes helper function called with no nodes to recalculate"
403

    
404
    # TODO: check if we have really been called with the instance locks held
405

    
406
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
407
    # future we might want to have different behaviors depending on the value
408
    # of self.recalculate_locks[locking.LEVEL_NODE]
409
    wanted_nodes = []
410
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
411
      instance = self.context.cfg.GetInstanceInfo(instance_name)
412
      wanted_nodes.append(instance.primary_node)
413
      if not primary_only:
414
        wanted_nodes.extend(instance.secondary_nodes)
415

    
416
    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
417
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
418
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
419
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
420

    
421
    del self.recalculate_locks[locking.LEVEL_NODE]
422

    
423

    
424
class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
425
  """Simple LU which runs no hooks.
426

427
  This LU is intended as a parent for other LogicalUnits which will
428
  run no hooks, in order to reduce duplicate code.
429

430
  """
431
  HPATH = None
432
  HTYPE = None
433

    
434
  def BuildHooksEnv(self):
435
    """Empty BuildHooksEnv for NoHooksLu.
436

437
    This just raises an error.
438

439
    """
440
    assert False, "BuildHooksEnv called for NoHooksLUs"
441

    
442

    
443
class Tasklet:
444
  """Tasklet base class.
445

446
  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
447
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
448
  tasklets know nothing about locks.
449

450
  Subclasses must follow these rules:
451
    - Implement CheckPrereq
452
    - Implement Exec
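
  A minimal sketch of how an LU typically wires tasklets in its ExpandNames
  (the tasklet class name here is illustrative)::

    self.tasklets = [SomeTasklet(self)]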
453

454
  """
455
  def __init__(self, lu):
456
    self.lu = lu
457

    
458
    # Shortcuts
459
    self.cfg = lu.cfg
460
    self.rpc = lu.rpc
461

    
462
  def CheckPrereq(self):
463
    """Check prerequisites for this tasklets.
464

465
    This method should check whether the prerequisites for the execution of
466
    this tasklet are fulfilled. It can do internode communication, but it
467
    should be idempotent - no cluster or system changes are allowed.
468

469
    The method should raise errors.OpPrereqError in case something is not
470
    fulfilled. Its return value is ignored.
471

472
    This method should also update all parameters to their canonical form if it
473
    hasn't been done before.
474

475
    """
476
    pass
477

    
478
  def Exec(self, feedback_fn):
479
    """Execute the tasklet.
480

481
    This method should implement the actual work. It should raise
482
    errors.OpExecError for failures that are somewhat dealt with in code, or
483
    expected.
484

485
    """
486
    raise NotImplementedError
487

    
488

    
489
def _GetWantedNodes(lu, nodes):
490
  """Returns list of checked and expanded node names.
491

492
  @type lu: L{LogicalUnit}
493
  @param lu: the logical unit on whose behalf we execute
494
  @type nodes: list
495
  @param nodes: list of node names or None for all nodes
496
  @rtype: list
497
  @return: the list of nodes, sorted
498
  @raise errors.ProgrammerError: if the nodes parameter is wrong type
499

500
  """
501
  if not nodes:
502
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
503
      " non-empty list of nodes whose name is to be expanded.")
504

    
505
  wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
506
  return utils.NiceSort(wanted)
507

    
508

    
509
def _GetWantedInstances(lu, instances):
510
  """Returns list of checked and expanded instance names.
511

512
  @type lu: L{LogicalUnit}
513
  @param lu: the logical unit on whose behalf we execute
514
  @type instances: list
515
  @param instances: list of instance names or None for all instances
516
  @rtype: list
517
  @return: the list of instances, sorted
518
  @raise errors.OpPrereqError: if the instances parameter is wrong type
519
  @raise errors.OpPrereqError: if any of the passed instances is not found
520

521
  """
522
  if instances:
523
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
524
  else:
525
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
526
  return wanted
527

    
528

    
529
def _GetUpdatedParams(old_params, update_dict,
530
                      use_default=True, use_none=False):
531
  """Return the new version of a parameter dictionary.
532

533
  @type old_params: dict
534
  @param old_params: old parameters
535
  @type update_dict: dict
536
  @param update_dict: dict containing new parameter values, or
537
      constants.VALUE_DEFAULT to reset the parameter to its default
538
      value
539
  @param use_default: boolean
540
  @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
541
      values as 'to be deleted' values
542
  @param use_none: boolean
543
  @type use_none: whether to recognise C{None} values as 'to be
544
      deleted' values
545
  @rtype: dict
546
  @return: the new parameter dictionary
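
  A small sketch of the merge semantics (values are illustrative; the
  default use_default=True applies)::

    _GetUpdatedParams({"a": 1, "b": 2},
                      {"b": constants.VALUE_DEFAULT, "c": 3})
    # -> {"a": 1, "c": 3}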
547

548
  """
549
  params_copy = copy.deepcopy(old_params)
550
  for key, val in update_dict.iteritems():
551
    if ((use_default and val == constants.VALUE_DEFAULT) or
552
        (use_none and val is None)):
553
      try:
554
        del params_copy[key]
555
      except KeyError:
556
        pass
557
    else:
558
      params_copy[key] = val
559
  return params_copy
560

    
561

    
562
def _CheckOutputFields(static, dynamic, selected):
563
  """Checks whether all selected fields are valid.
564

565
  @type static: L{utils.FieldSet}
566
  @param static: static fields set
567
  @type dynamic: L{utils.FieldSet}
568
  @param dynamic: dynamic fields set
569

570
  """
571
  f = utils.FieldSet()
572
  f.Extend(static)
573
  f.Extend(dynamic)
574

    
575
  delta = f.NonMatching(selected)
576
  if delta:
577
    raise errors.OpPrereqError("Unknown output fields selected: %s"
578
                               % ",".join(delta), errors.ECODE_INVAL)
579

    
580

    
581
def _CheckGlobalHvParams(params):
582
  """Validates that given hypervisor params are not global ones.
583

584
  This will ensure that instances don't get customised versions of
585
  global params.
586

587
  """
588
  used_globals = constants.HVC_GLOBALS.intersection(params)
589
  if used_globals:
590
    msg = ("The following hypervisor parameters are global and cannot"
591
           " be customized at instance level, please modify them at"
592
           " cluster level: %s" % utils.CommaJoin(used_globals))
593
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
594

    
595

    
596
def _CheckNodeOnline(lu, node):
597
  """Ensure that a given node is online.
598

599
  @param lu: the LU on behalf of which we make the check
600
  @param node: the node to check
601
  @raise errors.OpPrereqError: if the node is offline
602

603
  """
604
  if lu.cfg.GetNodeInfo(node).offline:
605
    raise errors.OpPrereqError("Can't use offline node %s" % node,
606
                               errors.ECODE_STATE)
607

    
608

    
609
def _CheckNodeNotDrained(lu, node):
610
  """Ensure that a given node is not drained.
611

612
  @param lu: the LU on behalf of which we make the check
613
  @param node: the node to check
614
  @raise errors.OpPrereqError: if the node is drained
615

616
  """
617
  if lu.cfg.GetNodeInfo(node).drained:
618
    raise errors.OpPrereqError("Can't use drained node %s" % node,
619
                               errors.ECODE_STATE)
620

    
621

    
622
def _CheckNodeVmCapable(lu, node):
623
  """Ensure that a given node is vm capable.
624

625
  @param lu: the LU on behalf of which we make the check
626
  @param node: the node to check
627
  @raise errors.OpPrereqError: if the node is not vm capable
628

629
  """
630
  if not lu.cfg.GetNodeInfo(node).vm_capable:
631
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
632
                               errors.ECODE_STATE)
633

    
634

    
635
def _CheckNodeHasOS(lu, node, os_name, force_variant):
636
  """Ensure that a node supports a given OS.
637

638
  @param lu: the LU on behalf of which we make the check
639
  @param node: the node to check
640
  @param os_name: the OS to query about
641
  @param force_variant: whether to ignore variant errors
642
  @raise errors.OpPrereqError: if the node is not supporting the OS
643

644
  """
645
  result = lu.rpc.call_os_get(node, os_name)
646
  result.Raise("OS '%s' not in supported OS list for node %s" %
647
               (os_name, node),
648
               prereq=True, ecode=errors.ECODE_INVAL)
649
  if not force_variant:
650
    _CheckOSVariant(result.payload, os_name)
651

    
652

    
653
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
654
  """Ensure that a node has the given secondary ip.
655

656
  @type lu: L{LogicalUnit}
657
  @param lu: the LU on behalf of which we make the check
658
  @type node: string
659
  @param node: the node to check
660
  @type secondary_ip: string
661
  @param secondary_ip: the ip to check
662
  @type prereq: boolean
663
  @param prereq: whether to throw a prerequisite or an execute error
664
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
665
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
666

667
  """
668
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
669
  result.Raise("Failure checking secondary ip on node %s" % node,
670
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
671
  if not result.payload:
672
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
673
           " please fix and re-run this command" % secondary_ip)
674
    if prereq:
675
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
676
    else:
677
      raise errors.OpExecError(msg)
678

    
679

    
680
def _RequireFileStorage():
681
  """Checks that file storage is enabled.
682

683
  @raise errors.OpPrereqError: when file storage is disabled
684

685
  """
686
  if not constants.ENABLE_FILE_STORAGE:
687
    raise errors.OpPrereqError("File storage disabled at configure time",
688
                               errors.ECODE_INVAL)
689

    
690

    
691
def _CheckDiskTemplate(template):
692
  """Ensure a given disk template is valid.
693

694
  """
695
  if template not in constants.DISK_TEMPLATES:
696
    msg = ("Invalid disk template name '%s', valid templates are: %s" %
697
           (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
698
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
699
  if template == constants.DT_FILE:
700
    _RequireFileStorage()
701
  return True
702

    
703

    
704
def _CheckStorageType(storage_type):
705
  """Ensure a given storage type is valid.
706

707
  """
708
  if storage_type not in constants.VALID_STORAGE_TYPES:
709
    raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
710
                               errors.ECODE_INVAL)
711
  if storage_type == constants.ST_FILE:
712
    _RequireFileStorage()
713
  return True
714

    
715

    
716
def _GetClusterDomainSecret():
717
  """Reads the cluster domain secret.
718

719
  """
720
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
721
                               strict=True)
722

    
723

    
724
def _CheckInstanceDown(lu, instance, reason):
725
  """Ensure that an instance is not running."""
726
  if instance.admin_up:
727
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
728
                               (instance.name, reason), errors.ECODE_STATE)
729

    
730
  pnode = instance.primary_node
731
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
732
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
733
              prereq=True, ecode=errors.ECODE_ENVIRON)
734

    
735
  if instance.name in ins_l.payload:
736
    raise errors.OpPrereqError("Instance %s is running, %s" %
737
                               (instance.name, reason), errors.ECODE_STATE)
738

    
739

    
740
def _ExpandItemName(fn, name, kind):
741
  """Expand an item name.
742

743
  @param fn: the function to use for expansion
744
  @param name: requested item name
745
  @param kind: text description ('Node' or 'Instance')
746
  @return: the resolved (full) name
747
  @raise errors.OpPrereqError: if the item is not found
748

749
  """
750
  full_name = fn(name)
751
  if full_name is None:
752
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
753
                               errors.ECODE_NOENT)
754
  return full_name
755

    
756

    
757
def _ExpandNodeName(cfg, name):
758
  """Wrapper over L{_ExpandItemName} for nodes."""
759
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
760

    
761

    
762
def _ExpandInstanceName(cfg, name):
763
  """Wrapper over L{_ExpandItemName} for instance."""
764
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
765

    
766

    
767
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
768
                          memory, vcpus, nics, disk_template, disks,
769
                          bep, hvp, hypervisor_name):
770
  """Builds instance related env variables for hooks
771

772
  This builds the hook environment from individual variables.
773

774
  @type name: string
775
  @param name: the name of the instance
776
  @type primary_node: string
777
  @param primary_node: the name of the instance's primary node
778
  @type secondary_nodes: list
779
  @param secondary_nodes: list of secondary nodes as strings
780
  @type os_type: string
781
  @param os_type: the name of the instance's OS
782
  @type status: boolean
783
  @param status: the should_run status of the instance
784
  @type memory: string
785
  @param memory: the memory size of the instance
786
  @type vcpus: string
787
  @param vcpus: the count of VCPUs the instance has
788
  @type nics: list
789
  @param nics: list of tuples (ip, mac, mode, link) representing
790
      the NICs the instance has
791
  @type disk_template: string
792
  @param disk_template: the disk template of the instance
793
  @type disks: list
794
  @param disks: the list of (size, mode) pairs
795
  @type bep: dict
796
  @param bep: the backend parameters for the instance
797
  @type hvp: dict
798
  @param hvp: the hypervisor parameters for the instance
799
  @type hypervisor_name: string
800
  @param hypervisor_name: the hypervisor for the instance
801
  @rtype: dict
802
  @return: the hook environment for this instance
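
  A sketch of the per-NIC/per-disk keys generated for index 0 (key names as
  built by the code below)::

    INSTANCE_NIC_COUNT, INSTANCE_NIC0_IP, INSTANCE_NIC0_MAC,
    INSTANCE_NIC0_MODE, INSTANCE_NIC0_LINK,
    INSTANCE_DISK_COUNT, INSTANCE_DISK0_SIZE, INSTANCE_DISK0_MODE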
803

804
  """
805
  if status:
806
    str_status = "up"
807
  else:
808
    str_status = "down"
809
  env = {
810
    "OP_TARGET": name,
811
    "INSTANCE_NAME": name,
812
    "INSTANCE_PRIMARY": primary_node,
813
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
814
    "INSTANCE_OS_TYPE": os_type,
815
    "INSTANCE_STATUS": str_status,
816
    "INSTANCE_MEMORY": memory,
817
    "INSTANCE_VCPUS": vcpus,
818
    "INSTANCE_DISK_TEMPLATE": disk_template,
819
    "INSTANCE_HYPERVISOR": hypervisor_name,
820
  }
821

    
822
  if nics:
823
    nic_count = len(nics)
824
    for idx, (ip, mac, mode, link) in enumerate(nics):
825
      if ip is None:
826
        ip = ""
827
      env["INSTANCE_NIC%d_IP" % idx] = ip
828
      env["INSTANCE_NIC%d_MAC" % idx] = mac
829
      env["INSTANCE_NIC%d_MODE" % idx] = mode
830
      env["INSTANCE_NIC%d_LINK" % idx] = link
831
      if mode == constants.NIC_MODE_BRIDGED:
832
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
833
  else:
834
    nic_count = 0
835

    
836
  env["INSTANCE_NIC_COUNT"] = nic_count
837

    
838
  if disks:
839
    disk_count = len(disks)
840
    for idx, (size, mode) in enumerate(disks):
841
      env["INSTANCE_DISK%d_SIZE" % idx] = size
842
      env["INSTANCE_DISK%d_MODE" % idx] = mode
843
  else:
844
    disk_count = 0
845

    
846
  env["INSTANCE_DISK_COUNT"] = disk_count
847

    
848
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
849
    for key, value in source.items():
850
      env["INSTANCE_%s_%s" % (kind, key)] = value
851

    
852
  return env
853

    
854

    
855
def _NICListToTuple(lu, nics):
856
  """Build a list of nic information tuples.
857

858
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
859
  value in LUQueryInstanceData.
860

861
  @type lu:  L{LogicalUnit}
862
  @param lu: the logical unit on whose behalf we execute
863
  @type nics: list of L{objects.NIC}
864
  @param nics: list of nics to convert to hooks tuples
865

866
  """
867
  hooks_nics = []
868
  cluster = lu.cfg.GetClusterInfo()
869
  for nic in nics:
870
    ip = nic.ip
871
    mac = nic.mac
872
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
873
    mode = filled_params[constants.NIC_MODE]
874
    link = filled_params[constants.NIC_LINK]
875
    hooks_nics.append((ip, mac, mode, link))
876
  return hooks_nics
877

    
878

    
879
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
880
  """Builds instance related env variables for hooks from an object.
881

882
  @type lu: L{LogicalUnit}
883
  @param lu: the logical unit on whose behalf we execute
884
  @type instance: L{objects.Instance}
885
  @param instance: the instance for which we should build the
886
      environment
887
  @type override: dict
888
  @param override: dictionary with key/values that will override
889
      our values
890
  @rtype: dict
891
  @return: the hook environment dictionary
892

893
  """
894
  cluster = lu.cfg.GetClusterInfo()
895
  bep = cluster.FillBE(instance)
896
  hvp = cluster.FillHV(instance)
897
  args = {
898
    'name': instance.name,
899
    'primary_node': instance.primary_node,
900
    'secondary_nodes': instance.secondary_nodes,
901
    'os_type': instance.os,
902
    'status': instance.admin_up,
903
    'memory': bep[constants.BE_MEMORY],
904
    'vcpus': bep[constants.BE_VCPUS],
905
    'nics': _NICListToTuple(lu, instance.nics),
906
    'disk_template': instance.disk_template,
907
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
908
    'bep': bep,
909
    'hvp': hvp,
910
    'hypervisor_name': instance.hypervisor,
911
  }
912
  if override:
913
    args.update(override)
914
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
915

    
916

    
917
def _AdjustCandidatePool(lu, exceptions):
918
  """Adjust the candidate pool after node operations.
919

920
  """
921
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
922
  if mod_list:
923
    lu.LogInfo("Promoted nodes to master candidate role: %s",
924
               utils.CommaJoin(node.name for node in mod_list))
925
    for name in mod_list:
926
      lu.context.ReaddNode(name)
927
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
928
  if mc_now > mc_max:
929
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
930
               (mc_now, mc_max))
931

    
932

    
933
def _DecideSelfPromotion(lu, exceptions=None):
934
  """Decide whether I should promote myself as a master candidate.
935

936
  """
937
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
938
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
939
  # the new node will increase mc_max by one, so:
940
  mc_should = min(mc_should + 1, cp_size)
941
  return mc_now < mc_should
942

    
943

    
944
def _CheckNicsBridgesExist(lu, target_nics, target_node):
945
  """Check that the brigdes needed by a list of nics exist.
946

947
  """
948
  cluster = lu.cfg.GetClusterInfo()
949
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
950
  brlist = [params[constants.NIC_LINK] for params in paramslist
951
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
952
  if brlist:
953
    result = lu.rpc.call_bridges_exist(target_node, brlist)
954
    result.Raise("Error checking bridges on destination node '%s'" %
955
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
956

    
957

    
958
def _CheckInstanceBridgesExist(lu, instance, node=None):
959
  """Check that the brigdes needed by an instance exist.
960

961
  """
962
  if node is None:
963
    node = instance.primary_node
964
  _CheckNicsBridgesExist(lu, instance.nics, node)
965

    
966

    
967
def _CheckOSVariant(os_obj, name):
968
  """Check whether an OS name conforms to the os variants specification.
969

970
  @type os_obj: L{objects.OS}
971
  @param os_obj: OS object to check
972
  @type name: string
973
  @param name: OS name passed by the user, to check for validity
974

975
  """
976
  if not os_obj.supported_variants:
977
    return
978
  variant = objects.OS.GetVariant(name)
979
  if not variant:
980
    raise errors.OpPrereqError("OS name must include a variant",
981
                               errors.ECODE_INVAL)
982

    
983
  if variant not in os_obj.supported_variants:
984
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
985

    
986

    
987
def _GetNodeInstancesInner(cfg, fn):
988
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
989

    
990

    
991
def _GetNodeInstances(cfg, node_name):
992
  """Returns a list of all primary and secondary instances on a node.
993

994
  """
995

    
996
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
997

    
998

    
999
def _GetNodePrimaryInstances(cfg, node_name):
1000
  """Returns primary instances on a node.
1001

1002
  """
1003
  return _GetNodeInstancesInner(cfg,
1004
                                lambda inst: node_name == inst.primary_node)
1005

    
1006

    
1007
def _GetNodeSecondaryInstances(cfg, node_name):
1008
  """Returns secondary instances on a node.
1009

1010
  """
1011
  return _GetNodeInstancesInner(cfg,
1012
                                lambda inst: node_name in inst.secondary_nodes)
1013

    
1014

    
1015
def _GetStorageTypeArgs(cfg, storage_type):
1016
  """Returns the arguments for a storage type.
1017

1018
  """
1019
  # Special case for file storage
1020
  if storage_type == constants.ST_FILE:
1021
    # storage.FileStorage wants a list of storage directories
1022
    return [[cfg.GetFileStorageDir()]]
1023

    
1024
  return []
1025

    
1026

    
1027
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1028
  faulty = []
1029

    
1030
  for dev in instance.disks:
1031
    cfg.SetDiskID(dev, node_name)
1032

    
1033
  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1034
  result.Raise("Failed to get disk status from node %s" % node_name,
1035
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
1036

    
1037
  for idx, bdev_status in enumerate(result.payload):
1038
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1039
      faulty.append(idx)
1040

    
1041
  return faulty
1042

    
1043

    
1044
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1045
  """Check the sanity of iallocator and node arguments and use the
1046
  cluster-wide iallocator if appropriate.
1047

1048
  Check that at most one of (iallocator, node) is specified. If none is
1049
  specified, then the LU's opcode's iallocator slot is filled with the
1050
  cluster-wide default iallocator.
1051

1052
  @type iallocator_slot: string
1053
  @param iallocator_slot: the name of the opcode iallocator slot
1054
  @type node_slot: string
1055
  @param node_slot: the name of the opcode target node slot
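
  A sketch of the possible outcomes (summarizing the checks below)::

    node set and iallocator set      -> OpPrereqError
    neither set, default configured  -> iallocator slot filled with default
    neither set, no default          -> OpPrereqError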
1056

1057
  """
1058
  node = getattr(lu.op, node_slot, None)
1059
  iallocator = getattr(lu.op, iallocator_slot, None)
1060

    
1061
  if node is not None and iallocator is not None:
1062
    raise errors.OpPrereqError("Do not specify both, iallocator and node.",
1063
                               errors.ECODE_INVAL)
1064
  elif node is None and iallocator is None:
1065
    default_iallocator = lu.cfg.GetDefaultIAllocator()
1066
    if default_iallocator:
1067
      setattr(lu.op, iallocator_slot, default_iallocator)
1068
    else:
1069
      raise errors.OpPrereqError("No iallocator or node given and no"
1070
                                 " cluster-wide default iallocator found."
1071
                                 " Please specify either an iallocator or a"
1072
                                 " node, or set a cluster-wide default"
1073
                                 " iallocator.")
1074

    
1075

    
1076
class LUPostInitCluster(LogicalUnit):
1077
  """Logical unit for running hooks after cluster initialization.
1078

1079
  """
1080
  HPATH = "cluster-init"
1081
  HTYPE = constants.HTYPE_CLUSTER
1082

    
1083
  def BuildHooksEnv(self):
1084
    """Build hooks env.
1085

1086
    """
1087
    env = {"OP_TARGET": self.cfg.GetClusterName()}
1088
    mn = self.cfg.GetMasterNode()
1089
    return env, [], [mn]
1090

    
1091
  def Exec(self, feedback_fn):
1092
    """Nothing to do.
1093

1094
    """
1095
    return True
1096

    
1097

    
1098
class LUDestroyCluster(LogicalUnit):
1099
  """Logical unit for destroying the cluster.
1100

1101
  """
1102
  HPATH = "cluster-destroy"
1103
  HTYPE = constants.HTYPE_CLUSTER
1104

    
1105
  def BuildHooksEnv(self):
1106
    """Build hooks env.
1107

1108
    """
1109
    env = {"OP_TARGET": self.cfg.GetClusterName()}
1110
    return env, [], []
1111

    
1112
  def CheckPrereq(self):
1113
    """Check prerequisites.
1114

1115
    This checks whether the cluster is empty.
1116

1117
    Any errors are signaled by raising errors.OpPrereqError.
1118

1119
    """
1120
    master = self.cfg.GetMasterNode()
1121

    
1122
    nodelist = self.cfg.GetNodeList()
1123
    if len(nodelist) != 1 or nodelist[0] != master:
1124
      raise errors.OpPrereqError("There are still %d node(s) in"
1125
                                 " this cluster." % (len(nodelist) - 1),
1126
                                 errors.ECODE_INVAL)
1127
    instancelist = self.cfg.GetInstanceList()
1128
    if instancelist:
1129
      raise errors.OpPrereqError("There are still %d instance(s) in"
1130
                                 " this cluster." % len(instancelist),
1131
                                 errors.ECODE_INVAL)
1132

    
1133
  def Exec(self, feedback_fn):
1134
    """Destroys the cluster.
1135

1136
    """
1137
    master = self.cfg.GetMasterNode()
1138

    
1139
    # Run post hooks on master node before it's removed
1140
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
1141
    try:
1142
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
1143
    except:
1144
      # pylint: disable-msg=W0702
1145
      self.LogWarning("Errors occurred running hooks on %s" % master)
1146

    
1147
    result = self.rpc.call_node_stop_master(master, False)
1148
    result.Raise("Could not disable the master role")
1149

    
1150
    return master
1151

    
1152

    
1153
def _VerifyCertificate(filename):
1154
  """Verifies a certificate for LUVerifyCluster.
1155

1156
  @type filename: string
1157
  @param filename: Path to PEM file
1158

1159
  """
1160
  try:
1161
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1162
                                           utils.ReadFile(filename))
1163
  except Exception, err: # pylint: disable-msg=W0703
1164
    return (LUVerifyCluster.ETYPE_ERROR,
1165
            "Failed to load X509 certificate %s: %s" % (filename, err))
1166

    
1167
  (errcode, msg) = \
1168
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1169
                                constants.SSL_CERT_EXPIRATION_ERROR)
1170

    
1171
  if msg:
1172
    fnamemsg = "While verifying %s: %s" % (filename, msg)
1173
  else:
1174
    fnamemsg = None
1175

    
1176
  if errcode is None:
1177
    return (None, fnamemsg)
1178
  elif errcode == utils.CERT_WARNING:
1179
    return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
1180
  elif errcode == utils.CERT_ERROR:
1181
    return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)
1182

    
1183
  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1184

    
1185

    
1186
class LUVerifyCluster(LogicalUnit):
1187
  """Verifies the cluster status.
1188

1189
  """
1190
  HPATH = "cluster-verify"
1191
  HTYPE = constants.HTYPE_CLUSTER
1192
  _OP_PARAMS = [
1193
    ("skip_checks", ht.EmptyList,
1194
     ht.TListOf(ht.TElemOf(constants.VERIFY_OPTIONAL_CHECKS))),
1195
    ("verbose", False, ht.TBool),
1196
    ("error_codes", False, ht.TBool),
1197
    ("debug_simulate_errors", False, ht.TBool),
1198
    ]
1199
  REQ_BGL = False
1200

    
1201
  TCLUSTER = "cluster"
1202
  TNODE = "node"
1203
  TINSTANCE = "instance"
1204

    
1205
  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1206
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1207
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1208
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1209
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1210
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1211
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1212
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1213
  ENODEDRBD = (TNODE, "ENODEDRBD")
1214
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1215
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1216
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
1217
  ENODEHV = (TNODE, "ENODEHV")
1218
  ENODELVM = (TNODE, "ENODELVM")
1219
  ENODEN1 = (TNODE, "ENODEN1")
1220
  ENODENET = (TNODE, "ENODENET")
1221
  ENODEOS = (TNODE, "ENODEOS")
1222
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1223
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1224
  ENODERPC = (TNODE, "ENODERPC")
1225
  ENODESSH = (TNODE, "ENODESSH")
1226
  ENODEVERSION = (TNODE, "ENODEVERSION")
1227
  ENODESETUP = (TNODE, "ENODESETUP")
1228
  ENODETIME = (TNODE, "ENODETIME")
1229

    
1230
  ETYPE_FIELD = "code"
1231
  ETYPE_ERROR = "ERROR"
1232
  ETYPE_WARNING = "WARNING"
1233

    
1234
  class NodeImage(object):
1235
    """A class representing the logical and physical status of a node.
1236

1237
    @type name: string
1238
    @ivar name: the node name to which this object refers
1239
    @ivar volumes: a structure as returned from
1240
        L{ganeti.backend.GetVolumeList} (runtime)
1241
    @ivar instances: a list of running instances (runtime)
1242
    @ivar pinst: list of configured primary instances (config)
1243
    @ivar sinst: list of configured secondary instances (config)
1244
    @ivar sbp: dictionary of {secondary-node: list of instances} of all peers
1245
        of this node (config)
1246
    @ivar mfree: free memory, as reported by hypervisor (runtime)
1247
    @ivar dfree: free disk, as reported by the node (runtime)
1248
    @ivar offline: the offline status (config)
1249
    @type rpc_fail: boolean
1250
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
1251
        not whether the individual keys were correct) (runtime)
1252
    @type lvm_fail: boolean
1253
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1254
    @type hyp_fail: boolean
1255
    @ivar hyp_fail: whether the RPC call didn't return the instance list
1256
    @type ghost: boolean
1257
    @ivar ghost: whether this is a known node or not (config)
1258
    @type os_fail: boolean
1259
    @ivar os_fail: whether the RPC call didn't return valid OS data
1260
    @type oslist: list
1261
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1262
    @type vm_capable: boolean
1263
    @ivar vm_capable: whether the node can host instances
1264

1265
    """
1266
    def __init__(self, offline=False, name=None, vm_capable=True):
1267
      self.name = name
1268
      self.volumes = {}
1269
      self.instances = []
1270
      self.pinst = []
1271
      self.sinst = []
1272
      self.sbp = {}
1273
      self.mfree = 0
1274
      self.dfree = 0
1275
      self.offline = offline
1276
      self.vm_capable = vm_capable
1277
      self.rpc_fail = False
1278
      self.lvm_fail = False
1279
      self.hyp_fail = False
1280
      self.ghost = False
1281
      self.os_fail = False
1282
      self.oslist = {}
1283

    
1284
  def ExpandNames(self):
1285
    self.needed_locks = {
1286
      locking.LEVEL_NODE: locking.ALL_SET,
1287
      locking.LEVEL_INSTANCE: locking.ALL_SET,
1288
    }
1289
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1290

    
1291
  def _Error(self, ecode, item, msg, *args, **kwargs):
1292
    """Format an error message.
1293

1294
    Based on the opcode's error_codes parameter, either format a
1295
    parseable error code, or a simpler error string.
1296

1297
    This must be called only from Exec and functions called from Exec.
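
    A sketch of the two output formats (values are illustrative)::

      with error_codes:    ERROR:ENODESSH:node:node1.example.com:<msg>
      without error_codes: ERROR: node node1.example.com: <msg>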
1298

1299
    """
1300
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1301
    itype, etxt = ecode
1302
    # first complete the msg
1303
    if args:
1304
      msg = msg % args
1305
    # then format the whole message
1306
    if self.op.error_codes:
1307
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1308
    else:
1309
      if item:
1310
        item = " " + item
1311
      else:
1312
        item = ""
1313
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1314
    # and finally report it via the feedback_fn
1315
    self._feedback_fn("  - %s" % msg)
1316

    
1317
  def _ErrorIf(self, cond, *args, **kwargs):
1318
    """Log an error message if the passed condition is True.
1319

1320
    """
1321
    cond = bool(cond) or self.op.debug_simulate_errors
1322
    if cond:
1323
      self._Error(*args, **kwargs)
1324
    # do not mark the operation as failed for WARN cases only
1325
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1326
      self.bad = self.bad or cond
1327

    
1328
  def _VerifyNode(self, ninfo, nresult):
1329
    """Perform some basic validation on data returned from a node.
1330

1331
      - check the result data structure is well formed and has all the
1332
        mandatory fields
1333
      - check ganeti version
1334

1335
    @type ninfo: L{objects.Node}
1336
    @param ninfo: the node to check
1337
    @param nresult: the results from the node
1338
    @rtype: boolean
1339
    @return: whether overall this call was successful (and we can expect
1340
         reasonable values in the response)
1341

1342
    """
1343
    node = ninfo.name
1344
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1345

    
1346
    # main result, nresult should be a non-empty dict
1347
    test = not nresult or not isinstance(nresult, dict)
1348
    _ErrorIf(test, self.ENODERPC, node,
1349
                  "unable to verify node: no data returned")
1350
    if test:
1351
      return False
1352

    
1353
    # compares ganeti version
1354
    local_version = constants.PROTOCOL_VERSION
1355
    remote_version = nresult.get("version", None)
1356
    test = not (remote_version and
1357
                isinstance(remote_version, (list, tuple)) and
1358
                len(remote_version) == 2)
1359
    _ErrorIf(test, self.ENODERPC, node,
1360
             "connection to node returned invalid data")
1361
    if test:
1362
      return False
1363

    
1364
    test = local_version != remote_version[0]
1365
    _ErrorIf(test, self.ENODEVERSION, node,
1366
             "incompatible protocol versions: master %s,"
1367
             " node %s", local_version, remote_version[0])
1368
    if test:
1369
      return False
1370

    
1371
    # node seems compatible, we can actually try to look into its results
1372

    
1373
    # full package version
1374
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1375
                  self.ENODEVERSION, node,
1376
                  "software version mismatch: master %s, node %s",
1377
                  constants.RELEASE_VERSION, remote_version[1],
1378
                  code=self.ETYPE_WARNING)
1379

    
1380
    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1381
    if ninfo.vm_capable and isinstance(hyp_result, dict):
1382
      for hv_name, hv_result in hyp_result.iteritems():
1383
        test = hv_result is not None
1384
        _ErrorIf(test, self.ENODEHV, node,
1385
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1386

    
1387
    test = nresult.get(constants.NV_NODESETUP,
1388
                           ["Missing NODESETUP results"])
1389
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1390
             "; ".join(test))
1391

    
1392
    return True
1393

    
1394
  def _VerifyNodeTime(self, ninfo, nresult,
1395
                      nvinfo_starttime, nvinfo_endtime):
1396
    """Check the node time.
1397

1398
    @type ninfo: L{objects.Node}
1399
    @param ninfo: the node to check
1400
    @param nresult: the remote results for the node
1401
    @param nvinfo_starttime: the start time of the RPC call
1402
    @param nvinfo_endtime: the end time of the RPC call
1403

1404
    """
1405
    node = ninfo.name
1406
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1407

    
1408
    ntime = nresult.get(constants.NV_TIME, None)
1409
    try:
1410
      ntime_merged = utils.MergeTime(ntime)
1411
    except (ValueError, TypeError):
1412
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1413
      return
1414

    
1415
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1416
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1417
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1418
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1419
    else:
1420
      ntime_diff = None
1421

    
1422
    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1423
             "Node time diverges by at least %s from master node time",
1424
             ntime_diff)
1425

    
1426
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1427
    """Check the node time.
1428

1429
    @type ninfo: L{objects.Node}
1430
    @param ninfo: the node to check
1431
    @param nresult: the remote results for the node
1432
    @param vg_name: the configured VG name
1433

1434
    """
1435
    if vg_name is None:
1436
      return
1437

    
1438
    node = ninfo.name
1439
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1440

    
1441
    # checks vg existence and size > 20G
1442
    vglist = nresult.get(constants.NV_VGLIST, None)
1443
    test = not vglist
1444
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1445
    if not test:
1446
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1447
                                            constants.MIN_VG_SIZE)
1448
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1449

    
1450
    # check pv names
1451
    pvlist = nresult.get(constants.NV_PVLIST, None)
1452
    test = pvlist is None
1453
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1454
    if not test:
1455
      # check that ':' is not present in PV names, since it's a
1456
      # special character for lvcreate (denotes the range of PEs to
1457
      # use on the PV)
1458
      for _, pvname, owner_vg in pvlist:
1459
        test = ":" in pvname
1460
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1461
                 " '%s' of VG '%s'", pvname, owner_vg)
1462

    
1463
  def _VerifyNodeNetwork(self, ninfo, nresult):
1464
    """Check the node time.
1465

1466
    @type ninfo: L{objects.Node}
1467
    @param ninfo: the node to check
1468
    @param nresult: the remote results for the node
1469

1470
    """
1471
    node = ninfo.name
1472
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1473

    
1474
    test = constants.NV_NODELIST not in nresult
1475
    _ErrorIf(test, self.ENODESSH, node,
1476
             "node hasn't returned node ssh connectivity data")
1477
    if not test:
1478
      if nresult[constants.NV_NODELIST]:
1479
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1480
          _ErrorIf(True, self.ENODESSH, node,
1481
                   "ssh communication with node '%s': %s", a_node, a_msg)
1482

    
1483
    test = constants.NV_NODENETTEST not in nresult
1484
    _ErrorIf(test, self.ENODENET, node,
1485
             "node hasn't returned node tcp connectivity data")
1486
    if not test:
1487
      if nresult[constants.NV_NODENETTEST]:
1488
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1489
        for anode in nlist:
1490
          _ErrorIf(True, self.ENODENET, node,
1491
                   "tcp communication with node '%s': %s",
1492
                   anode, nresult[constants.NV_NODENETTEST][anode])
1493

    
1494
    test = constants.NV_MASTERIP not in nresult
1495
    _ErrorIf(test, self.ENODENET, node,
1496
             "node hasn't returned node master IP reachability data")
1497
    if not test:
1498
      if not nresult[constants.NV_MASTERIP]:
1499
        if node == self.master_node:
1500
          msg = "the master node cannot reach the master IP (not configured?)"
1501
        else:
1502
          msg = "cannot reach the master IP"
1503
        _ErrorIf(True, self.ENODENET, node, msg)
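    # Illustrative sketch (not used by this LU): the connectivity payloads
    # checked above are assumed to map a peer node name to an error message,
    # so an empty dict means "all peers reachable" (example values only):
    #
    #   nresult_ok = {constants.NV_NODELIST: {}, constants.NV_NODENETTEST: {}}
    #   nresult_bad = {constants.NV_NODELIST: {"node2": "connection refused"}}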
1504

    
1505
  def _VerifyInstance(self, instance, instanceconfig, node_image,
1506
                      diskstatus):
1507
    """Verify an instance.
1508

1509
    This function checks to see if the required block devices are
1510
    available on the instance's node.
1511

1512
    """
1513
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1514
    node_current = instanceconfig.primary_node
1515

    
1516
    node_vol_should = {}
1517
    instanceconfig.MapLVsByNode(node_vol_should)
1518

    
1519
    for node in node_vol_should:
1520
      n_img = node_image[node]
1521
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1522
        # ignore missing volumes on offline or broken nodes
1523
        continue
1524
      for volume in node_vol_should[node]:
1525
        test = volume not in n_img.volumes
1526
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1527
                 "volume %s missing on node %s", volume, node)
1528

    
1529
    if instanceconfig.admin_up:
1530
      pri_img = node_image[node_current]
1531
      test = instance not in pri_img.instances and not pri_img.offline
1532
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
1533
               "instance not running on its primary node %s",
1534
               node_current)
1535

    
1536
    for node, n_img in node_image.items():
1537
      if node != node_current:
1538
        test = instance in n_img.instances
1539
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1540
                 "instance should not run on node %s", node)
1541

    
1542
    diskdata = [(nname, disk, idx)
1543
                for (nname, disks) in diskstatus.items()
1544
                for idx, disk in enumerate(disks)]
1545

    
1546
    for nname, bdev_status, idx in diskdata:
1547
      _ErrorIf(not bdev_status,
1548
               self.EINSTANCEFAULTYDISK, instance,
1549
               "couldn't retrieve status for disk/%s on %s", idx, nname)
1550
      _ErrorIf(bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY,
1551
               self.EINSTANCEFAULTYDISK, instance,
1552
               "disk/%s on %s is faulty", idx, nname)
1553

    
1554
  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1555
    """Verify if there are any unknown volumes in the cluster.
1556

1557
    The .os, .swap and backup volumes are ignored. All other volumes are
1558
    reported as unknown.
1559

1560
    @type reserved: L{ganeti.utils.FieldSet}
1561
    @param reserved: a FieldSet of reserved volume names
1562

1563
    """
1564
    for node, n_img in node_image.items():
1565
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1566
        # skip non-healthy nodes
1567
        continue
1568
      for volume in n_img.volumes:
1569
        test = ((node not in node_vol_should or
1570
                volume not in node_vol_should[node]) and
1571
                not reserved.Matches(volume))
1572
        self._ErrorIf(test, self.ENODEORPHANLV, node,
1573
                      "volume %s is unknown", volume)
1574

    
1575
  def _VerifyOrphanInstances(self, instancelist, node_image):
1576
    """Verify the list of running instances.
1577

1578
    This checks what instances are running but unknown to the cluster.
1579

1580
    """
1581
    for node, n_img in node_image.items():
1582
      for o_inst in n_img.instances:
1583
        test = o_inst not in instancelist
1584
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1585
                      "instance %s on node %s should not exist", o_inst, node)
1586

    
1587
  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1588
    """Verify N+1 Memory Resilience.
1589

1590
    Check that if one single node dies we can still start all the
1591
    instances it was primary for.
1592

1593
    """
1594
    for node, n_img in node_image.items():
1595
      # This code checks that every node which is now listed as
1596
      # secondary has enough memory to host all instances it is
1597
      # supposed to should a single other node in the cluster fail.
1598
      # FIXME: not ready for failover to an arbitrary node
1599
      # FIXME: does not support file-backed instances
1600
      # WARNING: we currently take into account down instances as well
1601
      # as up ones, considering that even if they're down someone
1602
      # might want to start them even in the event of a node failure.
1603
      for prinode, instances in n_img.sbp.items():
1604
        needed_mem = 0
1605
        for instance in instances:
1606
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1607
          if bep[constants.BE_AUTO_BALANCE]:
1608
            needed_mem += bep[constants.BE_MEMORY]
1609
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate instance failovers"
                      " should peer node %s fail", prinode)
1613

    
1614
  def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1615
                       master_files):
1616
    """Verifies and computes the node required file checksums.
1617

1618
    @type ninfo: L{objects.Node}
1619
    @param ninfo: the node to check
1620
    @param nresult: the remote results for the node
1621
    @param file_list: required list of files
1622
    @param local_cksum: dictionary of local files and their checksums
1623
    @param master_files: list of files that only masters should have
1624

1625
    """
1626
    node = ninfo.name
1627
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1628

    
1629
    remote_cksum = nresult.get(constants.NV_FILELIST, None)
1630
    test = not isinstance(remote_cksum, dict)
1631
    _ErrorIf(test, self.ENODEFILECHECK, node,
1632
             "node hasn't returned file checksum data")
1633
    if test:
1634
      return
1635

    
1636
    for file_name in file_list:
1637
      node_is_mc = ninfo.master_candidate
1638
      must_have = (file_name not in master_files) or node_is_mc
1639
      # missing
1640
      test1 = file_name not in remote_cksum
1641
      # invalid checksum
1642
      test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1643
      # existing and good
1644
      test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1645
      _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1646
               "file '%s' missing", file_name)
1647
      _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1648
               "file '%s' has wrong checksum", file_name)
1649
      # not candidate and this is not a must-have file
1650
      _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1651
               "file '%s' should not exist on non master"
1652
               " candidates (and the file is outdated)", file_name)
1653
      # all good, except non-master/non-must have combination
1654
      _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1655
               "file '%s' should not exist"
1656
               " on non master candidates", file_name)
1657

    
1658
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
                      drbd_map):
    """Verifies the node DRBD status.
1661

1662
    @type ninfo: L{objects.Node}
1663
    @param ninfo: the node to check
1664
    @param nresult: the remote results for the node
1665
    @param instanceinfo: the dict of instances
1666
    @param drbd_helper: the configured DRBD usermode helper
1667
    @param drbd_map: the DRBD map as returned by
1668
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1669

1670
    """
1671
    node = ninfo.name
1672
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1673

    
1674
    if drbd_helper:
1675
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
1676
      test = (helper_result is None)
1677
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
1678
               "no drbd usermode helper returned")
1679
      if helper_result:
1680
        status, payload = helper_result
1681
        test = not status
1682
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
1683
                 "drbd usermode helper check unsuccessful: %s", payload)
1684
        test = status and (payload != drbd_helper)
1685
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
1686
                 "wrong drbd usermode helper: %s", payload)
1687

    
1688
    # compute the DRBD minors
1689
    node_drbd = {}
1690
    for minor, instance in drbd_map[node].items():
1691
      test = instance not in instanceinfo
1692
      _ErrorIf(test, self.ECLUSTERCFG, None,
1693
               "ghost instance '%s' in temporary DRBD map", instance)
1694
      # ghost instance should not be running, but otherwise we
      # don't give double warnings (both ghost instance and
      # unallocated minor in use)
1697
      if test:
1698
        node_drbd[minor] = (instance, False)
1699
      else:
1700
        instance = instanceinfo[instance]
1701
        node_drbd[minor] = (instance.name, instance.admin_up)
1702

    
1703
    # and now check them
1704
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
1705
    test = not isinstance(used_minors, (tuple, list))
1706
    _ErrorIf(test, self.ENODEDRBD, node,
1707
             "cannot parse drbd status file: %s", str(used_minors))
1708
    if test:
1709
      # we cannot check drbd status
1710
      return
1711

    
1712
    for minor, (iname, must_exist) in node_drbd.items():
1713
      test = minor not in used_minors and must_exist
1714
      _ErrorIf(test, self.ENODEDRBD, node,
1715
               "drbd minor %d of instance %s is not active", minor, iname)
1716
    for minor in used_minors:
1717
      test = minor not in node_drbd
1718
      _ErrorIf(test, self.ENODEDRBD, node,
1719
               "unallocated drbd minor %d is in use", minor)
1720

    
1721
  def _UpdateNodeOS(self, ninfo, nresult, nimg):
1722
    """Builds the node OS structures.
1723

1724
    @type ninfo: L{objects.Node}
1725
    @param ninfo: the node to check
1726
    @param nresult: the remote results for the node
1727
    @param nimg: the node image object
1728

1729
    """
1730
    node = ninfo.name
1731
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1732

    
1733
    remote_os = nresult.get(constants.NV_OSLIST, None)
1734
    test = (not isinstance(remote_os, list) or
1735
            not compat.all(isinstance(v, list) and len(v) == 7
1736
                           for v in remote_os))
1737

    
1738
    _ErrorIf(test, self.ENODEOS, node,
1739
             "node hasn't returned valid OS data")
1740

    
1741
    nimg.os_fail = test
1742

    
1743
    if test:
1744
      return
1745

    
1746
    os_dict = {}
1747

    
1748
    for (name, os_path, status, diagnose,
1749
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1750

    
1751
      if name not in os_dict:
1752
        os_dict[name] = []
1753

    
1754
      # parameters is a list of lists instead of list of tuples due to
1755
      # JSON lacking a real tuple type, fix it:
1756
      parameters = [tuple(v) for v in parameters]
1757
      os_dict[name].append((os_path, status, diagnose,
1758
                            set(variants), set(parameters), set(api_ver)))
1759

    
1760
    nimg.oslist = os_dict
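    # Illustrative sketch (not used by this LU): each OS entry is a 7-element
    # list (name, path, status, diagnose, variants, parameters, api_versions),
    # as enforced above; grouping keeps every occurrence per name.  Example
    # values below are assumed:
    #
    #   entry = ["debootstrap", "/srv/ganeti/os/debootstrap", True, "",
    #            ["default"], [["dhcp", "no"]], [20]]
    #   (name, path, status, diag, variants, params, api) = entry
    #   os_dict.setdefault(name, []).append(
    #     (path, status, diag, set(variants),
    #      set(tuple(v) for v in params), set(api)))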
1761

    
1762
  def _VerifyNodeOS(self, ninfo, nimg, base):
1763
    """Verifies the node OS list.
1764

1765
    @type ninfo: L{objects.Node}
1766
    @param ninfo: the node to check
1767
    @param nimg: the node image object
1768
    @param base: the 'template' node we match against (e.g. from the master)
1769

1770
    """
1771
    node = ninfo.name
1772
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1773

    
1774
    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1775

    
1776
    for os_name, os_data in nimg.oslist.items():
1777
      assert os_data, "Empty OS status for OS %s?!" % os_name
1778
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1779
      _ErrorIf(not f_status, self.ENODEOS, node,
1780
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1781
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1782
               "OS '%s' has multiple entries (first one shadows the rest): %s",
1783
               os_name, utils.CommaJoin([v[0] for v in os_data]))
1784
      # this will be caught in the backend too
1785
      _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1786
               and not f_var, self.ENODEOS, node,
1787
               "OS %s with API at least %d does not declare any variant",
1788
               os_name, constants.OS_API_V15)
1789
      # comparisons with the 'base' image
1790
      test = os_name not in base.oslist
1791
      _ErrorIf(test, self.ENODEOS, node,
1792
               "Extra OS %s not present on reference node (%s)",
1793
               os_name, base.name)
1794
      if test:
1795
        continue
1796
      assert base.oslist[os_name], "Base node has empty OS status?"
1797
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1798
      if not b_status:
1799
        # base OS is invalid, skipping
1800
        continue
1801
      for kind, a, b in [("API version", f_api, b_api),
1802
                         ("variants list", f_var, b_var),
1803
                         ("parameters", f_param, b_param)]:
1804
        _ErrorIf(a != b, self.ENODEOS, node,
1805
                 "OS %s %s differs from reference node %s: %s vs. %s",
1806
                 kind, os_name, base.name,
1807
                 utils.CommaJoin(a), utils.CommaJoin(b))
1808

    
1809
    # check any missing OSes
1810
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1811
    _ErrorIf(missing, self.ENODEOS, node,
1812
             "OSes present on reference node %s but missing on this node: %s",
1813
             base.name, utils.CommaJoin(missing))
1814

    
1815
  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1816
    """Verifies and updates the node volume data.
1817

1818
    This function will update a L{NodeImage}'s internal structures
1819
    with data from the remote call.
1820

1821
    @type ninfo: L{objects.Node}
1822
    @param ninfo: the node to check
1823
    @param nresult: the remote results for the node
1824
    @param nimg: the node image object
1825
    @param vg_name: the configured VG name
1826

1827
    """
1828
    node = ninfo.name
1829
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1830

    
1831
    nimg.lvm_fail = True
1832
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1833
    if vg_name is None:
1834
      pass
1835
    elif isinstance(lvdata, basestring):
1836
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1837
               utils.SafeEncode(lvdata))
1838
    elif not isinstance(lvdata, dict):
1839
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1840
    else:
1841
      nimg.volumes = lvdata
1842
      nimg.lvm_fail = False
1843

    
1844
  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1845
    """Verifies and updates the node instance list.
1846

1847
    If the listing was successful, then updates this node's instance
1848
    list. Otherwise, it marks the RPC call as failed for the instance
1849
    list key.
1850

1851
    @type ninfo: L{objects.Node}
1852
    @param ninfo: the node to check
1853
    @param nresult: the remote results for the node
1854
    @param nimg: the node image object
1855

1856
    """
1857
    idata = nresult.get(constants.NV_INSTANCELIST, None)
1858
    test = not isinstance(idata, list)
1859
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1860
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
1861
    if test:
1862
      nimg.hyp_fail = True
1863
    else:
1864
      nimg.instances = idata
1865

    
1866
  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1867
    """Verifies and computes a node information map
1868

1869
    @type ninfo: L{objects.Node}
1870
    @param ninfo: the node to check
1871
    @param nresult: the remote results for the node
1872
    @param nimg: the node image object
1873
    @param vg_name: the configured VG name
1874

1875
    """
1876
    node = ninfo.name
1877
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1878

    
1879
    # try to read free memory (from the hypervisor)
1880
    hv_info = nresult.get(constants.NV_HVINFO, None)
1881
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1882
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1883
    if not test:
1884
      try:
1885
        nimg.mfree = int(hv_info["memory_free"])
1886
      except (ValueError, TypeError):
1887
        _ErrorIf(True, self.ENODERPC, node,
1888
                 "node returned invalid nodeinfo, check hypervisor")
1889

    
1890
    # FIXME: devise a free space model for file based instances as well
1891
    if vg_name is not None:
1892
      test = (constants.NV_VGLIST not in nresult or
1893
              vg_name not in nresult[constants.NV_VGLIST])
1894
      _ErrorIf(test, self.ENODELVM, node,
1895
               "node didn't return data for the volume group '%s'"
1896
               " - it is either missing or broken", vg_name)
1897
      if not test:
1898
        try:
1899
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
1900
        except (ValueError, TypeError):
1901
          _ErrorIf(True, self.ENODERPC, node,
1902
                   "node returned invalid LVM info, check LVM status")
1903

    
1904
  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
1905
    """Gets per-disk status information for all instances.
1906

1907
    @type nodelist: list of strings
1908
    @param nodelist: Node names
1909
    @type node_image: dict of (name, L{NodeImage})
    @param node_image: Node image objects
1911
    @type instanceinfo: dict of (name, L{objects.Instance})
1912
    @param instanceinfo: Instance objects
1913

1914
    """
1915
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1916

    
1917
    node_disks = {}
1918
    node_disks_devonly = {}
1919

    
1920
    for nname in nodelist:
1921
      disks = [(inst, disk)
1922
               for instlist in [node_image[nname].pinst,
1923
                                node_image[nname].sinst]
1924
               for inst in instlist
1925
               for disk in instanceinfo[inst].disks]
1926

    
1927
      if not disks:
1928
        # No need to collect data
1929
        continue
1930

    
1931
      node_disks[nname] = disks
1932

    
1933
      # Creating copies as SetDiskID below will modify the objects and that can
1934
      # lead to incorrect data returned from nodes
1935
      devonly = [dev.Copy() for (_, dev) in disks]
1936

    
1937
      for dev in devonly:
1938
        self.cfg.SetDiskID(dev, nname)
1939

    
1940
      node_disks_devonly[nname] = devonly
1941

    
1942
    assert len(node_disks) == len(node_disks_devonly)
1943

    
1944
    # Collect data from all nodes with disks
1945
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
1946
                                                          node_disks_devonly)
1947

    
1948
    assert len(result) == len(node_disks)
1949

    
1950
    instdisk = {}
1951

    
1952
    for (nname, nres) in result.items():
1953
      if nres.offline:
1954
        # Ignore offline node
1955
        continue
1956

    
1957
      disks = node_disks[nname]
1958

    
1959
      msg = nres.fail_msg
1960
      _ErrorIf(msg, self.ENODERPC, nname,
1961
               "while getting disk information: %s", nres.fail_msg)
1962
      if msg:
1963
        # No data from this node
1964
        data = len(disks) * [None]
1965
      else:
1966
        data = nres.payload
1967

    
1968
      for ((inst, _), status) in zip(disks, data):
1969
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
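    # Illustrative sketch (not used by this LU): 'instdisk' ends up mapping
    # instance -> node -> per-disk status list, in disk-index order, e.g.
    # (assumed names):
    #
    #   instdisk = {"inst1": {"node1": [status_disk0, status_disk1],
    #                         "node2": [status_disk0, status_disk1]}}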
1970

    
1971
    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
1972
                      len(nnames) <= len(instanceinfo[inst].all_nodes)
1973
                      for inst, nnames in instdisk.items()
1974
                      for nname, statuses in nnames.items())
1975

    
1976
    return instdisk
1977

    
1978
  def BuildHooksEnv(self):
1979
    """Build hooks env.
1980

1981
    Cluster-Verify hooks are run only in the post phase; their failure is
    logged in the verify output and makes the verification fail.
1983

1984
    """
1985
    all_nodes = self.cfg.GetNodeList()
1986
    env = {
1987
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
1988
      }
1989
    for node in self.cfg.GetAllNodesInfo().values():
1990
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
1991

    
1992
    return env, [], all_nodes
1993

    
1994
  def Exec(self, feedback_fn):
1995
    """Verify integrity of cluster, performing various test on nodes.
1996

1997
    """
1998
    self.bad = False
1999
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2000
    verbose = self.op.verbose
2001
    self._feedback_fn = feedback_fn
2002
    feedback_fn("* Verifying global settings")
2003
    for msg in self.cfg.VerifyConfig():
2004
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2005

    
2006
    # Check the cluster certificates
2007
    for cert_filename in constants.ALL_CERT_FILES:
2008
      (errcode, msg) = _VerifyCertificate(cert_filename)
2009
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2010

    
2011
    vg_name = self.cfg.GetVGName()
2012
    drbd_helper = self.cfg.GetDRBDHelper()
2013
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2014
    cluster = self.cfg.GetClusterInfo()
2015
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
2016
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
2017
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
2018
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
2019
                        for iname in instancelist)
2020
    i_non_redundant = [] # Non redundant instances
2021
    i_non_a_balanced = [] # Non auto-balanced instances
2022
    n_offline = 0 # Count of offline nodes
2023
    n_drained = 0 # Count of nodes being drained
2024
    node_vol_should = {}
2025

    
2026
    # FIXME: verify OS list
2027
    # do local checksums
2028
    master_files = [constants.CLUSTER_CONF_FILE]
2029
    master_node = self.master_node = self.cfg.GetMasterNode()
2030
    master_ip = self.cfg.GetMasterIP()
2031

    
2032
    file_names = ssconf.SimpleStore().GetFileList()
2033
    file_names.extend(constants.ALL_CERT_FILES)
2034
    file_names.extend(master_files)
2035
    if cluster.modify_etc_hosts:
2036
      file_names.append(constants.ETC_HOSTS)
2037

    
2038
    local_checksums = utils.FingerprintFiles(file_names)
2039

    
2040
    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2041
    node_verify_param = {
2042
      constants.NV_FILELIST: file_names,
2043
      constants.NV_NODELIST: [node.name for node in nodeinfo
2044
                              if not node.offline],
2045
      constants.NV_HYPERVISOR: hypervisors,
2046
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2047
                                  node.secondary_ip) for node in nodeinfo
2048
                                 if not node.offline],
2049
      constants.NV_INSTANCELIST: hypervisors,
2050
      constants.NV_VERSION: None,
2051
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2052
      constants.NV_NODESETUP: None,
2053
      constants.NV_TIME: None,
2054
      constants.NV_MASTERIP: (master_node, master_ip),
2055
      constants.NV_OSLIST: None,
2056
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2057
      }
2058

    
2059
    if vg_name is not None:
2060
      node_verify_param[constants.NV_VGLIST] = None
2061
      node_verify_param[constants.NV_LVLIST] = vg_name
2062
      node_verify_param[constants.NV_PVLIST] = [vg_name]
2063
      node_verify_param[constants.NV_DRBDLIST] = None
2064

    
2065
    if drbd_helper:
2066
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2067

    
2068
    # Build our expected cluster state
2069
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
2070
                                                 name=node.name,
2071
                                                 vm_capable=node.vm_capable))
2072
                      for node in nodeinfo)
2073

    
2074
    for instance in instancelist:
2075
      inst_config = instanceinfo[instance]
2076

    
2077
      for nname in inst_config.all_nodes:
2078
        if nname not in node_image:
2079
          # ghost node
2080
          gnode = self.NodeImage(name=nname)
2081
          gnode.ghost = True
2082
          node_image[nname] = gnode
2083

    
2084
      inst_config.MapLVsByNode(node_vol_should)
2085

    
2086
      pnode = inst_config.primary_node
2087
      node_image[pnode].pinst.append(instance)
2088

    
2089
      for snode in inst_config.secondary_nodes:
2090
        nimg = node_image[snode]
2091
        nimg.sinst.append(instance)
2092
        if pnode not in nimg.sbp:
2093
          nimg.sbp[pnode] = []
2094
        nimg.sbp[pnode].append(instance)
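      # Illustrative sketch (not used by this LU): 'sbp' groups, per secondary
      # node, the instances it hosts by their primary node.  Assumed example:
      #
      #   # inst1: primary "node1", secondaries ["node2"]
      #   # -> node_image["node2"].sinst == ["inst1"]
      #   # -> node_image["node2"].sbp == {"node1": ["inst1"]}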
2095

    
2096
    # At this point, we have the in-memory data structures complete,
2097
    # except for the runtime information, which we'll gather next
2098

    
2099
    # Due to the way our RPC system works, exact response times cannot be
2100
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2101
    # time before and after executing the request, we can at least have a time
2102
    # window.
2103
    nvinfo_starttime = time.time()
2104
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2105
                                           self.cfg.GetClusterName())
2106
    nvinfo_endtime = time.time()
2107

    
2108
    all_drbd_map = self.cfg.ComputeDRBDMap()
2109

    
2110
    feedback_fn("* Gathering disk information (%s nodes)" % len(nodelist))
2111
    instdisk = self._CollectDiskInfo(nodelist, node_image, instanceinfo)
2112

    
2113
    feedback_fn("* Verifying node status")
2114

    
2115
    refos_img = None
2116

    
2117
    for node_i in nodeinfo:
2118
      node = node_i.name
2119
      nimg = node_image[node]
2120

    
2121
      if node_i.offline:
2122
        if verbose:
2123
          feedback_fn("* Skipping offline node %s" % (node,))
2124
        n_offline += 1
2125
        continue
2126

    
2127
      if node == master_node:
2128
        ntype = "master"
2129
      elif node_i.master_candidate:
2130
        ntype = "master candidate"
2131
      elif node_i.drained:
2132
        ntype = "drained"
2133
        n_drained += 1
2134
      else:
2135
        ntype = "regular"
2136
      if verbose:
2137
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2138

    
2139
      msg = all_nvinfo[node].fail_msg
2140
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2141
      if msg:
2142
        nimg.rpc_fail = True
2143
        continue
2144

    
2145
      nresult = all_nvinfo[node].payload
2146

    
2147
      nimg.call_ok = self._VerifyNode(node_i, nresult)
2148
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2149
      self._VerifyNodeNetwork(node_i, nresult)
2150
      self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
2151
                            master_files)
2152

    
2153
      if nimg.vm_capable:
2154
        self._VerifyNodeLVM(node_i, nresult, vg_name)
2155
        self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2156
                             all_drbd_map)
2157

    
2158
        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2159
        self._UpdateNodeInstances(node_i, nresult, nimg)
2160
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2161
        self._UpdateNodeOS(node_i, nresult, nimg)
2162
        if not nimg.os_fail:
2163
          if refos_img is None:
2164
            refos_img = nimg
2165
          self._VerifyNodeOS(node_i, nimg, refos_img)
2166

    
2167
    feedback_fn("* Verifying instance status")
2168
    for instance in instancelist:
2169
      if verbose:
2170
        feedback_fn("* Verifying instance %s" % instance)
2171
      inst_config = instanceinfo[instance]
2172
      self._VerifyInstance(instance, inst_config, node_image,
2173
                           instdisk[instance])
2174
      inst_nodes_offline = []
2175

    
2176
      pnode = inst_config.primary_node
2177
      pnode_img = node_image[pnode]
2178
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2179
               self.ENODERPC, pnode, "instance %s, connection to"
2180
               " primary node failed", instance)
2181

    
2182
      if pnode_img.offline:
2183
        inst_nodes_offline.append(pnode)
2184

    
2185
      # If the instance is non-redundant we cannot survive losing its primary
2186
      # node, so we are not N+1 compliant. On the other hand we have no disk
2187
      # templates with more than one secondary so that situation is not well
2188
      # supported either.
2189
      # FIXME: does not support file-backed instances
2190
      if not inst_config.secondary_nodes:
2191
        i_non_redundant.append(instance)
2192
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2193
               instance, "instance has multiple secondary nodes: %s",
2194
               utils.CommaJoin(inst_config.secondary_nodes),
2195
               code=self.ETYPE_WARNING)
2196

    
2197
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2198
        i_non_a_balanced.append(instance)
2199

    
2200
      for snode in inst_config.secondary_nodes:
2201
        s_img = node_image[snode]
2202
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2203
                 "instance %s, connection to secondary node failed", instance)
2204

    
2205
        if s_img.offline:
2206
          inst_nodes_offline.append(snode)
2207

    
2208
      # warn that the instance lives on offline nodes
2209
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2210
               "instance lives on offline node(s) %s",
2211
               utils.CommaJoin(inst_nodes_offline))
2212
      # ... or ghost/non-vm_capable nodes
2213
      for node in inst_config.all_nodes:
2214
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2215
                 "instance lives on ghost node %s", node)
2216
        _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2217
                 instance, "instance lives on non-vm_capable node %s", node)
2218

    
2219
    feedback_fn("* Verifying orphan volumes")
2220
    reserved = utils.FieldSet(*cluster.reserved_lvs)
2221
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2222

    
2223
    feedback_fn("* Verifying orphan instances")
2224
    self._VerifyOrphanInstances(instancelist, node_image)
2225

    
2226
    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2227
      feedback_fn("* Verifying N+1 Memory redundancy")
2228
      self._VerifyNPlusOneMemory(node_image, instanceinfo)
2229

    
2230
    feedback_fn("* Other Notes")
2231
    if i_non_redundant:
2232
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2233
                  % len(i_non_redundant))
2234

    
2235
    if i_non_a_balanced:
2236
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2237
                  % len(i_non_a_balanced))
2238

    
2239
    if n_offline:
2240
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2241

    
2242
    if n_drained:
2243
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2244

    
2245
    return not self.bad
2246

    
2247
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2248
    """Analyze the post-hooks' result
2249

2250
    This method analyses the hook result, handles it, and sends some
2251
    nicely-formatted feedback back to the user.
2252

2253
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
2254
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2255
    @param hooks_results: the results of the multi-node hooks rpc call
2256
    @param feedback_fn: function used to send feedback back to the caller
2257
    @param lu_result: previous Exec result
2258
    @return: the new Exec result, based on the previous result
2259
        and hook results
2260

2261
    """
2262
    # We only really run POST phase hooks, and are only interested in
2263
    # their results
2264
    if phase == constants.HOOKS_PHASE_POST:
2265
      # Used to change hooks' output to proper indentation
2266
      indent_re = re.compile('^', re.M)
2267
      feedback_fn("* Hooks Results")
2268
      assert hooks_results, "invalid result from hooks"
2269

    
2270
      for node_name in hooks_results:
2271
        res = hooks_results[node_name]
2272
        msg = res.fail_msg
2273
        test = msg and not res.offline
2274
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
2275
                      "Communication failure in hooks execution: %s", msg)
2276
        if res.offline or msg:
2277
          # No need to investigate payload if node is offline or gave an error.
2278
          # manually override lu_result here, as _ErrorIf only
          # sets self.bad
2280
          lu_result = 1
2281
          continue
2282
        for script, hkr, output in res.payload:
2283
          test = hkr == constants.HKR_FAIL
2284
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
2285
                        "Script %s failed, output:", script)
2286
          if test:
2287
            output = indent_re.sub('      ', output)
2288
            feedback_fn("%s" % output)
2289
            lu_result = 0
2290

    
2291
      return lu_result
2292

    
2293

    
2294
class LUVerifyDisks(NoHooksLU):
2295
  """Verifies the cluster disks status.
2296

2297
  """
2298
  REQ_BGL = False
2299

    
2300
  def ExpandNames(self):
2301
    self.needed_locks = {
2302
      locking.LEVEL_NODE: locking.ALL_SET,
2303
      locking.LEVEL_INSTANCE: locking.ALL_SET,
2304
    }
2305
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2306

    
2307
  def Exec(self, feedback_fn):
2308
    """Verify integrity of cluster disks.
2309

2310
    @rtype: tuple of three items
2311
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)
2314

2315
    """
2316
    result = res_nodes, res_instances, res_missing = {}, [], {}
2317

    
2318
    vg_name = self.cfg.GetVGName()
2319
    nodes = utils.NiceSort(self.cfg.GetNodeList())
2320
    instances = [self.cfg.GetInstanceInfo(name)
2321
                 for name in self.cfg.GetInstanceList()]
2322

    
2323
    nv_dict = {}
2324
    for inst in instances:
2325
      inst_lvs = {}
2326
      if (not inst.admin_up or
2327
          inst.disk_template not in constants.DTS_NET_MIRROR):
2328
        continue
2329
      inst.MapLVsByNode(inst_lvs)
2330
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2331
      for node, vol_list in inst_lvs.iteritems():
2332
        for vol in vol_list:
2333
          nv_dict[(node, vol)] = inst
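    # Illustrative sketch (not used by this LU): with assumed data,
    #
    #   inst_lvs = {"node1": ["xenvg/lv_data", "xenvg/lv_meta"]}
    #   # -> nv_dict gains {("node1", "xenvg/lv_data"): inst,
    #   #                   ("node1", "xenvg/lv_meta"): inst}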
2334

    
2335
    if not nv_dict:
2336
      return result
2337

    
2338
    node_lvs = self.rpc.call_lv_list(nodes, vg_name)
2339

    
2340
    for node in nodes:
2341
      # node_volume
2342
      node_res = node_lvs[node]
2343
      if node_res.offline:
2344
        continue
2345
      msg = node_res.fail_msg
2346
      if msg:
2347
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2348
        res_nodes[node] = msg
2349
        continue
2350

    
2351
      lvs = node_res.payload
2352
      for lv_name, (_, _, lv_online) in lvs.items():
2353
        inst = nv_dict.pop((node, lv_name), None)
2354
        if (not lv_online and inst is not None
2355
            and inst.name not in res_instances):
2356
          res_instances.append(inst.name)
2357

    
2358
    # any leftover items in nv_dict are missing LVs, let's arrange the
2359
    # data better
2360
    for key, inst in nv_dict.iteritems():
2361
      if inst.name not in res_missing:
2362
        res_missing[inst.name] = []
2363
      res_missing[inst.name].append(key)
2364

    
2365
    return result
2366

    
2367

    
2368
class LURepairDiskSizes(NoHooksLU):
2369
  """Verifies the cluster disks sizes.
2370

2371
  """
2372
  _OP_PARAMS = [("instances", ht.EmptyList, ht.TListOf(ht.TNonEmptyString))]
2373
  REQ_BGL = False
2374

    
2375
  def ExpandNames(self):
2376
    if self.op.instances:
2377
      self.wanted_names = []
2378
      for name in self.op.instances:
2379
        full_name = _ExpandInstanceName(self.cfg, name)
2380
        self.wanted_names.append(full_name)
2381
      self.needed_locks = {
2382
        locking.LEVEL_NODE: [],
2383
        locking.LEVEL_INSTANCE: self.wanted_names,
2384
        }
2385
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2386
    else:
2387
      self.wanted_names = None
2388
      self.needed_locks = {
2389
        locking.LEVEL_NODE: locking.ALL_SET,
2390
        locking.LEVEL_INSTANCE: locking.ALL_SET,
2391
        }
2392
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2393

    
2394
  def DeclareLocks(self, level):
2395
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
2396
      self._LockInstancesNodes(primary_only=True)
2397

    
2398
  def CheckPrereq(self):
2399
    """Check prerequisites.
2400

2401
    This only checks the optional instance list against the existing names.
2402

2403
    """
2404
    if self.wanted_names is None:
2405
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2406

    
2407
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2408
                             in self.wanted_names]
2409

    
2410
  def _EnsureChildSizes(self, disk):
2411
    """Ensure children of the disk have the needed disk size.
2412

2413
    This is valid mainly for DRBD8 and fixes an issue where the
    children have a smaller disk size.
2415

2416
    @param disk: an L{ganeti.objects.Disk} object
2417

2418
    """
2419
    if disk.dev_type == constants.LD_DRBD8:
2420
      assert disk.children, "Empty children for DRBD8?"
2421
      fchild = disk.children[0]
2422
      mismatch = fchild.size < disk.size
2423
      if mismatch:
2424
        self.LogInfo("Child disk has size %d, parent %d, fixing",
2425
                     fchild.size, disk.size)
2426
        fchild.size = disk.size
2427

    
2428
      # and we recurse on this child only, not on the metadev
2429
      return self._EnsureChildSizes(fchild) or mismatch
2430
    else:
2431
      return False
2432

    
2433
  def Exec(self, feedback_fn):
2434
    """Verify the size of cluster disks.
2435

2436
    """
2437
    # TODO: check child disks too
2438
    # TODO: check differences in size between primary/secondary nodes
2439
    per_node_disks = {}
2440
    for instance in self.wanted_instances:
2441
      pnode = instance.primary_node
2442
      if pnode not in per_node_disks:
2443
        per_node_disks[pnode] = []
2444
      for idx, disk in enumerate(instance.disks):
2445
        per_node_disks[pnode].append((instance, idx, disk))
2446

    
2447
    changed = []
2448
    for node, dskl in per_node_disks.items():
2449
      newl = [v[2].Copy() for v in dskl]
2450
      for dsk in newl:
2451
        self.cfg.SetDiskID(dsk, node)
2452
      result = self.rpc.call_blockdev_getsizes(node, newl)
2453
      if result.fail_msg:
2454
        self.LogWarning("Failure in blockdev_getsizes call to node"
2455
                        " %s, ignoring", node)
2456
        continue
2457
      if len(result.data) != len(dskl):
2458
        self.LogWarning("Invalid result from node %s, ignoring node results",
2459
                        node)
2460
        continue
2461
      for ((instance, idx, disk), size) in zip(dskl, result.data):
2462
        if size is None:
2463
          self.LogWarning("Disk %d of instance %s did not return size"
2464
                          " information, ignoring", idx, instance.name)
2465
          continue
2466
        if not isinstance(size, (int, long)):
2467
          self.LogWarning("Disk %d of instance %s did not return valid"
2468
                          " size information, ignoring", idx, instance.name)
2469
          continue
2470
        size = size >> 20
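        # Illustrative note: the shift by 20 converts the node-reported byte
        # counts to the MiB units used by the configuration, e.g.:
        #
        #   10737418240 >> 20 == 10240    # 10 GiB expressed in MiB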
2471
        if size != disk.size:
2472
          self.LogInfo("Disk %d of instance %s has mismatched size,"
2473
                       " correcting: recorded %d, actual %d", idx,
2474
                       instance.name, disk.size, size)
2475
          disk.size = size
2476
          self.cfg.Update(instance, feedback_fn)
2477
          changed.append((instance.name, idx, size))
2478
        if self._EnsureChildSizes(disk):
2479
          self.cfg.Update(instance, feedback_fn)
2480
          changed.append((instance.name, idx, disk.size))
2481
    return changed
2482

    
2483

    
2484
class LURenameCluster(LogicalUnit):
2485
  """Rename the cluster.
2486

2487
  """
2488
  HPATH = "cluster-rename"
2489
  HTYPE = constants.HTYPE_CLUSTER
2490
  _OP_PARAMS = [("name", ht.NoDefault, ht.TNonEmptyString)]
2491

    
2492
  def BuildHooksEnv(self):
2493
    """Build hooks env.
2494

2495
    """
2496
    env = {
2497
      "OP_TARGET": self.cfg.GetClusterName(),
2498
      "NEW_NAME": self.op.name,
2499
      }
2500
    mn = self.cfg.GetMasterNode()
2501
    all_nodes = self.cfg.GetNodeList()
2502
    return env, [mn], all_nodes
2503

    
2504
  def CheckPrereq(self):
2505
    """Verify that the passed name is a valid one.
2506

2507
    """
2508
    hostname = netutils.GetHostname(name=self.op.name,
2509
                                    family=self.cfg.GetPrimaryIPFamily())
2510

    
2511
    new_name = hostname.name
2512
    self.ip = new_ip = hostname.ip
2513
    old_name = self.cfg.GetClusterName()
2514
    old_ip = self.cfg.GetMasterIP()
2515
    if new_name == old_name and new_ip == old_ip:
2516
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
2517
                                 " cluster has changed",
2518
                                 errors.ECODE_INVAL)
2519
    if new_ip != old_ip:
2520
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2521
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
2522
                                   " reachable on the network" %
2523
                                   new_ip, errors.ECODE_NOTUNIQUE)
2524

    
2525
    self.op.name = new_name
2526

    
2527
  def Exec(self, feedback_fn):
2528
    """Rename the cluster.
2529

2530
    """
2531
    clustername = self.op.name
2532
    ip = self.ip
2533

    
2534
    # shutdown the master IP
2535
    master = self.cfg.GetMasterNode()
2536
    result = self.rpc.call_node_stop_master(master, False)
2537
    result.Raise("Could not disable the master role")
2538

    
2539
    try:
2540
      cluster = self.cfg.GetClusterInfo()
2541
      cluster.cluster_name = clustername
2542
      cluster.master_ip = ip
2543
      self.cfg.Update(cluster, feedback_fn)
2544

    
2545
      # update the known hosts file
2546
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2547
      node_list = self.cfg.GetNodeList()
2548
      try:
2549
        node_list.remove(master)
2550
      except ValueError:
2551
        pass
2552
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
2553
    finally:
2554
      result = self.rpc.call_node_start_master(master, False, False)
2555
      msg = result.fail_msg
2556
      if msg:
2557
        self.LogWarning("Could not re-enable the master role on"
2558
                        " the master, please restart manually: %s", msg)
2559

    
2560
    return clustername
2561

    
2562

    
2563
class LUSetClusterParams(LogicalUnit):
2564
  """Change the parameters of the cluster.
2565

2566
  """
2567
  HPATH = "cluster-modify"
2568
  HTYPE = constants.HTYPE_CLUSTER
2569
  _OP_PARAMS = [
2570
    ("vg_name", None, ht.TMaybeString),
2571
    ("enabled_hypervisors", None,
2572
     ht.TOr(ht.TAnd(ht.TListOf(ht.TElemOf(constants.HYPER_TYPES)), ht.TTrue),
2573
            ht.TNone)),
2574
    ("hvparams", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict),
2575
                              ht.TNone)),
2576
    ("beparams", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict),
2577
                              ht.TNone)),
2578
    ("os_hvp", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict),
2579
                            ht.TNone)),
2580
    ("osparams", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict),
2581
                              ht.TNone)),
2582
    ("candidate_pool_size", None, ht.TOr(ht.TStrictPositiveInt, ht.TNone)),
2583
    ("uid_pool", None, ht.NoType),
2584
    ("add_uids", None, ht.NoType),
2585
    ("remove_uids", None, ht.NoType),
2586
    ("maintain_node_health", None, ht.TMaybeBool),
2587
    ("prealloc_wipe_disks", None, ht.TMaybeBool),
2588
    ("nicparams", None, ht.TOr(ht.TDict, ht.TNone)),
2589
    ("drbd_helper", None, ht.TOr(ht.TString, ht.TNone)),
2590
    ("default_iallocator", None, ht.TOr(ht.TString, ht.TNone)),
2591
    ("reserved_lvs", None, ht.TOr(ht.TListOf(ht.TNonEmptyString), ht.TNone)),
2592
    ("hidden_os", None, ht.TOr(ht.TListOf(\
2593
          ht.TAnd(ht.TList,
2594
                ht.TIsLength(2),
2595
                ht.TMap(lambda v: v[0], ht.TElemOf(constants.DDMS_VALUES)))),
2596
          ht.TNone)),
2597
    ("blacklisted_os", None, ht.TOr(ht.TListOf(\
2598
          ht.TAnd(ht.TList,
2599
                ht.TIsLength(2),
2600
                ht.TMap(lambda v: v[0], ht.TElemOf(constants.DDMS_VALUES)))),
2601
          ht.TNone)),
2602
    ]
2603
  REQ_BGL = False
2604

    
2605
  def CheckArguments(self):
2606
    """Check parameters
2607

2608
    """
2609
    if self.op.uid_pool:
2610
      uidpool.CheckUidPool(self.op.uid_pool)
2611

    
2612
    if self.op.add_uids:
2613
      uidpool.CheckUidPool(self.op.add_uids)
2614

    
2615
    if self.op.remove_uids:
2616
      uidpool.CheckUidPool(self.op.remove_uids)
2617

    
2618
  def ExpandNames(self):
2619
    # FIXME: in the future maybe other cluster params won't require checking on
2620
    # all nodes to be modified.
2621
    self.needed_locks = {
2622
      locking.LEVEL_NODE: locking.ALL_SET,
2623
    }
2624
    self.share_locks[locking.LEVEL_NODE] = 1
2625

    
2626
  def BuildHooksEnv(self):
2627
    """Build hooks env.
2628

2629
    """
2630
    env = {
2631
      "OP_TARGET": self.cfg.GetClusterName(),
2632
      "NEW_VG_NAME": self.op.vg_name,
2633
      }
2634
    mn = self.cfg.GetMasterNode()
2635
    return env, [mn], [mn]
2636

    
2637
  def CheckPrereq(self):
2638
    """Check prerequisites.
2639

2640
    This checks whether the given params don't conflict and
2641
    if the given volume group is valid.
2642

2643
    """
2644
    if self.op.vg_name is not None and not self.op.vg_name:
2645
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2646
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2647
                                   " instances exist", errors.ECODE_INVAL)
2648

    
2649
    if self.op.drbd_helper is not None and not self.op.drbd_helper:
2650
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2651
        raise errors.OpPrereqError("Cannot disable drbd helper while"
2652
                                   " drbd-based instances exist",
2653
                                   errors.ECODE_INVAL)
2654

    
2655
    node_list = self.acquired_locks[locking.LEVEL_NODE]
2656

    
2657
    # if vg_name not None, checks given volume group on all nodes
2658
    if self.op.vg_name:
2659
      vglist = self.rpc.call_vg_list(node_list)
2660
      for node in node_list:
2661
        msg = vglist[node].fail_msg
2662
        if msg:
2663
          # ignoring down node
2664
          self.LogWarning("Error while gathering data on node %s"
2665
                          " (ignoring node): %s", node, msg)
2666
          continue
2667
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2668
                                              self.op.vg_name,
2669
                                              constants.MIN_VG_SIZE)
2670
        if vgstatus:
2671
          raise errors.OpPrereqError("Error on node '%s': %s" %
2672
                                     (node, vgstatus), errors.ECODE_ENVIRON)
2673

    
2674
    if self.op.drbd_helper:
2675
      # checks given drbd helper on all nodes
2676
      helpers = self.rpc.call_drbd_helper(node_list)
2677
      for node in node_list:
2678
        ninfo = self.cfg.GetNodeInfo(node)
2679
        if ninfo.offline:
2680
          self.LogInfo("Not checking drbd helper on offline node %s", node)
2681
          continue
2682
        msg = helpers[node].fail_msg
2683
        if msg:
2684
          raise errors.OpPrereqError("Error checking drbd helper on node"
2685
                                     " '%s': %s" % (node, msg),
2686
                                     errors.ECODE_ENVIRON)
2687
        node_helper = helpers[node].payload
2688
        if node_helper != self.op.drbd_helper:
2689
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2690
                                     (node, node_helper), errors.ECODE_ENVIRON)
2691

    
2692
    self.cluster = cluster = self.cfg.GetClusterInfo()
2693
    # validate params changes
2694
    if self.op.beparams:
2695
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2696
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2697

    
2698
    if self.op.nicparams:
2699
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2700
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2701
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
2702
      nic_errors = []
2703

    
2704
      # check all instances for consistency
2705
      for instance in self.cfg.GetAllInstancesInfo().values():
2706
        for nic_idx, nic in enumerate(instance.nics):
2707
          params_copy = copy.deepcopy(nic.nicparams)
2708
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
2709

    
2710
          # check parameter syntax
2711
          try:
2712
            objects.NIC.CheckParameterSyntax(params_filled)
2713
          except errors.ConfigurationError, err:
2714
            nic_errors.append("Instance %s, nic/%d: %s" %
2715
                              (instance.name, nic_idx, err))
2716

    
2717
          # if we're moving instances to routed, check that they have an ip
2718
          target_mode = params_filled[constants.NIC_MODE]
2719
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2720
            nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2721
                              (instance.name, nic_idx))
2722
      if nic_errors:
2723
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2724
                                   "\n".join(nic_errors))
2725

    
2726
    # hypervisor list/parameters
2727
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2728
    if self.op.hvparams:
2729
      for hv_name, hv_dict in self.op.hvparams.items():
2730
        if hv_name not in self.new_hvparams:
2731
          self.new_hvparams[hv_name] = hv_dict
2732
        else:
2733
          self.new_hvparams[hv_name].update(hv_dict)
2734

    
2735
    # os hypervisor parameters
2736
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2737
    if self.op.os_hvp:
2738
      for os_name, hvs in self.op.os_hvp.items():
2739
        if os_name not in self.new_os_hvp:
2740
          self.new_os_hvp[os_name] = hvs
2741
        else:
2742
          for hv_name, hv_dict in hvs.items():
2743
            if hv_name not in self.new_os_hvp[os_name]:
2744
              self.new_os_hvp[os_name][hv_name] = hv_dict
2745
            else:
2746
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
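    # Illustrative sketch (not used by this LU): the per-OS hypervisor
    # overrides are merged one dictionary level deep.  Assumed example values:
    #
    #   new_os_hvp = {"debian": {"xen-pvm": {"kernel_path": "/boot/vmlinuz"}}}
    #   op_os_hvp = {"debian": {"xen-pvm": {"root_path": "/dev/xvda1"}}}
    #   # after the loop above, new_os_hvp["debian"]["xen-pvm"] ==
    #   #   {"kernel_path": "/boot/vmlinuz", "root_path": "/dev/xvda1"}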
2747

    
2748
    # os parameters
2749
    self.new_osp = objects.FillDict(cluster.osparams, {})
2750
    if self.op.osparams:
2751
      for os_name, osp in self.op.osparams.items():
2752
        if os_name not in self.new_osp:
2753
          self.new_osp[os_name] = {}
2754

    
2755
        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2756
                                                  use_none=True)
2757

    
2758
        if not self.new_osp[os_name]:
2759
          # we removed all parameters
2760
          del self.new_osp[os_name]
2761
        else:
2762
          # check the parameter validity (remote check)
2763
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2764
                         os_name, self.new_osp[os_name])
2765

    
2766
    # changes to the hypervisor list
2767
    if self.op.enabled_hypervisors is not None:
2768
      self.hv_list = self.op.enabled_hypervisors
2769
      for hv in self.hv_list:
2770
        # if the hypervisor doesn't already exist in the cluster
2771
        # hvparams, we initialize it to empty, and then (in both
2772
        # cases) we make sure to fill the defaults, as we might not
2773
        # have a complete defaults list if the hypervisor wasn't
2774
        # enabled before
2775
        if hv not in new_hvp:
2776
          new_hvp[hv] = {}
2777
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2778
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2779
    else:
2780
      self.hv_list = cluster.enabled_hypervisors
2781

    
2782
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
2783
      # either the enabled list has changed, or the parameters have, validate
2784
      for hv_name, hv_params in self.new_hvparams.items():
2785
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
2786
            (self.op.enabled_hypervisors and
2787
             hv_name in self.op.enabled_hypervisors)):
2788
          # either this is a new hypervisor, or its parameters have changed
2789
          hv_class = hypervisor.GetHypervisor(hv_name)
2790
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2791
          hv_class.CheckParameterSyntax(hv_params)
2792
          _CheckHVParams(self, node_list, hv_name, hv_params)
2793

    
2794
    if self.op.os_hvp:
2795
      # no need to check any newly-enabled hypervisors, since the
2796
      # defaults have already been checked in the above code-block
2797
      for os_name, os_hvp in self.new_os_hvp.items():
2798
        for hv_name, hv_params in os_hvp.items():
2799
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2800
          # we need to fill in the new os_hvp on top of the actual hv_p
2801
          cluster_defaults = self.new_hvparams.get(hv_name, {})
2802
          new_osp = objects.FillDict(cluster_defaults, hv_params)
2803
          hv_class = hypervisor.GetHypervisor(hv_name)
2804
          hv_class.CheckParameterSyntax(new_osp)
2805
          _CheckHVParams(self, node_list, hv_name, new_osp)
2806

    
2807
    if self.op.default_iallocator:
2808
      alloc_script = utils.FindFile(self.op.default_iallocator,
2809
                                    constants.IALLOCATOR_SEARCH_PATH,
2810
                                    os.path.isfile)
2811
      if alloc_script is None:
2812
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
2813
                                   " specified" % self.op.default_iallocator,
2814
                                   errors.ECODE_INVAL)
2815

    
2816
  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    """
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
    if self.op.drbd_helper is not None:
      new_helper = self.op.drbd_helper
      if not new_helper:
        new_helper = None
      if new_helper != self.cfg.GetDRBDHelper():
        self.cfg.SetDRBDHelper(new_helper)
      else:
        feedback_fn("Cluster DRBD helper already in desired state,"
                    " not changing")
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
    if self.op.enabled_hypervisors is not None:
      self.cluster.hvparams = self.new_hvparams
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
    if self.op.beparams:
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
    if self.op.nicparams:
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
    if self.op.osparams:
      self.cluster.osparams = self.new_osp

    if self.op.candidate_pool_size is not None:
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
      # we need to update the pool size here, otherwise the save will fail
      _AdjustCandidatePool(self, [])

    if self.op.maintain_node_health is not None:
      self.cluster.maintain_node_health = self.op.maintain_node_health

    if self.op.prealloc_wipe_disks is not None:
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks

    if self.op.add_uids is not None:
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)

    if self.op.remove_uids is not None:
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)

    if self.op.uid_pool is not None:
      self.cluster.uid_pool = self.op.uid_pool

    if self.op.default_iallocator is not None:
      self.cluster.default_iallocator = self.op.default_iallocator

    if self.op.reserved_lvs is not None:
      self.cluster.reserved_lvs = self.op.reserved_lvs

    def helper_os(aname, mods, desc):
      desc += " OS list"
      lst = getattr(self.cluster, aname)
      for key, val in mods:
        if key == constants.DDM_ADD:
          if val in lst:
            feedback_fn("OS %s already in %s, ignoring", val, desc)
          else:
            lst.append(val)
        elif key == constants.DDM_REMOVE:
          if val in lst:
            lst.remove(val)
          else:
            feedback_fn("OS %s not found in %s, ignoring", val, desc)
        else:
          raise errors.ProgrammerError("Invalid modification '%s'" % key)

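    # Illustrative note (not part of the original code): helper_os consumes
    # (key, value) pairs, e.g. mods = [(constants.DDM_ADD, "debian-image"),
    # (constants.DDM_REMOVE, "lenny-image")] -- hypothetical OS names -- would
    # append the first name and drop the second, while any other key raises
    # ProgrammerError.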
    if self.op.hidden_os:
      helper_os("hidden_os", self.op.hidden_os, "hidden")

    if self.op.blacklisted_os:
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")

    self.cfg.Update(self.cluster, feedback_fn)


def _UploadHelper(lu, nodes, fname):
  """Helper for uploading a file and showing warnings.

  """
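  # Illustrative example (not part of the original code): a call such as
  #   _UploadHelper(lu, ["node1.example.com", "node2.example.com"],
  #                 constants.ETC_HOSTS)
  # copies /etc/hosts to both nodes; per-node failures are only logged as
  # warnings and never raised to the caller.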
  if os.path.exists(fname):
    result = lu.rpc.call_upload_file(nodes, fname)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        msg = ("Copy of file %s to node %s failed: %s" %
               (fname, to_node, msg))
        lu.proc.LogWarning(msg)


def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
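  # Illustrative note (not part of the original code): LURedistributeConfig
  # calls this simply as _RedistributeAncillaryFiles(self), while LUAddNode
  # passes the node being added explicitly, e.g.
  #   _RedistributeAncillaryFiles(self, additional_nodes=[node],
  #                               additional_vm=self.op.vm_capable)
  # since that node is not yet part of the configuration.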
  # 1. Gather target nodes
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
  dist_nodes = lu.cfg.GetOnlineNodeList()
  nvm_nodes = lu.cfg.GetNonVmCapableNodeList()
  vm_nodes = [name for name in dist_nodes if name not in nvm_nodes]
  if additional_nodes is not None:
    dist_nodes.extend(additional_nodes)
    if additional_vm:
      vm_nodes.extend(additional_nodes)
  if myself.name in dist_nodes:
    dist_nodes.remove(myself.name)
  if myself.name in vm_nodes:
    vm_nodes.remove(myself.name)

  # 2. Gather files to distribute
  dist_files = set([constants.ETC_HOSTS,
                    constants.SSH_KNOWN_HOSTS_FILE,
                    constants.RAPI_CERT_FILE,
                    constants.RAPI_USERS_FILE,
                    constants.CONFD_HMAC_KEY,
                    constants.CLUSTER_DOMAIN_SECRET_FILE,
                   ])

  vm_files = set()
  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
  for hv_name in enabled_hypervisors:
    hv_class = hypervisor.GetHypervisor(hv_name)
    vm_files.update(hv_class.GetAncillaryFiles())

  # 3. Perform the files upload
  for fname in dist_files:
    _UploadHelper(lu, dist_nodes, fname)
  for fname in vm_files:
    _UploadHelper(lu, vm_nodes, fname)


class LURedistributeConfig(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)


def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  """
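  # Illustrative note (not part of the original code): the usual call is
  #   _WaitForSync(lu, instance)
  # which polls the primary node in a loop and returns True only if no disk
  # is left degraded; with oneshot=True, roughly speaking, a single status
  # pass is done instead of waiting for the sync to finish.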
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                           node, disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded


def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
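  # Illustrative example (not part of the original code): a caller might use
  #   _CheckDiskConsistency(lu, dev, instance.primary_node, True, ldisk=True)
  # the check recurses into dev.children, and an RPC failure only makes the
  # result False instead of raising.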
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result


class LUDiagnoseOS(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  _OP_PARAMS = [
    _POutputFields,
    ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
    ]
  REQ_BGL = False
  _HID = "hidden"
  _BLK = "blacklisted"
  _VLD = "valid"
  _FIELDS_STATIC = utils.FieldSet()
  _FIELDS_DYNAMIC = utils.FieldSet("name", _VLD, "node_status", "variants",
                                   "parameters", "api_versions", _HID, _BLK)

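  # Illustrative note (not part of the original code): a query selecting e.g.
  # output_fields=["name", "valid", "variants"] returns one row per OS;
  # hidden or blacklisted OSes are skipped unless the "hidden"/"blacklisted"
  # fields are explicitly requested (see the filtering in Exec below).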
  def CheckArguments(self):
    if self.op.names:
      raise errors.OpPrereqError("Selective OS query not supported",
                                 errors.ECODE_INVAL)

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    # Lock all nodes, in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    self.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and tuples of (path, status, diagnose,
        variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "", [], [])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for (name, path, status, diagnose, variants,
           params, api_versions) in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        # convert params from [name, help] to (name, help)
        params = [tuple(v) for v in params]
        all_os[name][node_name].append((path, status, diagnose,
                                        variants, params, api_versions))
    return all_os

  def Exec(self, feedback_fn):
    """Compute the list of OSes.

    """
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
    node_data = self.rpc.call_os_diagnose(valid_nodes)
    pol = self._DiagnoseByOS(node_data)
    output = []
    cluster = self.cfg.GetClusterInfo()

    for os_name in utils.NiceSort(pol.keys()):
      os_data = pol[os_name]
      row = []
      valid = True
      (variants, params, api_versions) = null_state = (set(), set(), set())
      for idx, osl in enumerate(os_data.values()):
        valid = bool(valid and osl and osl[0][1])
        if not valid:
          (variants, params, api_versions) = null_state
          break
        node_variants, node_params, node_api = osl[0][3:6]
        if idx == 0: # first entry
          variants = set(node_variants)
          params = set(node_params)
          api_versions = set(node_api)
        else: # keep consistency
          variants.intersection_update(node_variants)
          params.intersection_update(node_params)
          api_versions.intersection_update(node_api)

      is_hid = os_name in cluster.hidden_os
      is_blk = os_name in cluster.blacklisted_os
      if ((self._HID not in self.op.output_fields and is_hid) or
          (self._BLK not in self.op.output_fields and is_blk) or
          (self._VLD not in self.op.output_fields and not valid)):
        continue

      for field in self.op.output_fields:
        if field == "name":
          val = os_name
        elif field == self._VLD:
          val = valid
        elif field == "node_status":
          # this is just a copy of the dict
          val = {}
          for node_name, nos_list in os_data.items():
            val[node_name] = nos_list
        elif field == "variants":
          val = utils.NiceSort(list(variants))
        elif field == "parameters":
          val = list(params)
        elif field == "api_versions":
          val = list(api_versions)
        elif field == self._HID:
          val = is_hid
        elif field == self._BLK:
          val = is_blk
        else:
          raise errors.ParameterError(field)
        row.append(val)
      output.append(row)

    return output


class LURemoveNode(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE
  _OP_PARAMS = [
    _PNodeName,
    ]

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      logging.warning("Node %s which is about to be removed not found"
                      " in the all nodes list", self.op.node_name)
    return env, all_nodes, all_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    instance_list = self.cfg.GetInstanceList()

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node,"
                                 " you need to failover first.",
                                 errors.ECODE_INVAL)

    for instance_name in instance_list:
      instance = self.cfg.GetInstanceInfo(instance_name)
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first." % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_REMOVE,
                                              node.name, None)
      result.Raise("Can't update hosts file with new host data")
      _RedistributeAncillaryFiles(self)


class LUQueryNodes(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable-msg=W0142
  _OP_PARAMS = [
    _POutputFields,
    ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
    ("use_locking", False, ht.TBool),
    ]
  REQ_BGL = False

  _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
                    "master_candidate", "offline", "drained",
                    "master_capable", "vm_capable"]

  _FIELDS_DYNAMIC = utils.FieldSet(
    "dtotal", "dfree",
    "mtotal", "mnode", "mfree",
    "bootid",
    "ctotal", "cnodes", "csockets",
    )

  _FIELDS_STATIC = utils.FieldSet(*[
    "pinst_cnt", "sinst_cnt",
    "pinst_list", "sinst_list",
    "pip", "sip", "tags",
    "master",
    "role"] + _SIMPLE_FIELDS
    )

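  # Illustrative note (not part of the original code): requesting only static
  # fields, e.g. output_fields=["name", "pinst_cnt", "role"], is answered from
  # the configuration alone, while any dynamic field such as "mfree" or
  # "dtotal" triggers a node_info RPC (and node locking when use_locking is
  # set); see do_node_query/do_locking in ExpandNames below.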
  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.names:
      self.wanted = _GetWantedNodes(self, self.op.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      # if we don't request only static fields, we need to lock the nodes
      self.needed_locks[locking.LEVEL_NODE] = self.wanted

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    all_info = self.cfg.GetAllNodesInfo()
    if self.do_locking:
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
    elif self.wanted != locking.ALL_SET:
      nodenames = self.wanted
      missing = set(nodenames).difference(all_info.keys())
      if missing:
        raise errors.OpExecError(
          "Some nodes were removed before retrieving their data: %s" % missing)
    else:
      nodenames = all_info.keys()

    nodenames = utils.NiceSort(nodenames)
    nodelist = [all_info[name] for name in nodenames]

    # begin data gathering

    if self.do_node_query:
      live_data = {}
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                          self.cfg.GetHypervisorType())
      for name in nodenames:
        nodeinfo = node_data[name]
        if not nodeinfo.fail_msg and nodeinfo.payload:
          nodeinfo = nodeinfo.payload
          fn = utils.TryConvert
          live_data[name] = {
            "mtotal": fn(int, nodeinfo.get('memory_total', None)),
            "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
            "mfree": fn(int, nodeinfo.get('memory_free', None)),
            "dtotal": fn(int, nodeinfo.get('vg_size', None)),
            "dfree": fn(int, nodeinfo.get('vg_free', None)),
            "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
            "bootid": nodeinfo.get('bootid', None),
            "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
            "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
            }
        else:
          live_data[name] = {}
    else:
      live_data = dict.fromkeys(nodenames, {})

    node_to_primary = dict([(name, set()) for name in nodenames])
    node_to_secondary = dict([(name, set()) for name in nodenames])

    inst_fields = frozenset(("pinst_cnt", "pinst_list",
                             "sinst_cnt", "sinst_list"))
    if inst_fields & frozenset(self.op.output_fields):
      inst_data = self.cfg.GetAllInstancesInfo()

      for inst in inst_data.values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)

    master_node = self.cfg.GetMasterNode()

    # end data gathering

    output = []
    for node in nodelist:
      node_output = []
      for field in self.op.output_fields:
        if field in self._SIMPLE_FIELDS:
          val = getattr(node, field)
        elif field == "pinst_list":
          val = list(node_to_primary[node.name])
        elif field == "sinst_list":
          val = list(node_to_secondary[node.name])
        elif field == "pinst_cnt":
          val = len(node_to_primary[node.name])
        elif field == "sinst_cnt":
          val = len(node_to_secondary[node.name])
        elif field == "pip":
          val = node.primary_ip
        elif field == "sip":
          val = node.secondary_ip
        elif field == "tags":
          val = list(node.GetTags())
        elif field == "master":
          val = node.name == master_node
        elif self._FIELDS_DYNAMIC.Matches(field):
          val = live_data[node.name].get(field, None)
        elif field == "role":
          if node.name == master_node:
            val = "M"
          elif node.master_candidate:
            val = "C"
          elif node.drained:
            val = "D"
          elif node.offline:
            val = "O"
          else:
            val = "R"
        else:
          raise errors.ParameterError(field)
        node_output.append(val)
      output.append(node_output)

    return output


class LUQueryNodeVolumes(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  _OP_PARAMS = [
    ("nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
    ("output_fields", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
    ]
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

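  # Illustrative note (not part of the original code): selecting e.g.
  # output_fields=["node", "name", "size", "instance"] yields one row per
  # logical volume on each queried node, with "instance" resolving to the
  # owning instance name or "-" when no instance uses the volume.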
  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    nodenames = self.acquired_locks[locking.LEVEL_NODE]
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = [self.cfg.GetInstanceInfo(iname) for iname
             in self.cfg.GetInstanceList()]

    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = nresult.payload[:]
      node_vols.sort(key=lambda vol: vol['dev'])

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol['dev']
          elif field == "vg":
            val = vol['vg']
          elif field == "name":
            val = vol['name']
          elif field == "size":
            val = int(float(vol['size']))
          elif field == "instance":
            for inst in ilist:
              if node not in lv_by_node[inst]:
                continue
              if vol['name'] in lv_by_node[inst][node]:
                val = inst.name
                break
            else:
              val = '-'
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output


class LUQueryNodeStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  _OP_PARAMS = [
    ("nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
    ("storage_type", ht.NoDefault, _CheckStorageType),
    ("output_fields", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
    ("name", None, ht.TMaybeString),
    ]
  REQ_BGL = False

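  # Illustrative note (not part of the original code): Exec below always asks
  # the nodes for constants.SF_NAME (adding it to the requested fields when
  # absent) because the per-node rows are keyed and sorted by that name before
  # the user-selected columns are emitted.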
  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result


class LUModifyNodeStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  _OP_PARAMS = [
    _PNodeName,
    ("storage_type", ht.NoDefault, _CheckStorageType),
    ("name", ht.NoDefault, ht.TNonEmptyString),
    ("changes", ht.NoDefault, ht.TDict),
    ]
  REQ_BGL = False

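  # Illustrative note (not part of the original code): "changes" is a dict of
  # field->value updates and only keys listed for the storage type in
  # constants.MODIFIABLE_STORAGE_FIELDS are accepted; for an LVM PV one would
  # typically pass something like {constants.SF_ALLOCATABLE: False}, assuming
  # that field is modifiable for the chosen storage_type.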
  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def Exec(self, feedback_fn):
    """Modifies a storage volume on the node.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


class LUAddNode(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _OP_PARAMS = [
    _PNodeName,
    ("primary_ip", None, ht.NoType),
    ("secondary_ip", None, ht.TMaybeString),
    ("readd", False, ht.TBool),
    ("group", None, ht.TMaybeString),
    ("master_capable", None, ht.TMaybeBool),
    ("vm_capable", None, ht.TMaybeBool),
    ]
  _NFLAGS = ["master_capable", "vm_capable"]

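  # Illustrative note (not part of the original code): the _NFLAGS attributes
  # default to None in the opcode; CheckPrereq below resolves None to the old
  # node's values on a re-add (readd=True) and to True for a brand-new node,
  # so a plain node add ends up master_capable and vm_capable unless
  # explicitly overridden.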
  def CheckArguments(self):
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
    # validate/normalize the node name
    self.hostname = netutils.GetHostname(name=self.op.node_name,
                                         family=self.primary_ip_family)
    self.op.node_name = self.hostname.name
    if self.op.readd and self.op.group:
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
                                 " being readded", errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }
    nodes_0 = self.cfg.GetNodeList()
    nodes_1 = nodes_0 + [self.op.node_name, ]
    return env, nodes_0, nodes_1

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) match the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg
    hostname = self.hostname
    node = hostname.name
    primary_ip = self.op.primary_ip = hostname.ip
    if self.op.secondary_ip is None:
      if self.primary_ip_family == netutils.IP6Address.family:
        raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
                                   " IPv4 address must be given as secondary",
                                   errors.ECODE_INVAL)
      self.op.secondary_ip = primary_ip

    secondary_ip = self.op.secondary_ip
    if not netutils.IP4Address.IsValid(secondary_ip):
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                 " address" % secondary_ip, errors.ECODE_INVAL)

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    self.changed_primary_ip = False

    for existing_node_name in node_list:
      existing_node = cfg.GetNodeInfo(existing_node_name)

      if self.op.readd and node == existing_node_name:
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # After this 'if' block, None is no longer a valid value for the
    # _capable op attributes
    if self.op.readd:
      old_node = self.cfg.GetNodeInfo(node)
      assert old_node is not None, "Can't retrieve locked node %s" % node
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, getattr(old_node, attr))
    else:
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, True)

    if self.op.readd and not self.op.vm_capable:
      pri, sec = cfg.GetNodeInstances(node)
      if pri or sec:
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
                                   " flag set to false, but it already holds"
                                   " instances" % node,
                                   errors.ECODE_STATE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no secondary ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a secondary ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                           source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to node daemon port",
                                   errors.ECODE_ENVIRON)

    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    if self.op.master_capable:
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
    else:
      self.master_candidate = False

    if self.op.readd:
      self.new_node = old_node
    else:
      node_group = cfg.LookupNodeGroup(self.op.group)
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False,
                                   group=node_group)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # copy the master/vm_capable flags
    for attr in self._NFLAGS:
      setattr(new_node, attr, getattr(self.op, attr))

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_ADD,
                                              self.hostname.name,
                                              self.hostname.ip)
      result.Raise("Can't update hosts file with new host data")

    if new_node.secondary_ip != new_node.primary_ip:
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
                               False)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: [node],
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed.")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
                                  additional_vm=self.op.vm_capable)
      self.context.AddNode(new_node, self.proc.GetECId())


class LUSetNodeParams(LogicalUnit):
  """Modifies the parameters of a node.

  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
      to the node role (as _ROLE_*)
  @cvar _R2F: a dictionary from node role to tuples of flags
  @cvar _FLAGS: a list of attribute names corresponding to the flags

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  _OP_PARAMS = [
    _PNodeName,
    ("master_candidate", None, ht.TMaybeBool),
    ("offline", None, ht.TMaybeBool),
    ("drained", None, ht.TMaybeBool),
    ("auto_promote", False, ht.TBool),
    ("master_capable", None, ht.TMaybeBool),
    ("vm_capable", None, ht.TMaybeBool),
    ("secondary_ip", None, ht.TMaybeString),
    _PForce,
    ]
  REQ_BGL = False
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
  _FLAGS = ["master_candidate", "drained", "offline"]

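  # Illustrative note (not part of the original code): the role tables above
  # are keyed by the (master_candidate, drained, offline) tuple, so e.g.
  # _F2R[(True, False, False)] is _ROLE_CANDIDATE and _R2F[_ROLE_OFFLINE]
  # gives back (False, False, True); at most one of the flags may be set at
  # a time.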
  def CheckArguments(self):
4007
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4008
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
4009
                self.op.master_capable, self.op.vm_capable,
4010
                self.op.secondary_ip]
4011
    if all_mods.count(None) == len(all_mods):
4012
      raise errors.OpPrereqError("Please pass at least one modification",
4013
                                 errors.ECODE_INVAL)
4014
    if all_mods.count(True) > 1:
4015
      raise errors.OpPrereqError("Can't set the node into more than one"
4016
                                 " state at the same time",
4017
                                 errors.ECODE_INVAL)
4018

    
4019
    # Boolean value that tells us whether we might be demoting from MC
4020
    self.might_demote = (self.op.master_candidate == False or
4021
                         self.op.offline == True or
4022
                         self.op.drained == True or
4023
                         self.op.master_capable == False)
4024

    
4025
    if self.op.secondary_ip:
4026
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
4027
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4028
                                   " address" % self.op.secondary_ip,
4029
                                   errors.ECODE_INVAL)
4030

    
4031
    self.lock_all = self.op.auto_promote and self.might_demote
4032
    self.lock_instances = self.op.secondary_ip is not None
4033

    
4034
  def ExpandNames(self):
4035
    if self.lock_all:
4036
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
4037
    else:
4038
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
4039

    
4040
    if self.lock_instances:
4041
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
4042

    
4043
  def DeclareLocks(self, level):
4044
    # If we have locked all instances, before waiting to lock nodes, release
4045
    # all the ones living on nodes unrelated to the current operation.
4046
    if level == locking.LEVEL_NODE and self.lock_instances:
4047
      instances_release = []
4048
      instances_keep = []
4049
      self.affected_instances = []
4050
      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
4051
        for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
4052
          instance = self.context.cfg.GetInstanceInfo(instance_name)
4053
          i_mirrored = instance.disk_template in constants.DTS_NET_MIRROR
4054
          if i_mirrored and self.op.node_name in instance.all_nodes:
4055
            instances_keep.append(instance_name)
4056
            self.affected_instances.append(instance)
4057
          else:
4058
            instances_release.append(instance_name)
4059
        if instances_release:
4060
          self.context.glm.release(locking.LEVEL_INSTANCE, instances_release)
4061
          self.acquired_locks[locking.LEVEL_INSTANCE] = instances_keep
4062

    
4063
  def BuildHooksEnv(self):
4064
    """Build hooks env.
4065

4066
    This runs on the master node.
4067

4068
    """
4069
    env = {
4070
      "OP_TARGET": self.op.node_name,
4071
      "MASTER_CANDIDATE": str(self.op.master_candidate),
4072
      "OFFLINE": str(self.op.offline),
4073
      "DRAINED": str(self.op.drained),
4074
      "MASTER_CAPABLE": str(self.op.master_capable),
4075
      "VM_CAPABLE": str(self.op.vm_capable),
4076
      }
4077
    nl = [self.cfg.GetMasterNode(),
4078
          self.op.node_name]
4079
    return env, nl, nl
4080

    
4081
  def CheckPrereq(self):
4082
    """Check prerequisites.
4083

4084
    This only checks the instance list against the existing names.
4085

4086
    """
4087
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
4088

    
4089
    if (self.op.master_candidate is not None or
4090
        self.op.drained is not None or
4091
        self.op.offline is not None):
4092
      # we can't change the master's node flags
4093
      if self.op.node_name == self.cfg.GetMasterNode():
4094
        raise errors.OpPrereqError("The master role can be changed"
4095
                                   " only via master-failover",
4096
                                   errors.ECODE_INVAL)
4097

    
4098
    if self.op.master_candidate and not node.master_capable:
4099
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
4100
                                 " it a master candidate" % node.name,
4101
                                 errors.ECODE_STATE)
4102

    
4103
    if self.op.vm_capable == False:
4104
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
4105
      if ipri or isec:
4106
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
4107
                                   " the vm_capable flag" % node.name,
4108
                                   errors.ECODE_STATE)
4109

    
4110
    if node.master_candidate and self.might_demote and not self.lock_all:
4111
      assert not self.op.auto_promote, "auto-promote set but lock_all not"
4112
      # check if after removing the current node, we're missing master
4113
      # candidates
4114
      (mc_remaining, mc_should, _) = \
4115
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
4116
      if mc_remaining < mc_should:
4117
        raise errors.OpPrereqError("Not enough master candidates, please"
4118
                                   " pass auto_promote to allow promotion",
4119
                                   errors.ECODE_STATE)
4120

    
4121
    self.old_flags = old_flags = (node.master_candidate,
4122
                                  node.drained, node.offline)
4123
    assert old_flags in self._F2R, "Un-handled old flags  %s" % str(old_flags)
4124
    self.old_role = old_role = self._F2R[old_flags]
4125

    
4126
    # Check for ineffective changes
4127
    for attr in self._FLAGS:
4128
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
4129
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
4130
        setattr(self.op, attr, None)
4131

    
4132
    # Past this point, any flag change to False means a transition
4133
    # away from the respective state, as only real changes are kept
4134

    
4135
    # If we're being deofflined/drained, we'll MC ourself if needed
4136
    if (self.op.drained == False or self.op.offline == False or
4137
        (self.op.master_capable and not node.master_capable)):
4138
      if _DecideSelfPromotion(self):
4139
        self.op.master_candidate = True
4140
        self.LogInfo("Auto-promoting node to master candidate")
4141

    
4142
    # If we're no longer master capable, we'll demote ourselves from MC
4143
    if self.op.master_capable == False and node.master_candidate:
4144
      self.LogInfo("Demoting from master candidate")
4145
      self.op.master_candidate = False
4146

    
4147
    # Compute new role
4148
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
4149
    if self.op.master_candidate:
4150
      new_role = self._ROLE_CANDIDATE
4151
    elif self.op.drained:
4152
      new_role = self._ROLE_DRAINED
4153
    elif self.op.offline:
4154
      new_role = self._ROLE_OFFLINE
4155
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
4156
      # False is still in new flags, which means we're un-setting (the
4157
      # only) True flag
4158
      new_role = self._ROLE_REGULAR
4159
    else: # no new flags, nothing, keep old role
4160
      new_role = old_role
4161

    
4162
    self.new_role = new_role
4163

    
4164

    if old_role == self._ROLE_OFFLINE and new_role != old_role:
      # Trying to transition out of offline status
      result = self.rpc.call_version([node.name])[node.name]
      if result.fail_msg:
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                   " to report its version: %s" %
                                   (node.name, result.fail_msg),
                                   errors.ECODE_STATE)
      else:
        self.LogWarning("Transitioning node from offline to online state"
                        " without using re-add. Please make sure the node"
                        " is healthy!")

    if self.op.secondary_ip:
      # Ok even without locking, because this can't be changed by any LU
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
      master_singlehomed = master.secondary_ip == master.primary_ip
      if master_singlehomed and self.op.secondary_ip:
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
                                   " homed cluster", errors.ECODE_INVAL)

      if node.offline:
        if self.affected_instances:
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
                                     " node has instances (%s) configured"
                                     " to use it" % self.affected_instances)
      else:
        # On online nodes, check that no instances are running, and that
        # the node has the new ip and we can reach it.
        for instance in self.affected_instances:
          _CheckInstanceDown(self, instance, "cannot change secondary ip")

        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
        if master.name != node.name:
          # check reachability from master secondary ip to new secondary ip
          if not netutils.TcpPing(self.op.secondary_ip,
                                  constants.DEFAULT_NODED_PORT,
                                  source=master.secondary_ip):
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                       " based ping to node daemon port",
                                       errors.ECODE_ENVIRON)

  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node
    old_role = self.old_role
    new_role = self.new_role

    result = []

    for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))

    if new_role != old_role:
      # Tell the node to demote itself, if no longer MC and not offline
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s", msg)

      new_flags = self._R2F[new_role]
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
        if of != nf:
          result.append((desc, str(nf)))
      (node.master_candidate, node.drained, node.offline) = new_flags

      # we locked all nodes, we adjust the CP before updating this node
      if self.lock_all:
        _AdjustCandidatePool(self, [node.name])

    if self.op.secondary_ip:
      node.secondary_ip = self.op.secondary_ip
      result.append(("secondary_ip", self.op.secondary_ip))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup if the mc
    # flag changed
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)

    return result


class LUPowercycleNode(NoHooksLU):
  """Powercycles a node.

  """
  _OP_PARAMS = [
    _PNodeName,
    _PForce,
    ]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload


class LUQueryClusterInfo(NoHooksLU):
  """Query cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}

    # Filter just for enabled hypervisors
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = {}
      for hv_name, hv_params in hv_dict.items():
        if hv_name in cluster.enabled_hypervisors:
          os_hvp[os_name][hv_name] = hv_params
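
    # The filtered mapping pairs each OS name with the per-hypervisor
    # parameter overrides of enabled hypervisors only; illustrative shape
    # (names are hypothetical):
    #   {"debian-image": {"xen-pvm": {...}, "kvm": {...}}}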

    # Convert ip_family to ip_version
    primary_ip_version = constants.IP4_VERSION
    if cluster.primary_ip_family == netutils.IP6Address.family:
      primary_ip_version = constants.IP6_VERSION

    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.enabled_hypervisors[0],
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "nicparams": cluster.nicparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      "primary_ip_version": primary_ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
      }

    return result


class LUQueryConfigValues(NoHooksLU):
  """Return configuration values.

  """
  _OP_PARAMS = [_POutputFields]
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause", "volume_group_name")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Dump a representation of the cluster config to the standard output.

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      elif field == "volume_group_name":
        entry = self.cfg.GetVGName()
      else:
        raise errors.ParameterError(field)
      values.append(entry)
    return values


class LUActivateInstanceDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  _OP_PARAMS = [
    _PInstanceName,
    ("ignore_size", False, ht.TBool),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
              _AssembleInstanceDisks(self, self.instance,
                                     ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info


def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: a tuple of (status, device_info); status is False if the
      operation failed, and device_info is a list of
      (host, instance_visible_name, node_visible_name) triples with the
      mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  # With the two-pass mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for inst_disk in disks:
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for inst_disk in disks:
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info


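# Illustrative shape of the value returned by _AssembleInstanceDisks above
# (node name and device path are hypothetical):
#   (True, [("node1.example.com", "disk/0", "/dev/drbd0")])

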
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                           ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")


class LUDeactivateInstanceDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  _OP_PARAMS = [
    _PInstanceName,
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    """
    instance = self.instance
    _SafeShutdownInstanceDisks(self, instance)


def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks that the instance is not running before calling
  _ShutdownInstanceDisks.

  """
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)


def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on

  """
  if disks is None:
    return instance.disks
  else:
    if not set(disks).issubset(instance.disks):
      raise errors.ProgrammerError("Can only act on disks belonging to the"
                                   " target instance")
    return disks


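# Behaviour summary for _ExpandCheckDisks above: disks=None selects every
# disk of the instance, while passing any disk object that is not in
# instance.disks raises a ProgrammerError.

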
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If ignore_primary is true, errors on the primary node are ignored when
  computing the return value; errors on any other node always cause a
  False return value.

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if not ignore_primary or node != instance.primary_node:
          all_result = False
  return all_result


def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  free_mem = nodeinfo[node].payload.get('memory_free', None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)


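# Illustrative call, mirroring how LUStartupInstance.CheckPrereq uses this
# helper (the requested amount comes from the instance's filled BE
# parameters):
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MEMORY], instance.hypervisor)

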
def _CheckNodesFreeDisk(lu, nodenames, requested):
  """Checks if nodes have enough free disk space in the default VG.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
                                   lu.cfg.GetHypervisorType())
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    vg_free = info.payload.get("vg_free", None)
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node %s,"
                                 " result was '%s'" % (node, vg_free),
                                 errors.ECODE_ENVIRON)
    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s:"
                                 " required %d MiB, available %d MiB" %
                                 (node, requested, vg_free),
                                 errors.ECODE_NORES)


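# Illustrative call (node names and the size are hypothetical): make sure
# both candidate nodes could hold two new 10 GiB volumes:
#   _CheckNodesFreeDisk(self, [pnode.name, snode.name], 2 * 10 * 1024)

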
class LUStartupInstance(LogicalUnit):
4705
  """Starts an instance.
4706

4707
  """
4708
  HPATH = "instance-start"
4709
  HTYPE = constants.HTYPE_INSTANCE
4710
  _OP_PARAMS = [
4711
    _PInstanceName,
4712
    _PForce,
4713
    _PIgnoreOfflineNodes,
4714
    ("hvparams", ht.EmptyDict, ht.TDict),
4715
    ("beparams", ht.EmptyDict, ht.TDict),
4716
    ]
4717
  REQ_BGL = False
4718

    
4719
  def CheckArguments(self):
4720
    # extra beparams
4721
    if self.op.beparams:
4722
      # fill the beparams dict
4723
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4724

    
4725
  def ExpandNames(self):
4726
    self._ExpandAndLockInstance()
4727

    
4728
  def BuildHooksEnv(self):
4729
    """Build hooks env.
4730

4731
    This runs on master, primary and secondary nodes of the instance.
4732

4733
    """
4734
    env = {
4735
      "FORCE": self.op.force,
4736
      }
4737
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4738
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4739
    return env, nl, nl
4740

    
4741
  def CheckPrereq(self):
4742
    """Check prerequisites.
4743

4744
    This checks that the instance is in the cluster.
4745

4746
    """
4747
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4748
    assert self.instance is not None, \
4749
      "Cannot retrieve locked instance %s" % self.op.instance_name
4750

    
4751
    # extra hvparams
4752
    if self.op.hvparams:
4753
      # check hypervisor parameter syntax (locally)
4754
      cluster = self.cfg.GetClusterInfo()
4755
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
4756
      filled_hvp = cluster.FillHV(instance)
4757
      filled_hvp.update(self.op.hvparams)
4758
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
4759
      hv_type.CheckParameterSyntax(filled_hvp)
4760
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
4761

    
4762
    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
4763

    
4764
    if self.primary_offline and self.op.ignore_offline_nodes:
4765
      self.proc.LogWarning("Ignoring offline primary node")
4766

    
4767
      if self.op.hvparams or self.op.beparams:
4768
        self.proc.LogWarning("Overridden parameters are ignored")
4769
    else:
4770
      _CheckNodeOnline(self, instance.primary_node)
4771

    
4772
      bep = self.cfg.GetClusterInfo().FillBE(instance)
4773

    
4774
      # check bridges existence
4775
      _CheckInstanceBridgesExist(self, instance)
4776

    
4777
      remote_info = self.rpc.call_instance_info(instance.primary_node,
4778
                                                instance.name,
4779
                                                instance.hypervisor)
4780
      remote_info.Raise("Error checking node %s" % instance.primary_node,
4781
                        prereq=True, ecode=errors.ECODE_ENVIRON)
4782
      if not remote_info.payload: # not running already
4783
        _CheckNodeFreeMemory(self, instance.primary_node,
4784
                             "starting instance %s" % instance.name,
4785
                             bep[constants.BE_MEMORY], instance.hypervisor)
4786

    
4787
  def Exec(self, feedback_fn):
4788
    """Start the instance.
4789

4790
    """
4791
    instance = self.instance
4792
    force = self.op.force
4793

    
4794
    self.cfg.MarkInstanceUp(instance.name)
4795

    
4796
    if self.primary_offline:
4797
      assert self.op.ignore_offline_nodes
4798
      self.proc.LogInfo("Primary node offline, marked instance as started")
4799
    else:
4800
      node_current = instance.primary_node
4801

    
4802
      _StartInstanceDisks(self, instance, force)
4803

    
4804
      result = self.rpc.call_instance_start(node_current, instance,
4805
                                            self.op.hvparams, self.op.beparams)
4806
      msg = result.fail_msg
4807
      if msg:
4808
        _ShutdownInstanceDisks(self, instance)
4809
        raise errors.OpExecError("Could not start instance: %s" % msg)
4810

    
4811

    
4812
class LURebootInstance(LogicalUnit):
4813
  """Reboot an instance.
4814

4815
  """
4816
  HPATH = "instance-reboot"
4817
  HTYPE = constants.HTYPE_INSTANCE
4818
  _OP_PARAMS = [
4819
    _PInstanceName,
4820
    ("ignore_secondaries", False, ht.TBool),
4821
    ("reboot_type", ht.NoDefault, ht.TElemOf(constants.REBOOT_TYPES)),
4822
    _PShutdownTimeout,
4823
    ]
4824
  REQ_BGL = False
4825

    
4826
  def ExpandNames(self):
4827
    self._ExpandAndLockInstance()
4828

    
4829
  def BuildHooksEnv(self):
4830
    """Build hooks env.
4831

4832
    This runs on master, primary and secondary nodes of the instance.
4833

4834
    """
4835
    env = {
4836
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
4837
      "REBOOT_TYPE": self.op.reboot_type,
4838
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
4839
      }
4840
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4841
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4842
    return env, nl, nl
4843

    
4844
  def CheckPrereq(self):
4845
    """Check prerequisites.
4846

4847
    This checks that the instance is in the cluster.
4848

4849
    """
4850
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4851
    assert self.instance is not None, \
4852
      "Cannot retrieve locked instance %s" % self.op.instance_name
4853

    
4854
    _CheckNodeOnline(self, instance.primary_node)
4855

    
4856
    # check bridges existence
4857
    _CheckInstanceBridgesExist(self, instance)
4858

    
4859
  def Exec(self, feedback_fn):
4860
    """Reboot the instance.
4861

4862
    """
4863
    instance = self.instance
4864
    ignore_secondaries = self.op.ignore_secondaries
4865
    reboot_type = self.op.reboot_type
4866

    
4867
    node_current = instance.primary_node
4868

    
4869
    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
4870
                       constants.INSTANCE_REBOOT_HARD]:
4871
      for disk in instance.disks:
4872
        self.cfg.SetDiskID(disk, node_current)
4873
      result = self.rpc.call_instance_reboot(node_current, instance,
4874
                                             reboot_type,
4875
                                             self.op.shutdown_timeout)
4876
      result.Raise("Could not reboot instance")
4877
    else:
4878
      result = self.rpc.call_instance_shutdown(node_current, instance,
4879
                                               self.op.shutdown_timeout)
4880
      result.Raise("Could not shutdown instance for full reboot")
4881
      _ShutdownInstanceDisks(self, instance)
4882
      _StartInstanceDisks(self, instance, ignore_secondaries)
4883
      result = self.rpc.call_instance_start(node_current, instance, None, None)
4884
      msg = result.fail_msg
4885
      if msg:
4886
        _ShutdownInstanceDisks(self, instance)
4887
        raise errors.OpExecError("Could not start instance for"
4888
                                 " full reboot: %s" % msg)
4889

    
4890
    self.cfg.MarkInstanceUp(instance.name)
4891

    
4892

    
4893
class LUShutdownInstance(LogicalUnit):
4894
  """Shutdown an instance.
4895

4896
  """
4897
  HPATH = "instance-stop"
4898
  HTYPE = constants.HTYPE_INSTANCE
4899
  _OP_PARAMS = [
4900
    _PInstanceName,
4901
    _PIgnoreOfflineNodes,
4902
    ("timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT, ht.TPositiveInt),
4903
    ]
4904
  REQ_BGL = False
4905

    
4906
  def ExpandNames(self):
4907
    self._ExpandAndLockInstance()
4908

    
4909
  def BuildHooksEnv(self):
4910
    """Build hooks env.
4911

4912
    This runs on master, primary and secondary nodes of the instance.
4913

4914
    """
4915
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4916
    env["TIMEOUT"] = self.op.timeout
4917
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4918
    return env, nl, nl
4919

    
4920
  def CheckPrereq(self):
4921
    """Check prerequisites.
4922

4923
    This checks that the instance is in the cluster.
4924

4925
    """
4926
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4927
    assert self.instance is not None, \
4928
      "Cannot retrieve locked instance %s" % self.op.instance_name
4929

    
4930
    self.primary_offline = \
4931
      self.cfg.GetNodeInfo(self.instance.primary_node).offline
4932

    
4933
    if self.primary_offline and self.op.ignore_offline_nodes:
4934
      self.proc.LogWarning("Ignoring offline primary node")
4935
    else:
4936
      _CheckNodeOnline(self, self.instance.primary_node)
4937

    
4938
  def Exec(self, feedback_fn):
4939
    """Shutdown the instance.
4940

4941
    """
4942
    instance = self.instance
4943
    node_current = instance.primary_node
4944
    timeout = self.op.timeout
4945

    
4946
    self.cfg.MarkInstanceDown(instance.name)
4947

    
4948
    if self.primary_offline:
4949
      assert self.op.ignore_offline_nodes
4950
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
4951
    else:
4952
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
4953
      msg = result.fail_msg
4954
      if msg:
4955
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)
4956

    
4957
      _ShutdownInstanceDisks(self, instance)
4958

    
4959

    
4960
class LUReinstallInstance(LogicalUnit):
4961
  """Reinstall an instance.
4962

4963
  """
4964
  HPATH = "instance-reinstall"
4965
  HTYPE = constants.HTYPE_INSTANCE
4966
  _OP_PARAMS = [
4967
    _PInstanceName,
4968
    ("os_type", None, ht.TMaybeString),
4969
    ("force_variant", False, ht.TBool),
4970
    ("osparams", None, ht.TOr(ht.TDict, ht.TNone)),
4971
    ]
4972
  REQ_BGL = False
4973

    
4974
  def ExpandNames(self):
4975
    self._ExpandAndLockInstance()
4976

    
4977
  def BuildHooksEnv(self):
4978
    """Build hooks env.
4979

4980
    This runs on master, primary and secondary nodes of the instance.
4981

4982
    """
4983
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4984
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4985
    return env, nl, nl
4986

    
4987
  def CheckPrereq(self):
4988
    """Check prerequisites.
4989

4990
    This checks that the instance is in the cluster and is not running.
4991

4992
    """
4993
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4994
    assert instance is not None, \
4995
      "Cannot retrieve locked instance %s" % self.op.instance_name
4996
    _CheckNodeOnline(self, instance.primary_node)
4997

    
4998
    if instance.disk_template == constants.DT_DISKLESS:
4999
      raise errors.OpPrereqError("Instance '%s' has no disks" %
5000
                                 self.op.instance_name,
5001
                                 errors.ECODE_INVAL)
5002
    _CheckInstanceDown(self, instance, "cannot reinstall")
5003

    
5004
    if self.op.os_type is not None:
5005
      # OS verification
5006
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
5007
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
5008
      instance_os = self.op.os_type
5009
    else:
5010
      instance_os = instance.os
5011

    
5012
    nodelist = list(instance.all_nodes)
5013

    
5014
    if self.op.osparams:
5015
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
5016
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
5017
      self.os_inst = i_osdict # the new dict (without defaults)
5018
    else:
5019
      self.os_inst = None
5020

    
5021
    self.instance = instance
5022

    
5023
  def Exec(self, feedback_fn):
5024
    """Reinstall the instance.
5025

5026
    """
5027
    inst = self.instance
5028

    
5029
    if self.op.os_type is not None:
5030
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
5031
      inst.os = self.op.os_type
5032
      # Write to configuration
5033
      self.cfg.Update(inst, feedback_fn)
5034

    
5035
    _StartInstanceDisks(self, inst, None)
5036
    try:
5037
      feedback_fn("Running the instance OS create scripts...")
5038
      # FIXME: pass debug option from opcode to backend
5039
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
5040
                                             self.op.debug_level,
5041
                                             osparams=self.os_inst)
5042
      result.Raise("Could not install OS for instance %s on node %s" %
5043
                   (inst.name, inst.primary_node))
5044
    finally:
5045
      _ShutdownInstanceDisks(self, inst)
5046

    
5047

    
5048
class LURecreateInstanceDisks(LogicalUnit):
5049
  """Recreate an instance's missing disks.
5050

5051
  """
5052
  HPATH = "instance-recreate-disks"
5053
  HTYPE = constants.HTYPE_INSTANCE
5054
  _OP_PARAMS = [
5055
    _PInstanceName,
5056
    ("disks", ht.EmptyList, ht.TListOf(ht.TPositiveInt)),
5057
    ]
5058
  REQ_BGL = False
5059

    
5060
  def ExpandNames(self):
5061
    self._ExpandAndLockInstance()
5062

    
5063
  def BuildHooksEnv(self):
5064
    """Build hooks env.
5065

5066
    This runs on master, primary and secondary nodes of the instance.
5067

5068
    """
5069
    env = _BuildInstanceHookEnvByObject(self, self.instance)
5070
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5071
    return env, nl, nl
5072

    
5073
  def CheckPrereq(self):
5074
    """Check prerequisites.
5075

5076
    This checks that the instance is in the cluster and is not running.
5077

5078
    """
5079
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5080
    assert instance is not None, \
5081
      "Cannot retrieve locked instance %s" % self.op.instance_name
5082
    _CheckNodeOnline(self, instance.primary_node)
5083

    
5084
    if instance.disk_template == constants.DT_DISKLESS:
5085
      raise errors.OpPrereqError("Instance '%s' has no disks" %
5086
                                 self.op.instance_name, errors.ECODE_INVAL)
5087
    _CheckInstanceDown(self, instance, "cannot recreate disks")
5088

    
5089
    if not self.op.disks:
5090
      self.op.disks = range(len(instance.disks))
5091
    else:
5092
      for idx in self.op.disks:
5093
        if idx >= len(instance.disks):
5094
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
5095
                                     errors.ECODE_INVAL)
5096

    
5097
    self.instance = instance
5098

    
5099
  def Exec(self, feedback_fn):
5100
    """Recreate the disks.
5101

5102
    """
5103
    to_skip = []
5104
    for idx, _ in enumerate(self.instance.disks):
5105
      if idx not in self.op.disks: # disk idx has not been passed in
5106
        to_skip.append(idx)
5107
        continue
5108

    
5109
    _CreateDisks(self, self.instance, to_skip=to_skip)
5110

    
5111

    
5112
class LURenameInstance(LogicalUnit):
5113
  """Rename an instance.
5114

5115
  """
5116
  HPATH = "instance-rename"
5117
  HTYPE = constants.HTYPE_INSTANCE
5118
  _OP_PARAMS = [
5119
    _PInstanceName,
5120
    ("new_name", ht.NoDefault, ht.TNonEmptyString),
5121
    ("ip_check", False, ht.TBool),
5122
    ("name_check", True, ht.TBool),
5123
    ]
5124

    
5125
  def CheckArguments(self):
5126
    """Check arguments.
5127

5128
    """
5129
    if self.op.ip_check and not self.op.name_check:
5130
      # TODO: make the ip check more flexible and not depend on the name check
5131
      raise errors.OpPrereqError("Cannot do ip check without a name check",
5132
                                 errors.ECODE_INVAL)
5133

    
5134
  def BuildHooksEnv(self):
5135
    """Build hooks env.
5136

5137
    This runs on master, primary and secondary nodes of the instance.
5138

5139
    """
5140
    env = _BuildInstanceHookEnvByObject(self, self.instance)
5141
    env["INSTANCE_NEW_NAME"] = self.op.new_name
5142
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5143
    return env, nl, nl
5144

    
5145
  def CheckPrereq(self):
5146
    """Check prerequisites.
5147

5148
    This checks that the instance is in the cluster and is not running.
5149

5150
    """
5151
    self.op.instance_name = _ExpandInstanceName(self.cfg,
5152
                                                self.op.instance_name)
5153
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5154
    assert instance is not None
5155
    _CheckNodeOnline(self, instance.primary_node)
5156
    _CheckInstanceDown(self, instance, "cannot rename")
5157
    self.instance = instance
5158

    
5159
    new_name = self.op.new_name
5160
    if self.op.name_check:
5161
      hostname = netutils.GetHostname(name=new_name)
5162
      new_name = self.op.new_name = hostname.name
5163
      if (self.op.ip_check and
5164
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
5165
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
5166
                                   (hostname.ip, new_name),
5167
                                   errors.ECODE_NOTUNIQUE)
5168

    
5169
    instance_list = self.cfg.GetInstanceList()
5170
    if new_name in instance_list:
5171
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
5172
                                 new_name, errors.ECODE_EXISTS)
5173

    
5174
  def Exec(self, feedback_fn):
5175
    """Reinstall the instance.
5176

5177
    """
5178
    inst = self.instance
5179
    old_name = inst.name
5180

    
5181
    if inst.disk_template == constants.DT_FILE:
5182
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5183

    
5184
    self.cfg.RenameInstance(inst.name, self.op.new_name)
5185
    # Change the instance lock. This is definitely safe while we hold the BGL
5186
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
5187
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
5188

    
5189
    # re-read the instance from the configuration after rename
5190
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
5191

    
5192
    if inst.disk_template == constants.DT_FILE:
5193
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5194
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
5195
                                                     old_file_storage_dir,
5196
                                                     new_file_storage_dir)
5197
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
5198
                   " (but the instance has been renamed in Ganeti)" %
5199
                   (inst.primary_node, old_file_storage_dir,
5200
                    new_file_storage_dir))
5201

    
5202
    _StartInstanceDisks(self, inst, None)
5203
    try:
5204
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
5205
                                                 old_name, self.op.debug_level)
5206
      msg = result.fail_msg
5207
      if msg:
5208
        msg = ("Could not run OS rename script for instance %s on node %s"
5209
               " (but the instance has been renamed in Ganeti): %s" %
5210
               (inst.name, inst.primary_node, msg))
5211
        self.proc.LogWarning(msg)
5212
    finally:
5213
      _ShutdownInstanceDisks(self, inst)
5214

    
5215
    return inst.name
5216

    
5217

    
5218
class LURemoveInstance(LogicalUnit):
5219
  """Remove an instance.
5220

5221
  """
5222
  HPATH = "instance-remove"
5223
  HTYPE = constants.HTYPE_INSTANCE
5224
  _OP_PARAMS = [
5225
    _PInstanceName,
5226
    ("ignore_failures", False, ht.TBool),
5227
    _PShutdownTimeout,
5228
    ]
5229
  REQ_BGL = False
5230

    
5231
  def ExpandNames(self):
5232
    self._ExpandAndLockInstance()
5233
    self.needed_locks[locking.LEVEL_NODE] = []
5234
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5235

    
5236
  def DeclareLocks(self, level):
5237
    if level == locking.LEVEL_NODE:
5238
      self._LockInstancesNodes()
5239

    
5240
  def BuildHooksEnv(self):
5241
    """Build hooks env.
5242

5243
    This runs on master, primary and secondary nodes of the instance.
5244

5245
    """
5246
    env = _BuildInstanceHookEnvByObject(self, self.instance)
5247
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
5248
    nl = [self.cfg.GetMasterNode()]
5249
    nl_post = list(self.instance.all_nodes) + nl
5250
    return env, nl, nl_post
5251

    
5252
  def CheckPrereq(self):
5253
    """Check prerequisites.
5254

5255
    This checks that the instance is in the cluster.
5256

5257
    """
5258
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5259
    assert self.instance is not None, \
5260
      "Cannot retrieve locked instance %s" % self.op.instance_name
5261

    
5262
  def Exec(self, feedback_fn):
5263
    """Remove the instance.
5264

5265
    """
5266
    instance = self.instance
5267
    logging.info("Shutting down instance %s on node %s",
5268
                 instance.name, instance.primary_node)
5269

    
5270
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
5271
                                             self.op.shutdown_timeout)
5272
    msg = result.fail_msg
5273
    if msg:
5274
      if self.op.ignore_failures:
5275
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
5276
      else:
5277
        raise errors.OpExecError("Could not shutdown instance %s on"
5278
                                 " node %s: %s" %
5279
                                 (instance.name, instance.primary_node, msg))
5280

    
5281
    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
5282

    
5283

    
5284
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
5285
  """Utility function to remove an instance.
5286

5287
  """
5288
  logging.info("Removing block devices for instance %s", instance.name)
5289

    
5290
  if not _RemoveDisks(lu, instance):
5291
    if not ignore_failures:
5292
      raise errors.OpExecError("Can't remove instance's disks")
5293
    feedback_fn("Warning: can't remove instance's disks")
5294

    
5295
  logging.info("Removing instance %s out of cluster config", instance.name)
5296

    
5297
  lu.cfg.RemoveInstance(instance.name)
5298

    
5299
  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
5300
    "Instance lock removal conflict"
5301

    
5302
  # Remove lock for the instance
5303
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5304

    
5305

    
5306
class LUQueryInstances(NoHooksLU):
5307
  """Logical unit for querying instances.
5308

5309
  """
5310
  # pylint: disable-msg=W0142
5311
  _OP_PARAMS = [
5312
    ("output_fields", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
5313
    ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
5314
    ("use_locking", False, ht.TBool),
5315
    ]
5316
  REQ_BGL = False
5317
  _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
5318
                    "serial_no", "ctime", "mtime", "uuid"]
5319
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
5320
                                    "admin_state",
5321
                                    "disk_template", "ip", "mac", "bridge",
5322
                                    "nic_mode", "nic_link",
5323
                                    "sda_size", "sdb_size", "vcpus", "tags",
5324
                                    "network_port", "beparams",
5325
                                    r"(disk)\.(size)/([0-9]+)",
5326
                                    r"(disk)\.(sizes)", "disk_usage",
5327
                                    r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
5328
                                    r"(nic)\.(bridge)/([0-9]+)",
5329
                                    r"(nic)\.(macs|ips|modes|links|bridges)",
5330
                                    r"(disk|nic)\.(count)",
5331
                                    "hvparams", "custom_hvparams",
5332
                                    "custom_beparams", "custom_nicparams",
5333
                                    ] + _SIMPLE_FIELDS +
5334
                                  ["hv/%s" % name
5335
                                   for name in constants.HVS_PARAMETERS
5336
                                   if name not in constants.HVC_GLOBALS] +
5337
                                  ["be/%s" % name
5338
                                   for name in constants.BES_PARAMETERS])
5339
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state",
5340
                                   "oper_ram",
5341
                                   "oper_vcpus",
5342
                                   "status")
5343

    
5344

    
5345
  def CheckArguments(self):
5346
    _CheckOutputFields(static=self._FIELDS_STATIC,
5347
                       dynamic=self._FIELDS_DYNAMIC,
5348
                       selected=self.op.output_fields)
5349

    
5350
  def ExpandNames(self):
5351
    self.needed_locks = {}
5352
    self.share_locks[locking.LEVEL_INSTANCE] = 1
5353
    self.share_locks[locking.LEVEL_NODE] = 1
5354

    
5355
    if self.op.names:
5356
      self.wanted = _GetWantedInstances(self, self.op.names)
5357
    else:
5358
      self.wanted = locking.ALL_SET
5359

    
5360
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
5361
    self.do_locking = self.do_node_query and self.op.use_locking
5362
    if self.do_locking:
5363
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5364
      self.needed_locks[locking.LEVEL_NODE] = []
5365
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5366

    
5367
  def DeclareLocks(self, level):
5368
    if level == locking.LEVEL_NODE and self.do_locking:
5369
      self._LockInstancesNodes()
5370

    
5371
  def Exec(self, feedback_fn):
5372
    """Computes the list of nodes and their attributes.
5373

5374
    """
5375
    # pylint: disable-msg=R0912
5376
    # way too many branches here
5377
    all_info = self.cfg.GetAllInstancesInfo()
5378
    if self.wanted == locking.ALL_SET:
5379
      # caller didn't specify instance names, so ordering is not important
5380
      if self.do_locking:
5381
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
5382
      else:
5383
        instance_names = all_info.keys()
5384
      instance_names = utils.NiceSort(instance_names)
5385
    else:
5386
      # caller did specify names, so we must keep the ordering
5387
      if self.do_locking:
5388
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
5389
      else:
5390
        tgt_set = all_info.keys()
5391
      missing = set(self.wanted).difference(tgt_set)
5392
      if missing:
5393
        raise errors.OpExecError("Some instances were removed before"
5394
                                 " retrieving their data: %s" % missing)
5395
      instance_names = self.wanted
5396

    
5397
    instance_list = [all_info[iname] for iname in instance_names]
5398

    
5399
    # begin data gathering
5400

    
5401
    nodes = frozenset([inst.primary_node for inst in instance_list])
5402
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
5403

    
5404
    bad_nodes = []
5405
    off_nodes = []
5406
    if self.do_node_query:
5407
      live_data = {}
5408
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
5409
      for name in nodes:
5410
        result = node_data[name]
5411
        if result.offline:
5412
          # offline nodes will be in both lists
5413
          off_nodes.append(name)
5414
        if result.fail_msg:
5415
          bad_nodes.append(name)
5416
        else:
5417
          if result.payload:
5418
            live_data.update(result.payload)
5419
          # else no instance is alive
5420
    else:
5421
      live_data = dict([(name, {}) for name in instance_names])
5422

    
5423
    # end data gathering
5424

    
5425
    HVPREFIX = "hv/"
5426
    BEPREFIX = "be/"
5427
    output = []
5428
    cluster = self.cfg.GetClusterInfo()
5429
    for instance in instance_list:
5430
      iout = []
5431
      i_hv = cluster.FillHV(instance, skip_globals=True)
5432
      i_be = cluster.FillBE(instance)
5433
      i_nicp = [cluster.SimpleFillNIC(nic.nicparams) for nic in instance.nics]
5434
      for field in self.op.output_fields:
5435
        st_match = self._FIELDS_STATIC.Matches(field)
5436
        if field in self._SIMPLE_FIELDS:
5437
          val = getattr(instance, field)
5438
        elif field == "pnode":
5439
          val = instance.primary_node
5440
        elif field == "snodes":
5441
          val = list(instance.secondary_nodes)
5442
        elif field == "admin_state":
5443
          val = instance.admin_up
5444
        elif field == "oper_state":
5445
          if instance.primary_node in bad_nodes:
5446
            val = None
5447
          else:
5448
            val = bool(live_data.get(instance.name))
5449
        elif field == "status":
5450
          if instance.primary_node in off_nodes:
5451
            val = "ERROR_nodeoffline"
5452
          elif instance.primary_node in bad_nodes:
5453
            val = "ERROR_nodedown"
5454
          else:
5455
            running = bool(live_data.get(instance.name))
5456
            if running:
5457
              if instance.admin_up:
5458
                val = "running"
5459
              else:
5460
                val = "ERROR_up"
5461
            else:
5462
              if instance.admin_up:
5463
                val = "ERROR_down"
5464
              else:
5465
                val = "ADMIN_down"
5466
        elif field == "oper_ram":
5467
          if instance.primary_node in bad_nodes:
5468
            val = None
5469
          elif instance.name in live_data:
5470
            val = live_data[instance.name].get("memory", "?")
5471
          else:
5472
            val = "-"
5473
        elif field == "oper_vcpus":
5474
          if instance.primary_node in bad_nodes:
5475
            val = None
5476
          elif instance.name in live_data:
5477
            val = live_data[instance.name].get("vcpus", "?")
5478
          else:
5479
            val = "-"
5480
        elif field == "vcpus":
5481
          val = i_be[constants.BE_VCPUS]
5482
        elif field == "disk_template":
5483
          val = instance.disk_template
5484
        elif field == "ip":
5485
          if instance.nics:
5486
            val = instance.nics[0].ip
5487
          else:
5488
            val = None
5489
        elif field == "nic_mode":
5490
          if instance.nics:
5491
            val = i_nicp[0][constants.NIC_MODE]
5492
          else:
5493
            val = None
5494
        elif field == "nic_link":
5495
          if instance.nics:
5496
            val = i_nicp[0][constants.NIC_LINK]
5497
          else:
5498
            val = None
5499
        elif field == "bridge":
5500
          if (instance.nics and
5501
              i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
5502
            val = i_nicp[0][constants.NIC_LINK]
5503
          else:
5504
            val = None
5505
        elif field == "mac":
5506
          if instance.nics:
5507
            val = instance.nics[0].mac
5508
          else:
5509
            val = None
5510
        elif field == "custom_nicparams":
5511
          val = [nic.nicparams for nic in instance.nics]
5512
        elif field == "sda_size" or field == "sdb_size":
5513
          idx = ord(field[2]) - ord('a')
5514
          try:
5515
            val = instance.FindDisk(idx).size
5516
          except errors.OpPrereqError:
5517
            val = None
5518
        elif field == "disk_usage": # total disk usage per node
5519
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
5520
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
5521
        elif field == "tags":
5522
          val = list(instance.GetTags())
5523
        elif field == "custom_hvparams":
5524
          val = instance.hvparams # not filled!
5525
        elif field == "hvparams":
5526
          val = i_hv
5527
        elif (field.startswith(HVPREFIX) and
5528
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
5529
              field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
5530
          val = i_hv.get(field[len(HVPREFIX):], None)
5531
        elif field == "custom_beparams":
5532
          val = instance.beparams
5533
        elif field == "beparams":
5534
          val = i_be
5535
        elif (field.startswith(BEPREFIX) and
5536
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
5537
          val = i_be.get(field[len(BEPREFIX):], None)
5538
        elif st_match and st_match.groups():
5539
          # matches a variable list
5540
          st_groups = st_match.groups()
5541
          if st_groups and st_groups[0] == "disk":
5542
            if st_groups[1] == "count":
5543
              val = len(instance.disks)
5544
            elif st_groups[1] == "sizes":
5545
              val = [disk.size for disk in instance.disks]
5546
            elif st_groups[1] == "size":
5547
              try:
5548
                val = instance.FindDisk(st_groups[2]).size
5549
              except errors.OpPrereqError:
5550
                val = None
5551
            else:
5552
              assert False, "Unhandled disk parameter"
5553
          elif st_groups[0] == "nic":
5554
            if st_groups[1] == "count":
5555
              val = len(instance.nics)
5556
            elif st_groups[1] == "macs":
5557
              val = [nic.mac for nic in instance.nics]
5558
            elif st_groups[1] == "ips":
5559
              val = [nic.ip for nic in instance.nics]
5560
            elif st_groups[1] == "modes":
5561
              val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
5562
            elif st_groups[1] == "links":
5563
              val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
5564
            elif st_groups[1] == "bridges":
5565
              val = []
5566
              for nicp in i_nicp:
5567
                if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
5568
                  val.append(nicp[constants.NIC_LINK])
5569
                else:
5570
                  val.append(None)
5571
            else:
5572
              # index-based item
5573
              nic_idx = int(st_groups[2])
5574
              if nic_idx >= len(instance.nics):
5575
                val = None
5576
              else:
5577
                if st_groups[1] == "mac":
5578
                  val = instance.nics[nic_idx].mac
5579
                elif st_groups[1] == "ip":
5580
                  val = instance.nics[nic_idx].ip
5581
                elif st_groups[1] == "mode":
5582
                  val = i_nicp[nic_idx][constants.NIC_MODE]
5583
                elif st_groups[1] == "link":
5584
                  val = i_nicp[nic_idx][constants.NIC_LINK]
5585
                elif st_groups[1] == "bridge":
5586
                  nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
5587
                  if nic_mode == constants.NIC_MODE_BRIDGED:
5588
                    val = i_nicp[nic_idx][constants.NIC_LINK]
5589
                  else:
5590
                    val = None
5591
                else:
5592
                  assert False, "Unhandled NIC parameter"
5593
          else:
5594
            assert False, ("Declared but unhandled variable parameter '%s'" %
5595
                           field)
5596
        else:
5597
          assert False, "Declared but unhandled parameter '%s'" % field
5598
        iout.append(val)
5599
      output.append(iout)
5600

    
5601
    return output
5602

    
5603

    
5604
class LUFailoverInstance(LogicalUnit):
5605
  """Failover an instance.
5606

5607
  """
5608
  HPATH = "instance-failover"
5609
  HTYPE = constants.HTYPE_INSTANCE
5610
  _OP_PARAMS = [
5611
    _PInstanceName,
5612
    ("ignore_consistency", False, ht.TBool),
5613
    _PShutdownTimeout,
5614
    ]
5615
  REQ_BGL = False
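
  # Illustrative sketch (not part of the original code): the entries in
  # _OP_PARAMS above map one-to-one to the opcode a client submits, so a
  # failover request would look roughly like
  #   opcodes.OpFailoverInstance(instance_name="inst1.example.com",
  #                              ignore_consistency=False,
  #                              shutdown_timeout=120)
  # (the opcode class name is assumed to follow this LU's naming; the
  # instance name and timeout are made-up example values).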
  def ExpandNames(self):
5618
    self._ExpandAndLockInstance()
5619
    self.needed_locks[locking.LEVEL_NODE] = []
5620
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5621

    
5622
  def DeclareLocks(self, level):
5623
    if level == locking.LEVEL_NODE:
5624
      self._LockInstancesNodes()
5625

    
5626
  def BuildHooksEnv(self):
5627
    """Build hooks env.
5628

5629
    This runs on master, primary and secondary nodes of the instance.
5630

5631
    """
5632
    instance = self.instance
5633
    source_node = instance.primary_node
5634
    target_node = instance.secondary_nodes[0]
5635
    env = {
5636
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5637
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5638
      "OLD_PRIMARY": source_node,
5639
      "OLD_SECONDARY": target_node,
5640
      "NEW_PRIMARY": target_node,
5641
      "NEW_SECONDARY": source_node,
5642
      }
5643
    env.update(_BuildInstanceHookEnvByObject(self, instance))
5644
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5645
    nl_post = list(nl)
5646
    nl_post.append(source_node)
5647
    return env, nl, nl_post
5648

    
5649
  def CheckPrereq(self):
5650
    """Check prerequisites.
5651

5652
    This checks that the instance is in the cluster.
5653

5654
    """
5655
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5656
    assert self.instance is not None, \
5657
      "Cannot retrieve locked instance %s" % self.op.instance_name
5658

    
5659
    bep = self.cfg.GetClusterInfo().FillBE(instance)
5660
    if instance.disk_template not in constants.DTS_NET_MIRROR:
5661
      raise errors.OpPrereqError("Instance's disk layout is not"
5662
                                 " network mirrored, cannot failover.",
5663
                                 errors.ECODE_STATE)
5664

    
5665
    secondary_nodes = instance.secondary_nodes
5666
    if not secondary_nodes:
5667
      raise errors.ProgrammerError("no secondary node but using "
5668
                                   "a mirrored disk template")
5669

    
5670
    target_node = secondary_nodes[0]
5671
    _CheckNodeOnline(self, target_node)
5672
    _CheckNodeNotDrained(self, target_node)
5673
    if instance.admin_up:
5674
      # check memory requirements on the secondary node
5675
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5676
                           instance.name, bep[constants.BE_MEMORY],
5677
                           instance.hypervisor)
5678
    else:
5679
      self.LogInfo("Not checking memory on the secondary node as"
5680
                   " instance will not be started")
5681

    
5682
    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)
5684

    
5685
  def Exec(self, feedback_fn):
5686
    """Failover an instance.
5687

5688
    The failover is done by shutting it down on its present node and
5689
    starting it on the secondary.
5690

5691
    """
5692
    instance = self.instance
5693
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)
5694

    
5695
    source_node = instance.primary_node
5696
    target_node = instance.secondary_nodes[0]
5697

    
5698
    if instance.admin_up:
5699
      feedback_fn("* checking disk consistency between source and target")
5700
      for dev in instance.disks:
5701
        # for drbd, these are drbd over lvm
5702
        if not _CheckDiskConsistency(self, dev, target_node, False):
5703
          if not self.op.ignore_consistency:
5704
            raise errors.OpExecError("Disk %s is degraded on target node,"
5705
                                     " aborting failover." % dev.iv_name)
5706
    else:
5707
      feedback_fn("* not checking disk consistency as instance is not running")
5708

    
5709
    feedback_fn("* shutting down instance on source node")
5710
    logging.info("Shutting down instance %s on node %s",
5711
                 instance.name, source_node)
5712

    
5713
    result = self.rpc.call_instance_shutdown(source_node, instance,
5714
                                             self.op.shutdown_timeout)
5715
    msg = result.fail_msg
5716
    if msg:
5717
      if self.op.ignore_consistency or primary_node.offline:
5718
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
5719
                             " Proceeding anyway. Please make sure node"
5720
                             " %s is down. Error details: %s",
5721
                             instance.name, source_node, source_node, msg)
5722
      else:
5723
        raise errors.OpExecError("Could not shutdown instance %s on"
5724
                                 " node %s: %s" %
5725
                                 (instance.name, source_node, msg))
5726

    
5727
    feedback_fn("* deactivating the instance's disks on source node")
5728
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5729
      raise errors.OpExecError("Can't shut down the instance's disks.")
5730

    
5731
    instance.primary_node = target_node
5732
    # distribute new instance config to the other nodes
5733
    self.cfg.Update(instance, feedback_fn)
5734

    
5735
    # Only start the instance if it's marked as up
5736
    if instance.admin_up:
5737
      feedback_fn("* activating the instance's disks on target node")
5738
      logging.info("Starting instance %s on node %s",
5739
                   instance.name, target_node)
5740

    
5741
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
5742
                                           ignore_secondaries=True)
5743
      if not disks_ok:
5744
        _ShutdownInstanceDisks(self, instance)
5745
        raise errors.OpExecError("Can't activate the instance's disks")
5746

    
5747
      feedback_fn("* starting the instance on the target node")
5748
      result = self.rpc.call_instance_start(target_node, instance, None, None)
5749
      msg = result.fail_msg
5750
      if msg:
5751
        _ShutdownInstanceDisks(self, instance)
5752
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5753
                                 (instance.name, target_node, msg))
5754

    
5755

    
5756
class LUMigrateInstance(LogicalUnit):
5757
  """Migrate an instance.
5758

5759
  This is migration without shutting down, compared to the failover,
5760
  which is done with shutdown.
5761

5762
  """
5763
  HPATH = "instance-migrate"
5764
  HTYPE = constants.HTYPE_INSTANCE
5765
  _OP_PARAMS = [
5766
    _PInstanceName,
5767
    _PMigrationMode,
5768
    _PMigrationLive,
5769
    ("cleanup", False, ht.TBool),
5770
    ]
5771

    
5772
  REQ_BGL = False
5773

    
5774
  def ExpandNames(self):
5775
    self._ExpandAndLockInstance()
5776

    
5777
    self.needed_locks[locking.LEVEL_NODE] = []
5778
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5779

    
5780
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
5781
                                       self.op.cleanup)
5782
    self.tasklets = [self._migrater]
5783

    
5784
  def DeclareLocks(self, level):
5785
    if level == locking.LEVEL_NODE:
5786
      self._LockInstancesNodes()
5787

    
5788
  def BuildHooksEnv(self):
5789
    """Build hooks env.
5790

5791
    This runs on master, primary and secondary nodes of the instance.
5792

5793
    """
5794
    instance = self._migrater.instance
5795
    source_node = instance.primary_node
5796
    target_node = instance.secondary_nodes[0]
5797
    env = _BuildInstanceHookEnvByObject(self, instance)
5798
    env["MIGRATE_LIVE"] = self._migrater.live
5799
    env["MIGRATE_CLEANUP"] = self.op.cleanup
5800
    env.update({
5801
        "OLD_PRIMARY": source_node,
5802
        "OLD_SECONDARY": target_node,
5803
        "NEW_PRIMARY": target_node,
5804
        "NEW_SECONDARY": source_node,
5805
        })
5806
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5807
    nl_post = list(nl)
5808
    nl_post.append(source_node)
5809
    return env, nl, nl_post
5810

    
5811

    
5812
class LUMoveInstance(LogicalUnit):
5813
  """Move an instance by data-copying.
5814

5815
  """
5816
  HPATH = "instance-move"
5817
  HTYPE = constants.HTYPE_INSTANCE
5818
  _OP_PARAMS = [
5819
    _PInstanceName,
5820
    ("target_node", ht.NoDefault, ht.TNonEmptyString),
5821
    _PShutdownTimeout,
5822
    ]
5823
  REQ_BGL = False
5824

    
5825
  def ExpandNames(self):
5826
    self._ExpandAndLockInstance()
5827
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5828
    self.op.target_node = target_node
5829
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
5830
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5831

    
5832
  def DeclareLocks(self, level):
5833
    if level == locking.LEVEL_NODE:
5834
      self._LockInstancesNodes(primary_only=True)
5835

    
5836
  def BuildHooksEnv(self):
5837
    """Build hooks env.
5838

5839
    This runs on master, primary and secondary nodes of the instance.
5840

5841
    """
5842
    env = {
5843
      "TARGET_NODE": self.op.target_node,
5844
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5845
      }
5846
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5847
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5848
                                       self.op.target_node]
5849
    return env, nl, nl
5850

    
5851
  def CheckPrereq(self):
5852
    """Check prerequisites.
5853

5854
    This checks that the instance is in the cluster.
5855

5856
    """
5857
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5858
    assert self.instance is not None, \
5859
      "Cannot retrieve locked instance %s" % self.op.instance_name
5860

    
5861
    node = self.cfg.GetNodeInfo(self.op.target_node)
5862
    assert node is not None, \
5863
      "Cannot retrieve locked node %s" % self.op.target_node
5864

    
5865
    self.target_node = target_node = node.name
5866

    
5867
    if target_node == instance.primary_node:
5868
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
5869
                                 (instance.name, target_node),
5870
                                 errors.ECODE_STATE)
5871

    
5872
    bep = self.cfg.GetClusterInfo().FillBE(instance)
5873

    
5874
    for idx, dsk in enumerate(instance.disks):
5875
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5876
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5877
                                   " cannot copy" % idx, errors.ECODE_STATE)
5878

    
5879
    _CheckNodeOnline(self, target_node)
5880
    _CheckNodeNotDrained(self, target_node)
5881
    _CheckNodeVmCapable(self, target_node)
5882

    
5883
    if instance.admin_up:
5884
      # check memory requirements on the target node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5886
                           instance.name, bep[constants.BE_MEMORY],
5887
                           instance.hypervisor)
5888
    else:
5889
      self.LogInfo("Not checking memory on the secondary node as"
5890
                   " instance will not be started")
5891

    
5892
    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)
5894

    
5895
  def Exec(self, feedback_fn):
5896
    """Move an instance.
5897

5898
    The move is done by shutting it down on its present node, copying
5899
    the data over (slow) and starting it on the new node.
5900

5901
    """
5902
    instance = self.instance
5903

    
5904
    source_node = instance.primary_node
5905
    target_node = self.target_node
5906

    
5907
    self.LogInfo("Shutting down instance %s on source node %s",
5908
                 instance.name, source_node)
5909

    
5910
    result = self.rpc.call_instance_shutdown(source_node, instance,
5911
                                             self.op.shutdown_timeout)
5912
    msg = result.fail_msg
5913
    if msg:
5914
      if self.op.ignore_consistency:
5915
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
5916
                             " Proceeding anyway. Please make sure node"
5917
                             " %s is down. Error details: %s",
5918
                             instance.name, source_node, source_node, msg)
5919
      else:
5920
        raise errors.OpExecError("Could not shutdown instance %s on"
5921
                                 " node %s: %s" %
5922
                                 (instance.name, source_node, msg))
5923

    
5924
    # create the target disks
5925
    try:
5926
      _CreateDisks(self, instance, target_node=target_node)
5927
    except errors.OpExecError:
5928
      self.LogWarning("Device creation failed, reverting...")
5929
      try:
5930
        _RemoveDisks(self, instance, target_node=target_node)
5931
      finally:
5932
        self.cfg.ReleaseDRBDMinors(instance.name)
5933
        raise
5934

    
5935
    cluster_name = self.cfg.GetClusterInfo().cluster_name
5936

    
5937
    errs = []
5938
    # activate, get path, copy the data over
5939
    for idx, disk in enumerate(instance.disks):
5940
      self.LogInfo("Copying data for disk %d", idx)
5941
      result = self.rpc.call_blockdev_assemble(target_node, disk,
5942
                                               instance.name, True)
5943
      if result.fail_msg:
5944
        self.LogWarning("Can't assemble newly created disk %d: %s",
5945
                        idx, result.fail_msg)
5946
        errs.append(result.fail_msg)
5947
        break
5948
      dev_path = result.payload
5949
      result = self.rpc.call_blockdev_export(source_node, disk,
5950
                                             target_node, dev_path,
5951
                                             cluster_name)
5952
      if result.fail_msg:
5953
        self.LogWarning("Can't copy data over for disk %d: %s",
5954
                        idx, result.fail_msg)
5955
        errs.append(result.fail_msg)
5956
        break
5957

    
5958
    if errs:
5959
      self.LogWarning("Some disks failed to copy, aborting")
5960
      try:
5961
        _RemoveDisks(self, instance, target_node=target_node)
5962
      finally:
5963
        self.cfg.ReleaseDRBDMinors(instance.name)
5964
        raise errors.OpExecError("Errors during disk copy: %s" %
5965
                                 (",".join(errs),))
5966

    
5967
    instance.primary_node = target_node
5968
    self.cfg.Update(instance, feedback_fn)
5969

    
5970
    self.LogInfo("Removing the disks on the original node")
5971
    _RemoveDisks(self, instance, target_node=source_node)
5972

    
5973
    # Only start the instance if it's marked as up
5974
    if instance.admin_up:
5975
      self.LogInfo("Starting instance %s on node %s",
5976
                   instance.name, target_node)
5977

    
5978
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
5979
                                           ignore_secondaries=True)
5980
      if not disks_ok:
5981
        _ShutdownInstanceDisks(self, instance)
5982
        raise errors.OpExecError("Can't activate the instance's disks")
5983

    
5984
      result = self.rpc.call_instance_start(target_node, instance, None, None)
5985
      msg = result.fail_msg
5986
      if msg:
5987
        _ShutdownInstanceDisks(self, instance)
5988
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5989
                                 (instance.name, target_node, msg))
5990

    
5991

    
5992
class LUMigrateNode(LogicalUnit):
5993
  """Migrate all instances from a node.
5994

5995
  """
5996
  HPATH = "node-migrate"
5997
  HTYPE = constants.HTYPE_NODE
5998
  _OP_PARAMS = [
5999
    _PNodeName,
6000
    _PMigrationMode,
6001
    _PMigrationLive,
6002
    ]
6003
  REQ_BGL = False
6004

    
6005
  def ExpandNames(self):
6006
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6007

    
6008
    self.needed_locks = {
6009
      locking.LEVEL_NODE: [self.op.node_name],
6010
      }
6011

    
6012
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6013

    
6014
    # Create tasklets for migrating instances for all instances on this node
6015
    names = []
6016
    tasklets = []
6017

    
6018
    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
6019
      logging.debug("Migrating instance %s", inst.name)
6020
      names.append(inst.name)
6021

    
6022
      tasklets.append(TLMigrateInstance(self, inst.name, False))
6023

    
6024
    self.tasklets = tasklets
6025

    
6026
    # Declare instance locks
6027
    self.needed_locks[locking.LEVEL_INSTANCE] = names
6028

    
6029
  def DeclareLocks(self, level):
6030
    if level == locking.LEVEL_NODE:
6031
      self._LockInstancesNodes()
6032

    
6033
  def BuildHooksEnv(self):
6034
    """Build hooks env.
6035

6036
    This runs on the master, the primary and all the secondaries.
6037

6038
    """
6039
    env = {
6040
      "NODE_NAME": self.op.node_name,
6041
      }
6042

    
6043
    nl = [self.cfg.GetMasterNode()]
6044

    
6045
    return (env, nl, nl)
6046

    
6047

    
6048
class TLMigrateInstance(Tasklet):
6049
  """Tasklet class for instance migration.
6050

6051
  @type live: boolean
6052
  @ivar live: whether the migration will be done live or non-live;
6053
      this variable is initialized only after CheckPrereq has run
6054

6055
  """
6056
  def __init__(self, lu, instance_name, cleanup):
6057
    """Initializes this class.
6058

6059
    """
6060
    Tasklet.__init__(self, lu)
6061

    
6062
    # Parameters
6063
    self.instance_name = instance_name
6064
    self.cleanup = cleanup
6065
    self.live = False # will be overridden later
6066

    
6067
  def CheckPrereq(self):
6068
    """Check prerequisites.
6069

6070
    This checks that the instance is in the cluster.
6071

6072
    """
6073
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6074
    instance = self.cfg.GetInstanceInfo(instance_name)
6075
    assert instance is not None
6076

    
6077
    if instance.disk_template != constants.DT_DRBD8:
6078
      raise errors.OpPrereqError("Instance's disk layout is not"
6079
                                 " drbd8, cannot migrate.", errors.ECODE_STATE)
6080

    
6081
    secondary_nodes = instance.secondary_nodes
6082
    if not secondary_nodes:
6083
      raise errors.ConfigurationError("No secondary node but using"
6084
                                      " drbd8 disk template")
6085

    
6086
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
6087

    
6088
    target_node = secondary_nodes[0]
6089
    # check memory requirements on the secondary node
6090
    _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
6091
                         instance.name, i_be[constants.BE_MEMORY],
6092
                         instance.hypervisor)
6093

    
6094
    # check bridge existance
6095
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
6096

    
6097
    if not self.cleanup:
6098
      _CheckNodeNotDrained(self.lu, target_node)
6099
      result = self.rpc.call_instance_migratable(instance.primary_node,
6100
                                                 instance)
6101
      result.Raise("Can't migrate, please use failover",
6102
                   prereq=True, ecode=errors.ECODE_STATE)
6103

    
6104
    self.instance = instance
6105

    
6106
    if self.lu.op.live is not None and self.lu.op.mode is not None:
6107
      raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
6108
                                 " parameters are accepted",
6109
                                 errors.ECODE_INVAL)
6110
    if self.lu.op.live is not None:
6111
      if self.lu.op.live:
6112
        self.lu.op.mode = constants.HT_MIGRATION_LIVE
6113
      else:
6114
        self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
6115
      # reset the 'live' parameter to None so that repeated
6116
      # invocations of CheckPrereq do not raise an exception
6117
      self.lu.op.live = None
6118
    elif self.lu.op.mode is None:
6119
      # read the default value from the hypervisor
6120
      i_hv = self.cfg.GetClusterInfo().FillHV(instance, skip_globals=False)
6121
      self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
6122

    
6123
    self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
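
    # Illustrative summary (not part of the original code) of the precedence
    # implemented above:
    #   op.live=True,  op.mode=None -> mode becomes HT_MIGRATION_LIVE
    #   op.live=False, op.mode=None -> mode becomes HT_MIGRATION_NONLIVE
    #   op.live=None,  op.mode=None -> mode taken from the hypervisor's
    #                                  HV_MIGRATION_MODE default
    #   op.live and op.mode both set -> rejected with OpPrereqError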
  def _WaitUntilSync(self):
6126
    """Poll with custom rpc for disk sync.
6127

6128
    This uses our own step-based rpc call.
6129

6130
    """
6131
    self.feedback_fn("* wait until resync is done")
6132
    all_done = False
6133
    while not all_done:
6134
      all_done = True
6135
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
6136
                                            self.nodes_ip,
6137
                                            self.instance.disks)
6138
      min_percent = 100
6139
      for node, nres in result.items():
6140
        nres.Raise("Cannot resync disks on node %s" % node)
6141
        node_done, node_percent = nres.payload
6142
        all_done = all_done and node_done
6143
        if node_percent is not None:
6144
          min_percent = min(min_percent, node_percent)
6145
      if not all_done:
6146
        if min_percent < 100:
6147
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
6148
        time.sleep(2)
6149

    
6150
  def _EnsureSecondary(self, node):
6151
    """Demote a node to secondary.
6152

6153
    """
6154
    self.feedback_fn("* switching node %s to secondary mode" % node)
6155

    
6156
    for dev in self.instance.disks:
6157
      self.cfg.SetDiskID(dev, node)
6158

    
6159
    result = self.rpc.call_blockdev_close(node, self.instance.name,
6160
                                          self.instance.disks)
6161
    result.Raise("Cannot change disk to secondary on node %s" % node)
6162

    
6163
  def _GoStandalone(self):
6164
    """Disconnect from the network.
6165

6166
    """
6167
    self.feedback_fn("* changing into standalone mode")
6168
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
6169
                                               self.instance.disks)
6170
    for node, nres in result.items():
6171
      nres.Raise("Cannot disconnect disks node %s" % node)
6172

    
6173
  def _GoReconnect(self, multimaster):
6174
    """Reconnect to the network.
6175

6176
    """
6177
    if multimaster:
6178
      msg = "dual-master"
6179
    else:
6180
      msg = "single-master"
6181
    self.feedback_fn("* changing disks into %s mode" % msg)
6182
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
6183
                                           self.instance.disks,
6184
                                           self.instance.name, multimaster)
6185
    for node, nres in result.items():
6186
      nres.Raise("Cannot change disks config on node %s" % node)
6187

    
6188
  def _ExecCleanup(self):
6189
    """Try to cleanup after a failed migration.
6190

6191
    The cleanup is done by:
6192
      - check that the instance is running only on one node
6193
        (and update the config if needed)
6194
      - change disks on its secondary node to secondary
6195
      - wait until disks are fully synchronized
6196
      - disconnect from the network
6197
      - change disks into single-master mode
6198
      - wait again until disks are fully synchronized
6199

6200
    """
    instance = self.instance
6202
    target_node = self.target_node
6203
    source_node = self.source_node
6204

    
6205
    # check running on only one node
6206
    self.feedback_fn("* checking where the instance actually runs"
6207
                     " (if this hangs, the hypervisor might be in"
6208
                     " a bad state)")
6209
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6210
    for node, result in ins_l.items():
6211
      result.Raise("Can't contact node %s" % node)
6212

    
6213
    runningon_source = instance.name in ins_l[source_node].payload
6214
    runningon_target = instance.name in ins_l[target_node].payload
6215

    
6216
    if runningon_source and runningon_target:
6217
      raise errors.OpExecError("Instance seems to be running on two nodes,"
6218
                               " or the hypervisor is confused. You will have"
6219
                               " to ensure manually that it runs only on one"
6220
                               " and restart this operation.")
6221

    
6222
    if not (runningon_source or runningon_target):
6223
      raise errors.OpExecError("Instance does not seem to be running at all."
6224
                               " In this case, it's safer to repair by"
6225
                               " running 'gnt-instance stop' to ensure disk"
6226
                               " shutdown, and then restarting it.")
6227

    
6228
    if runningon_target:
6229
      # the migration has actually succeeded, we need to update the config
6230
      self.feedback_fn("* instance running on secondary node (%s),"
6231
                       " updating config" % target_node)
6232
      instance.primary_node = target_node
6233
      self.cfg.Update(instance, self.feedback_fn)
6234
      demoted_node = source_node
6235
    else:
6236
      self.feedback_fn("* instance confirmed to be running on its"
6237
                       " primary node (%s)" % source_node)
6238
      demoted_node = target_node
6239

    
6240
    self._EnsureSecondary(demoted_node)
6241
    try:
6242
      self._WaitUntilSync()
6243
    except errors.OpExecError:
6244
      # we ignore errors here, since if the device is standalone, it
6245
      # won't be able to sync
6246
      pass
6247
    self._GoStandalone()
6248
    self._GoReconnect(False)
6249
    self._WaitUntilSync()
6250

    
6251
    self.feedback_fn("* done")
6252

    
6253
  def _RevertDiskStatus(self):
6254
    """Try to revert the disk status after a failed migration.
6255

6256
    """
6257
    target_node = self.target_node
6258
    try:
6259
      self._EnsureSecondary(target_node)
6260
      self._GoStandalone()
6261
      self._GoReconnect(False)
6262
      self._WaitUntilSync()
6263
    except errors.OpExecError, err:
6264
      self.lu.LogWarning("Migration failed and I can't reconnect the"
6265
                         " drives: error '%s'\n"
6266
                         "Please look and recover the instance status" %
6267
                         str(err))
6268

    
6269
  def _AbortMigration(self):
6270
    """Call the hypervisor code to abort a started migration.
6271

6272
    """
6273
    instance = self.instance
6274
    target_node = self.target_node
6275
    migration_info = self.migration_info
6276

    
6277
    abort_result = self.rpc.call_finalize_migration(target_node,
6278
                                                    instance,
6279
                                                    migration_info,
6280
                                                    False)
6281
    abort_msg = abort_result.fail_msg
6282
    if abort_msg:
6283
      logging.error("Aborting migration failed on target node %s: %s",
6284
                    target_node, abort_msg)
6285
      # Don't raise an exception here, as we still have to try to revert the
6286
      # disk status, even if this step failed.
6287

    
6288
  def _ExecMigration(self):
6289
    """Migrate an instance.
6290

6291
    The migrate is done by:
6292
      - change the disks into dual-master mode
6293
      - wait until disks are fully synchronized again
6294
      - migrate the instance
6295
      - change disks on the new secondary node (the old primary) to secondary
6296
      - wait until disks are fully synchronized
6297
      - change disks into single-master mode
6298

6299
    """
6300
    instance = self.instance
6301
    target_node = self.target_node
6302
    source_node = self.source_node
6303

    
6304
    self.feedback_fn("* checking disk consistency between source and target")
6305
    for dev in instance.disks:
6306
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6307
        raise errors.OpExecError("Disk %s is degraded or not fully"
6308
                                 " synchronized on target node,"
6309
                                 " aborting migrate." % dev.iv_name)
6310

    
6311
    # First get the migration information from the remote node
6312
    result = self.rpc.call_migration_info(source_node, instance)
6313
    msg = result.fail_msg
6314
    if msg:
6315
      log_err = ("Failed fetching source migration information from %s: %s" %
6316
                 (source_node, msg))
6317
      logging.error(log_err)
6318
      raise errors.OpExecError(log_err)
6319

    
6320
    self.migration_info = migration_info = result.payload
6321

    
6322
    # Then switch the disks to master/master mode
6323
    self._EnsureSecondary(target_node)
6324
    self._GoStandalone()
6325
    self._GoReconnect(True)
6326
    self._WaitUntilSync()
6327

    
6328
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
6329
    result = self.rpc.call_accept_instance(target_node,
6330
                                           instance,
6331
                                           migration_info,
6332
                                           self.nodes_ip[target_node])
6333

    
6334
    msg = result.fail_msg
6335
    if msg:
6336
      logging.error("Instance pre-migration failed, trying to revert"
6337
                    " disk status: %s", msg)
6338
      self.feedback_fn("Pre-migration failed, aborting")
6339
      self._AbortMigration()
6340
      self._RevertDiskStatus()
6341
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6342
                               (instance.name, msg))
6343

    
6344
    self.feedback_fn("* migrating instance to %s" % target_node)
6345
    time.sleep(10)
6346
    result = self.rpc.call_instance_migrate(source_node, instance,
6347
                                            self.nodes_ip[target_node],
6348
                                            self.live)
6349
    msg = result.fail_msg
6350
    if msg:
6351
      logging.error("Instance migration failed, trying to revert"
6352
                    " disk status: %s", msg)
6353
      self.feedback_fn("Migration failed, aborting")
6354
      self._AbortMigration()
6355
      self._RevertDiskStatus()
6356
      raise errors.OpExecError("Could not migrate instance %s: %s" %
6357
                               (instance.name, msg))
6358
    time.sleep(10)
6359

    
6360
    instance.primary_node = target_node
6361
    # distribute new instance config to the other nodes
6362
    self.cfg.Update(instance, self.feedback_fn)
6363

    
6364
    result = self.rpc.call_finalize_migration(target_node,
6365
                                              instance,
6366
                                              migration_info,
6367
                                              True)
6368
    msg = result.fail_msg
6369
    if msg:
6370
      logging.error("Instance migration succeeded, but finalization failed:"
6371
                    " %s", msg)
6372
      raise errors.OpExecError("Could not finalize instance migration: %s" %
6373
                               msg)
6374

    
6375
    self._EnsureSecondary(source_node)
6376
    self._WaitUntilSync()
6377
    self._GoStandalone()
6378
    self._GoReconnect(False)
6379
    self._WaitUntilSync()
6380

    
6381
    self.feedback_fn("* done")
6382

    
6383
  def Exec(self, feedback_fn):
6384
    """Perform the migration.
6385

6386
    """
6387
    feedback_fn("Migrating instance %s" % self.instance.name)
6388

    
6389
    self.feedback_fn = feedback_fn
6390

    
6391
    self.source_node = self.instance.primary_node
6392
    self.target_node = self.instance.secondary_nodes[0]
6393
    self.all_nodes = [self.source_node, self.target_node]
6394
    self.nodes_ip = {
6395
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6396
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6397
      }
6398

    
6399
    if self.cleanup:
6400
      return self._ExecCleanup()
6401
    else:
6402
      return self._ExecMigration()
6403

    
6404

    
6405
def _CreateBlockDev(lu, node, instance, device, force_create,
6406
                    info, force_open):
6407
  """Create a tree of block devices on a given node.
6408

6409
  If this device type has to be created on secondaries, create it and
6410
  all its children.
6411

6412
  If not, just recurse to children keeping the same 'force' value.
6413

6414
  @param lu: the lu on whose behalf we execute
6415
  @param node: the node on which to create the device
6416
  @type instance: L{objects.Instance}
6417
  @param instance: the instance which owns the device
6418
  @type device: L{objects.Disk}
6419
  @param device: the device to create
6420
  @type force_create: boolean
6421
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has
      the CreateOnSecondary() attribute
6424
  @param info: the extra 'metadata' we should attach to the device
6425
      (this will be represented as a LVM tag)
6426
  @type force_open: boolean
6427
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  if device.CreateOnSecondary():
6434
    force_create = True
6435

    
6436
  if device.children:
6437
    for child in device.children:
6438
      _CreateBlockDev(lu, node, instance, child, force_create,
6439
                      info, force_open)
6440

    
6441
  if not force_create:
6442
    return
6443

    
6444
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
6445

    
6446

    
6447
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6448
  """Create a single block device on a given node.
6449

6450
  This will not recurse over children of the device, so they must be
6451
  created in advance.
6452

6453
  @param lu: the lu on whose behalf we execute
6454
  @param node: the node on which to create the device
6455
  @type instance: L{objects.Instance}
6456
  @param instance: the instance which owns the device
6457
  @type device: L{objects.Disk}
6458
  @param device: the device to create
6459
  @param info: the extra 'metadata' we should attach to the device
6460
      (this will be represented as a LVM tag)
6461
  @type force_open: boolean
6462
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
6468
  lu.cfg.SetDiskID(device, node)
6469
  result = lu.rpc.call_blockdev_create(node, device, device.size,
6470
                                       instance.name, force_open, info)
6471
  result.Raise("Can't create block device %s on"
6472
               " node %s for instance %s" % (device, node, instance.name))
6473
  if device.physical_id is None:
6474
    device.physical_id = result.payload
6475

    
6476

    
def _GenerateUniqueNames(lu, exts):
  """Generate a suitable LV name.

  This will generate a logical volume name for the given instance.

  """
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
    results.append("%s%s" % (new_id, val))
  return results
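
# Illustrative example (not part of the original code): called with
# exts=[".disk0", ".disk1"] this returns names of the form
# "<unique-id>.disk0" and "<unique-id>.disk1", where the unique id is
# whatever lu.cfg.GenerateUniqueID() hands out for the current execution
# context.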


def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
                         p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  """
  port = lu.cfg.AllocatePort()
  vgname = lu.cfg.GetVGName()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgname, names[0]))
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                          logical_id=(vgname, names[1]))
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                                      p_minor, s_minor,
                                      shared_secret),
                          children=[dev_data, dev_meta],
                          iv_name=iv_name)
  return drbd_dev
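
# Illustrative note (not part of the original code): the resulting device
# tree for a single DRBD8 disk looks like
#   LD_DRBD8 (size=size, logical_id=(primary, secondary, port, p_minor,
#                                    s_minor, shared_secret))
#     +- LD_LV data volume (size=size,   logical_id=(vgname, names[0]))
#     +- LD_LV meta volume (size=128 MB, logical_id=(vgname, names[1]))
# which _CreateBlockDev later walks recursively when the disks are created.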


def _GenerateDiskTemplate(lu, template_name,
6512
                          instance_name, primary_node,
6513
                          secondary_nodes, disk_info,
6514
                          file_storage_dir, file_driver,
6515
                          base_index):
6516
  """Generate the entire disk layout for a given template type.
6517

6518
  """
6519
  #TODO: compute space requirements
6520

    
6521
  vgname = lu.cfg.GetVGName()
6522
  disk_count = len(disk_info)
6523
  disks = []
6524
  if template_name == constants.DT_DISKLESS:
6525
    pass
6526
  elif template_name == constants.DT_PLAIN:
6527
    if len(secondary_nodes) != 0:
6528
      raise errors.ProgrammerError("Wrong template configuration")
6529

    
6530
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6531
                                      for i in range(disk_count)])
6532
    for idx, disk in enumerate(disk_info):
6533
      disk_index = idx + base_index
6534
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
6535
                              logical_id=(vgname, names[idx]),
6536
                              iv_name="disk/%d" % disk_index,
6537
                              mode=disk["mode"])
6538
      disks.append(disk_dev)
6539
  elif template_name == constants.DT_DRBD8:
6540
    if len(secondary_nodes) != 1:
6541
      raise errors.ProgrammerError("Wrong template configuration")
6542
    remote_node = secondary_nodes[0]
6543
    minors = lu.cfg.AllocateDRBDMinor(
6544
      [primary_node, remote_node] * len(disk_info), instance_name)
6545

    
6546
    names = []
6547
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6548
                                               for i in range(disk_count)]):
6549
      names.append(lv_prefix + "_data")
6550
      names.append(lv_prefix + "_meta")
6551
    for idx, disk in enumerate(disk_info):
6552
      disk_index = idx + base_index
6553
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6554
                                      disk["size"], names[idx*2:idx*2+2],
6555
                                      "disk/%d" % disk_index,
6556
                                      minors[idx*2], minors[idx*2+1])
6557
      disk_dev.mode = disk["mode"]
6558
      disks.append(disk_dev)
6559
  elif template_name == constants.DT_FILE:
6560
    if len(secondary_nodes) != 0:
6561
      raise errors.ProgrammerError("Wrong template configuration")
6562

    
6563
    _RequireFileStorage()
6564

    
6565
    for idx, disk in enumerate(disk_info):
6566
      disk_index = idx + base_index
6567
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6568
                              iv_name="disk/%d" % disk_index,
6569
                              logical_id=(file_driver,
6570
                                          "%s/disk%d" % (file_storage_dir,
6571
                                                         disk_index)),
6572
                              mode=disk["mode"])
6573
      disks.append(disk_dev)
6574
  else:
6575
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
6576
  return disks


def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name


def _CalcEta(time_taken, written, total_size):
  """Calculates the ETA based on size written and total size.

  @param time_taken: The time taken so far
  @param written: amount written so far
  @param total_size: The total size of data to be written
  @return: The remaining time in seconds

  """
  avg_time = time_taken / float(written)
  return (total_size - written) * avg_time
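
# Illustrative example (not part of the original code): _CalcEta assumes a
# constant write rate.  If 256 MiB out of 1024 MiB were written in 30
# seconds, the average is 30/256 seconds per MiB, so the remaining 768 MiB
# are estimated at 768 * 30 / 256 = 90 seconds.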


def _WipeDisks(lu, instance):
  """Wipes instance disks.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should wipe
  @return: the success of the wipe

  """
  node = instance.primary_node
  for idx, device in enumerate(instance.disks):
    lu.LogInfo("* Wiping disk %d", idx)
    logging.info("Wiping disk %d for instance %s", idx, instance.name)

    # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
    # MAX_WIPE_CHUNK at max
    wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
                          constants.MIN_WIPE_CHUNK_PERCENT)

    offset = 0
    size = device.size
    last_output = 0
    start_time = time.time()

    while offset < size:
      wipe_size = min(wipe_chunk_size, size - offset)
      result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
      result.Raise("Could not wipe disk %d at offset %d for size %d" %
                   (idx, offset, wipe_size))
      now = time.time()
      offset += wipe_size
      if now - last_output >= 60:
        eta = _CalcEta(now - start_time, offset, size)
        lu.LogInfo(" - done: %.1f%% ETA: %s" %
                   (offset / float(size) * 100, utils.FormatSeconds(eta)))
        last_output = now
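
# Illustrative note (not part of the original code): the chunk size used in
# the loop above is
#   min(MAX_WIPE_CHUNK, disk_size * MIN_WIPE_CHUNK_PERCENT / 100)
# i.e. a fixed percentage of the disk capped at MAX_WIPE_CHUNK, so small
# disks are wiped in a handful of proportional chunks while large disks fall
# back to fixed-size chunks.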


def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6639
  """Create all disks for an instance.
6640

6641
  This abstracts away some work from AddInstance.
6642

6643
  @type lu: L{LogicalUnit}
6644
  @param lu: the logical unit on whose behalf we execute
6645
  @type instance: L{objects.Instance}
6646
  @param instance: the instance whose disks we should create
6647
  @type to_skip: list
6648
  @param to_skip: list of indices to skip
6649
  @type target_node: string
6650
  @param target_node: if passed, overrides the target node for creation
6651
  @rtype: boolean
6652
  @return: the success of the creation
6653

6654
  """
6655
  info = _GetInstanceInfoText(instance)
6656
  if target_node is None:
6657
    pnode = instance.primary_node
6658
    all_nodes = instance.all_nodes
6659
  else:
6660
    pnode = target_node
6661
    all_nodes = [pnode]
6662

    
6663
  if instance.disk_template == constants.DT_FILE:
6664
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6665
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
6666

    
6667
    result.Raise("Failed to create directory '%s' on"
6668
                 " node %s" % (file_storage_dir, pnode))
6669

    
6670
  # Note: this needs to be kept in sync with adding of disks in
6671
  # LUSetInstanceParams
6672
  for idx, device in enumerate(instance.disks):
6673
    if to_skip and idx in to_skip:
6674
      continue
6675
    logging.info("Creating volume %s for instance %s",
6676
                 device.iv_name, instance.name)
6677
    #HARDCODE
6678
    for node in all_nodes:
6679
      f_create = node == pnode
6680
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
6681

    
6682

    
6683
def _RemoveDisks(lu, instance, target_node=None):
6684
  """Remove all disks for an instance.
6685

6686
  This abstracts away some work from `AddInstance()` and
6687
  `RemoveInstance()`. Note that in case some of the devices couldn't
6688
  be removed, the removal will continue with the other ones (compare
6689
  with `_CreateDisks()`).
6690

6691
  @type lu: L{LogicalUnit}
6692
  @param lu: the logical unit on whose behalf we execute
6693
  @type instance: L{objects.Instance}
6694
  @param instance: the instance whose disks we should remove
6695
  @type target_node: string
6696
  @param target_node: used to override the node on which to remove the disks
6697
  @rtype: boolean
6698
  @return: the success of the removal
6699

6700
  """
6701
  logging.info("Removing block devices for instance %s", instance.name)
6702

    
6703
  all_result = True
6704
  for device in instance.disks:
6705
    if target_node:
6706
      edata = [(target_node, device)]
6707
    else:
6708
      edata = device.ComputeNodeTree(instance.primary_node)
6709
    for node, disk in edata:
6710
      lu.cfg.SetDiskID(disk, node)
6711
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6712
      if msg:
6713
        lu.LogWarning("Could not remove block device %s on node %s,"
6714
                      " continuing anyway: %s", device.iv_name, node, msg)
6715
        all_result = False
6716

    
6717
  if instance.disk_template == constants.DT_FILE:
6718
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6719
    if target_node:
6720
      tgt = target_node
6721
    else:
6722
      tgt = instance.primary_node
6723
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6724
    if result.fail_msg:
6725
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6726
                    file_storage_dir, instance.primary_node, result.fail_msg)
6727
      all_result = False
6728

    
6729
  return all_result


def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d["size"] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
    constants.DT_FILE: None,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" %  disk_template)

  return req_size_dict[disk_template]
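
# Illustrative example (not part of the original code): for two disks of
# 1024 MB each, _ComputeDiskSize returns 2048 for DT_PLAIN and
# 2048 + 2 * 128 = 2304 for DT_DRBD8 (128 MB of DRBD metadata per disk),
# while DT_DISKLESS and DT_FILE need no volume group space (None).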


def _CheckHVParams(lu, nodenames, hvname, hvparams):
6753
  """Hypervisor parameter validation.
6754

6755
  This function abstracts the hypervisor parameter validation to be
6756
  used in both instance create and instance modify.
6757

6758
  @type lu: L{LogicalUnit}
6759
  @param lu: the logical unit for which we check
6760
  @type nodenames: list
6761
  @param nodenames: the list of nodes on which we should check
6762
  @type hvname: string
6763
  @param hvname: the name of the hypervisor we should use
6764
  @type hvparams: dict
6765
  @param hvparams: the parameters which we need to check
6766
  @raise errors.OpPrereqError: if the parameters are not valid
6767

6768
  """
6769
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6770
                                                  hvname,
6771
                                                  hvparams)
6772
  for node in nodenames:
6773
    info = hvinfo[node]
6774
    if info.offline:
6775
      continue
6776
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
6777

    
6778

    
6779
def _CheckOSParams(lu, required, nodenames, osname, osparams):
6780
  """OS parameters validation.
6781

6782
  @type lu: L{LogicalUnit}
6783
  @param lu: the logical unit for which we check
6784
  @type required: boolean
6785
  @param required: whether the validation should fail if the OS is not
6786
      found
6787
  @type nodenames: list
6788
  @param nodenames: the list of nodes on which we should check
6789
  @type osname: string
6790
  @param osname: the name of the OS we should use
6791
  @type osparams: dict
6792
  @param osparams: the parameters which we need to check
6793
  @raise errors.OpPrereqError: if the parameters are not valid
6794

6795
  """
6796
  result = lu.rpc.call_os_validate(required, nodenames, osname,
6797
                                   [constants.OS_VALIDATE_PARAMETERS],
6798
                                   osparams)
6799
  for node, nres in result.items():
6800
    # we don't check for offline cases since this should be run only
6801
    # against the master node and/or an instance's nodes
6802
    nres.Raise("OS Parameters validation failed on node %s" % node)
6803
    if not nres.payload:
6804
      lu.LogInfo("OS %s not found on node %s, validation skipped",
6805
                 osname, node)
6806

    
6807

    
6808
class LUCreateInstance(LogicalUnit):
6809
  """Create an instance.
6810

6811
  """
6812
  HPATH = "instance-add"
6813
  HTYPE = constants.HTYPE_INSTANCE
6814
  _OP_PARAMS = [
6815
    _PInstanceName,
6816
    ("mode", ht.NoDefault, ht.TElemOf(constants.INSTANCE_CREATE_MODES)),
6817
    ("start", True, ht.TBool),
6818
    ("wait_for_sync", True, ht.TBool),
6819
    ("ip_check", True, ht.TBool),
6820
    ("name_check", True, ht.TBool),
6821
    ("disks", ht.NoDefault, ht.TListOf(ht.TDict)),
6822
    ("nics", ht.NoDefault, ht.TListOf(ht.TDict)),
6823
    ("hvparams", ht.EmptyDict, ht.TDict),
6824
    ("beparams", ht.EmptyDict, ht.TDict),
6825
    ("osparams", ht.EmptyDict, ht.TDict),
6826
    ("no_install", None, ht.TMaybeBool),
6827
    ("os_type", None, ht.TMaybeString),
6828
    ("force_variant", False, ht.TBool),
6829
    ("source_handshake", None, ht.TOr(ht.TList, ht.TNone)),
6830
    ("source_x509_ca", None, ht.TMaybeString),
6831
    ("source_instance_name", None, ht.TMaybeString),
6832
    ("src_node", None, ht.TMaybeString),
6833
    ("src_path", None, ht.TMaybeString),
6834
    ("pnode", None, ht.TMaybeString),
6835
    ("snode", None, ht.TMaybeString),
6836
    ("iallocator", None, ht.TMaybeString),
6837
    ("hypervisor", None, ht.TMaybeString),
6838
    ("disk_template", ht.NoDefault, _CheckDiskTemplate),
6839
    ("identify_defaults", False, ht.TBool),
6840
    ("file_driver", None, ht.TOr(ht.TNone, ht.TElemOf(constants.FILE_DRIVER))),
6841
    ("file_storage_dir", None, ht.TMaybeString),
6842
    ]
6843
  REQ_BGL = False
6844

    
6845
  def CheckArguments(self):
6846
    """Check arguments.
6847

6848
    """
6849
    # do not require name_check to ease forward/backward compatibility
6850
    # for tools
6851
    if self.op.no_install and self.op.start:
6852
      self.LogInfo("No-installation mode selected, disabling startup")
6853
      self.op.start = False
6854
    # validate/normalize the instance name
6855
    self.op.instance_name = \
6856
      netutils.Hostname.GetNormalizedName(self.op.instance_name)
6857

    
6858
    if self.op.ip_check and not self.op.name_check:
6859
      # TODO: make the ip check more flexible and not depend on the name check
6860
      raise errors.OpPrereqError("Cannot do ip check without a name check",
6861
                                 errors.ECODE_INVAL)
6862

    
6863
    # check nics' parameter names
6864
    for nic in self.op.nics:
6865
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6866

    
6867
    # check disks' parameter names and consistent adopt/no-adopt strategy
6868
    has_adopt = has_no_adopt = False
6869
    for disk in self.op.disks:
6870
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
6871
      if "adopt" in disk:
6872
        has_adopt = True
6873
      else:
6874
        has_no_adopt = True
6875
    if has_adopt and has_no_adopt:
6876
      raise errors.OpPrereqError("Either all disks are adopted or none is",
6877
                                 errors.ECODE_INVAL)
6878
    if has_adopt:
6879
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
6880
        raise errors.OpPrereqError("Disk adoption is not supported for the"
6881
                                   " '%s' disk template" %
6882
                                   self.op.disk_template,
6883
                                   errors.ECODE_INVAL)
6884
      if self.op.iallocator is not None:
6885
        raise errors.OpPrereqError("Disk adoption not allowed with an"
6886
                                   " iallocator script", errors.ECODE_INVAL)
6887
      if self.op.mode == constants.INSTANCE_IMPORT:
6888
        raise errors.OpPrereqError("Disk adoption not allowed for"
6889
                                   " instance import", errors.ECODE_INVAL)
6890

    
6891
    self.adopt_disks = has_adopt
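
    # Illustrative example (not part of the original code): a disks list such
    # as [{"size": 1024}, {"adopt": "some-lv"}] mixes adopted and
    # newly-created disks and is rejected above; either every entry carries
    # "adopt" or none does ("some-lv" is a made-up value).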
    # instance name verification
6894
    if self.op.name_check:
6895
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
6896
      self.op.instance_name = self.hostname1.name
6897
      # used in CheckPrereq for ip ping check
6898
      self.check_ip = self.hostname1.ip
6899
    else:
6900
      self.check_ip = None
6901

    
6902
    # file storage checks
6903
    if (self.op.file_driver and
6904
        not self.op.file_driver in constants.FILE_DRIVER):
6905
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
6906
                                 self.op.file_driver, errors.ECODE_INVAL)
6907

    
6908
    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6909
      raise errors.OpPrereqError("File storage directory path not absolute",
6910
                                 errors.ECODE_INVAL)
6911

    
6912
    ### Node/iallocator related checks
6913
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")
6914

    
6915
    if self.op.pnode is not None:
6916
      if self.op.disk_template in constants.DTS_NET_MIRROR:
6917
        if self.op.snode is None:
6918
          raise errors.OpPrereqError("The networked disk templates need"
6919
                                     " a mirror node", errors.ECODE_INVAL)
6920
      elif self.op.snode:
6921
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
6922
                        " template")
6923
        self.op.snode = None
6924

    
6925
    self._cds = _GetClusterDomainSecret()
6926

    
6927
    if self.op.mode == constants.INSTANCE_IMPORT:
6928
      # On import force_variant must be True, because if we forced it at
6929
      # initial install, our only chance when importing it back is that it
6930
      # works again!
6931
      self.op.force_variant = True
6932

    
6933
      if self.op.no_install:
6934
        self.LogInfo("No-installation mode has no effect during import")
6935

    
6936
    elif self.op.mode == constants.INSTANCE_CREATE:
6937
      if self.op.os_type is None:
6938
        raise errors.OpPrereqError("No guest OS specified",
6939
                                   errors.ECODE_INVAL)
6940
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
6941
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
6942
                                   " installation" % self.op.os_type,
6943
                                   errors.ECODE_STATE)
6944
      if self.op.disk_template is None:
6945
        raise errors.OpPrereqError("No disk template specified",
6946
                                   errors.ECODE_INVAL)
6947

    
6948
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6949
      # Check handshake to ensure both clusters have the same domain secret
6950
      src_handshake = self.op.source_handshake
6951
      if not src_handshake:
6952
        raise errors.OpPrereqError("Missing source handshake",
6953
                                   errors.ECODE_INVAL)
6954

    
6955
      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
6956
                                                           src_handshake)
6957
      if errmsg:
6958
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
6959
                                   errors.ECODE_INVAL)
6960

    
6961
      # Load and check source CA
6962
      self.source_x509_ca_pem = self.op.source_x509_ca
6963
      if not self.source_x509_ca_pem:
6964
        raise errors.OpPrereqError("Missing source X509 CA",
6965
                                   errors.ECODE_INVAL)
6966

    
6967
      try:
6968
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
6969
                                                    self._cds)
6970
      except OpenSSL.crypto.Error, err:
6971
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
6972
                                   (err, ), errors.ECODE_INVAL)
6973

    
6974
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
6975
      if errcode is not None:
6976
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
6977
                                   errors.ECODE_INVAL)
6978

    
6979
      self.source_x509_ca = cert
6980

    
6981
      src_instance_name = self.op.source_instance_name
6982
      if not src_instance_name:
6983
        raise errors.OpPrereqError("Missing source instance name",
6984
                                   errors.ECODE_INVAL)
6985

    
6986
      self.source_instance_name = \
6987
          netutils.GetHostname(name=src_instance_name).name
6988

    
6989
    else:
6990
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
6991
                                 self.op.mode, errors.ECODE_INVAL)
6992

    
6993
  def ExpandNames(self):
6994
    """ExpandNames for CreateInstance.
6995

6996
    Figure out the right locks for instance creation.
6997

6998
    """
6999
    self.needed_locks = {}
7000

    
7001
    instance_name = self.op.instance_name
7002
    # this is just a preventive check, but someone might still add this
7003
    # instance in the meantime, and creation will fail at lock-add time
7004
    if instance_name in self.cfg.GetInstanceList():
7005
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7006
                                 instance_name, errors.ECODE_EXISTS)
7007

    
7008
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
7009

    
7010
    if self.op.iallocator:
7011
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7012
    else:
7013
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
7014
      nodelist = [self.op.pnode]
7015
      if self.op.snode is not None:
7016
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
7017
        nodelist.append(self.op.snode)
7018
      self.needed_locks[locking.LEVEL_NODE] = nodelist
7019

    
7020
    # in case of import lock the source node too
7021
    if self.op.mode == constants.INSTANCE_IMPORT:
7022
      src_node = self.op.src_node
7023
      src_path = self.op.src_path
7024

    
7025
      if src_path is None:
7026
        self.op.src_path = src_path = self.op.instance_name
7027

    
7028
      if src_node is None:
7029
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7030
        self.op.src_node = None
7031
        if os.path.isabs(src_path):
7032
          raise errors.OpPrereqError("Importing an instance from an absolute"
7033
                                     " path requires a source node option.",
7034
                                     errors.ECODE_INVAL)
7035
      else:
7036
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
7037
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
7038
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
7039
        if not os.path.isabs(src_path):
7040
          self.op.src_path = src_path = \
7041
            utils.PathJoin(constants.EXPORT_DIR, src_path)
7042

    
7043
  def _RunAllocator(self):
7044
    """Run the allocator based on input opcode.
7045

7046
    """
7047
    nics = [n.ToDict() for n in self.nics]
7048
    ial = IAllocator(self.cfg, self.rpc,
7049
                     mode=constants.IALLOCATOR_MODE_ALLOC,
7050
                     name=self.op.instance_name,
7051
                     disk_template=self.op.disk_template,
7052
                     tags=[],
7053
                     os=self.op.os_type,
7054
                     vcpus=self.be_full[constants.BE_VCPUS],
7055
                     mem_size=self.be_full[constants.BE_MEMORY],
7056
                     disks=self.disks,
7057
                     nics=nics,
7058
                     hypervisor=self.op.hypervisor,
7059
                     )
7060

    
7061
    ial.Run(self.op.iallocator)
7062

    
7063
    if not ial.success:
7064
      raise errors.OpPrereqError("Can't compute nodes using"
7065
                                 " iallocator '%s': %s" %
7066
                                 (self.op.iallocator, ial.info),
7067
                                 errors.ECODE_NORES)
7068
    if len(ial.result) != ial.required_nodes:
7069
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7070
                                 " of nodes (%s), required %s" %
7071
                                 (self.op.iallocator, len(ial.result),
7072
                                  ial.required_nodes), errors.ECODE_FAULT)
7073
    self.op.pnode = ial.result[0]
7074
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7075
                 self.op.instance_name, self.op.iallocator,
7076
                 utils.CommaJoin(ial.result))
7077
    if ial.required_nodes == 2:
7078
      self.op.snode = ial.result[1]
7079

    
7080
  def BuildHooksEnv(self):
7081
    """Build hooks env.
7082

7083
    This runs on master, primary and secondary nodes of the instance.
7084

7085
    """
7086
    env = {
7087
      "ADD_MODE": self.op.mode,
7088
      }
7089
    if self.op.mode == constants.INSTANCE_IMPORT:
7090
      env["SRC_NODE"] = self.op.src_node
7091
      env["SRC_PATH"] = self.op.src_path
7092
      env["SRC_IMAGES"] = self.src_images
7093

    
7094
    env.update(_BuildInstanceHookEnv(
7095
      name=self.op.instance_name,
7096
      primary_node=self.op.pnode,
7097
      secondary_nodes=self.secondaries,
7098
      status=self.op.start,
7099
      os_type=self.op.os_type,
7100
      memory=self.be_full[constants.BE_MEMORY],
7101
      vcpus=self.be_full[constants.BE_VCPUS],
7102
      nics=_NICListToTuple(self, self.nics),
7103
      disk_template=self.op.disk_template,
7104
      disks=[(d["size"], d["mode"]) for d in self.disks],
7105
      bep=self.be_full,
7106
      hvp=self.hv_full,
7107
      hypervisor_name=self.op.hypervisor,
7108
    ))
7109

    
7110
    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
7111
          self.secondaries)
7112
    return env, nl, nl
7113

    
7114
  def _ReadExportInfo(self):
7115
    """Reads the export information from disk.
7116

7117
    It will override the opcode source node and path with the actual
7118
    information, if these two were not specified before.
7119

7120
    @return: the export information
7121

7122
    """
7123
    assert self.op.mode == constants.INSTANCE_IMPORT
7124

    
7125
    src_node = self.op.src_node
7126
    src_path = self.op.src_path
7127

    
7128
    if src_node is None:
7129
      locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
7130
      exp_list = self.rpc.call_export_list(locked_nodes)
7131
      found = False
7132
      for node in exp_list:
7133
        if exp_list[node].fail_msg:
7134
          continue
7135
        if src_path in exp_list[node].payload:
7136
          found = True
7137
          self.op.src_node = src_node = node
7138
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
7139
                                                       src_path)
7140
          break
7141
      if not found:
7142
        raise errors.OpPrereqError("No export found for relative path %s" %
7143
                                    src_path, errors.ECODE_INVAL)
7144

    
7145
    _CheckNodeOnline(self, src_node)
7146
    result = self.rpc.call_export_info(src_node, src_path)
7147
    result.Raise("No export or invalid export found in dir %s" % src_path)
7148

    
7149
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
7150
    if not export_info.has_section(constants.INISECT_EXP):
7151
      raise errors.ProgrammerError("Corrupted export config",
7152
                                   errors.ECODE_ENVIRON)
7153

    
7154
    ei_version = export_info.get(constants.INISECT_EXP, "version")
7155
    if (int(ei_version) != constants.EXPORT_VERSION):
7156
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
7157
                                 (ei_version, constants.EXPORT_VERSION),
7158
                                 errors.ECODE_ENVIRON)
7159
    return export_info
7160

    
7161
  def _ReadExportParams(self, einfo):
7162
    """Use export parameters as defaults.
7163

7164
    In case the opcode doesn't specify (as in override) some instance
7165
    parameters, then try to use them from the export information, if
7166
    that declares them.
7167

7168
    """
7169
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
7170

    
7171
    if self.op.disk_template is None:
7172
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
7173
        self.op.disk_template = einfo.get(constants.INISECT_INS,
7174
                                          "disk_template")
7175
      else:
7176
        raise errors.OpPrereqError("No disk template specified and the export"
7177
                                   " is missing the disk_template information",
7178
                                   errors.ECODE_INVAL)
7179

    
7180
    if not self.op.disks:
7181
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
7182
        disks = []
7183
        # TODO: import the disk iv_name too
7184
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
7185
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
7186
          disks.append({"size": disk_sz})
7187
        self.op.disks = disks
7188
      else:
7189
        raise errors.OpPrereqError("No disk info specified and the export"
7190
                                   " is missing the disk information",
7191
                                   errors.ECODE_INVAL)
7192

    
7193
    if (not self.op.nics and
7194
        einfo.has_option(constants.INISECT_INS, "nic_count")):
7195
      nics = []
7196
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
7197
        ndict = {}
7198
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
7199
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
7200
          ndict[name] = v
7201
        nics.append(ndict)
7202
      self.op.nics = nics
7203

    
7204
    if (self.op.hypervisor is None and
7205
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
7206
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
7207
    if einfo.has_section(constants.INISECT_HYP):
7208
      # use the export parameters but do not override the ones
7209
      # specified by the user
7210
      for name, value in einfo.items(constants.INISECT_HYP):
7211
        if name not in self.op.hvparams:
7212
          self.op.hvparams[name] = value
7213

    
7214
    if einfo.has_section(constants.INISECT_BEP):
7215
      # use the parameters, without overriding
7216
      for name, value in einfo.items(constants.INISECT_BEP):
7217
        if name not in self.op.beparams:
7218
          self.op.beparams[name] = value
7219
    else:
7220
      # try to read the parameters old style, from the main section
7221
      for name in constants.BES_PARAMETERS:
7222
        if (name not in self.op.beparams and
7223
            einfo.has_option(constants.INISECT_INS, name)):
7224
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
7225

    
7226
    if einfo.has_section(constants.INISECT_OSP):
7227
      # use the parameters, without overriding
7228
      for name, value in einfo.items(constants.INISECT_OSP):
7229
        if name not in self.op.osparams:
7230
          self.op.osparams[name] = value
7231

    
7232
  def _RevertToDefaults(self, cluster):
7233
    """Revert the instance parameters to the default values.
7234

7235
    """
7236
    # hvparams
7237
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
7238
    for name in self.op.hvparams.keys():
7239
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
7240
        del self.op.hvparams[name]
7241
    # beparams
7242
    be_defs = cluster.SimpleFillBE({})
7243
    for name in self.op.beparams.keys():
7244
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
7245
        del self.op.beparams[name]
7246
    # nic params
7247
    nic_defs = cluster.SimpleFillNIC({})
7248
    for nic in self.op.nics:
7249
      for name in constants.NICS_PARAMETERS:
7250
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
7251
          del nic[name]
7252
    # osparams
7253
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
7254
    for name in self.op.osparams.keys():
7255
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
7256
        del self.op.osparams[name]
7257

    
7258
  def CheckPrereq(self):
7259
    """Check prerequisites.
7260

7261
    """
7262
    if self.op.mode == constants.INSTANCE_IMPORT:
7263
      export_info = self._ReadExportInfo()
7264
      self._ReadExportParams(export_info)
7265

    
7266
    _CheckDiskTemplate(self.op.disk_template)
7267

    
7268
    if (not self.cfg.GetVGName() and
7269
        self.op.disk_template not in constants.DTS_NOT_LVM):
7270
      raise errors.OpPrereqError("Cluster does not support lvm-based"
7271
                                 " instances", errors.ECODE_STATE)
7272

    
7273
    if self.op.hypervisor is None:
7274
      self.op.hypervisor = self.cfg.GetHypervisorType()
7275

    
7276
    cluster = self.cfg.GetClusterInfo()
7277
    enabled_hvs = cluster.enabled_hypervisors
7278
    if self.op.hypervisor not in enabled_hvs:
7279
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
7280
                                 " cluster (%s)" % (self.op.hypervisor,
7281
                                  ",".join(enabled_hvs)),
7282
                                 errors.ECODE_STATE)
7283

    
7284
    # check hypervisor parameter syntax (locally)
7285
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7286
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
7287
                                      self.op.hvparams)
7288
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
7289
    hv_type.CheckParameterSyntax(filled_hvp)
7290
    self.hv_full = filled_hvp
7291
    # check that we don't specify global parameters on an instance
7292
    _CheckGlobalHvParams(self.op.hvparams)
7293

    
7294
    # fill and remember the beparams dict
7295
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7296
    self.be_full = cluster.SimpleFillBE(self.op.beparams)
7297

    
7298
    # build os parameters
7299
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
7300

    
7301
    # now that hvp/bep are in final format, let's reset to defaults,
7302
    # if told to do so
7303
    if self.op.identify_defaults:
7304
      self._RevertToDefaults(cluster)
7305

    
7306
    # NIC buildup
7307
    self.nics = []
7308
    for idx, nic in enumerate(self.op.nics):
7309
      nic_mode_req = nic.get("mode", None)
7310
      nic_mode = nic_mode_req
7311
      if nic_mode is None:
7312
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
7313

    
7314
      # in routed mode, for the first nic, the default ip is 'auto'
7315
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
7316
        default_ip_mode = constants.VALUE_AUTO
7317
      else:
7318
        default_ip_mode = constants.VALUE_NONE
7319

    
7320
      # ip validity checks
7321
      ip = nic.get("ip", default_ip_mode)
7322
      if ip is None or ip.lower() == constants.VALUE_NONE:
7323
        nic_ip = None
7324
      elif ip.lower() == constants.VALUE_AUTO:
7325
        if not self.op.name_check:
7326
          raise errors.OpPrereqError("IP address set to auto but name checks"
7327
                                     " have been skipped",
7328
                                     errors.ECODE_INVAL)
7329
        nic_ip = self.hostname1.ip
7330
      else:
7331
        if not netutils.IPAddress.IsValid(ip):
7332
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
7333
                                     errors.ECODE_INVAL)
7334
        nic_ip = ip
7335

    
7336
      # TODO: check the ip address for uniqueness
7337
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7338
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
7339
                                   errors.ECODE_INVAL)
7340

    
7341
      # MAC address verification
7342
      mac = nic.get("mac", constants.VALUE_AUTO)
7343
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7344
        mac = utils.NormalizeAndValidateMac(mac)
7345

    
7346
        try:
7347
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
7348
        except errors.ReservationError:
7349
          raise errors.OpPrereqError("MAC address %s already in use"
7350
                                     " in cluster" % mac,
7351
                                     errors.ECODE_NOTUNIQUE)
7352

    
7353
      # bridge verification
7354
      bridge = nic.get("bridge", None)
7355
      link = nic.get("link", None)
7356
      if bridge and link:
7357
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7358
                                   " at the same time", errors.ECODE_INVAL)
7359
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
7360
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
7361
                                   errors.ECODE_INVAL)
7362
      elif bridge:
7363
        link = bridge
7364

    
7365
      nicparams = {}
7366
      if nic_mode_req:
7367
        nicparams[constants.NIC_MODE] = nic_mode_req
7368
      if link:
7369
        nicparams[constants.NIC_LINK] = link
7370

    
7371
      check_params = cluster.SimpleFillNIC(nicparams)
7372
      objects.NIC.CheckParameterSyntax(check_params)
7373
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
7374

    
7375
    # disk checks/pre-build
7376
    self.disks = []
7377
    for disk in self.op.disks:
7378
      mode = disk.get("mode", constants.DISK_RDWR)
7379
      if mode not in constants.DISK_ACCESS_SET:
7380
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
7381
                                   mode, errors.ECODE_INVAL)
7382
      size = disk.get("size", None)
7383
      if size is None:
7384
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
7385
      try:
7386
        size = int(size)
7387
      except (TypeError, ValueError):
7388
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
7389
                                   errors.ECODE_INVAL)
7390
      new_disk = {"size": size, "mode": mode}
7391
      if "adopt" in disk:
7392
        new_disk["adopt"] = disk["adopt"]
7393
      self.disks.append(new_disk)
7394

    
7395
    if self.op.mode == constants.INSTANCE_IMPORT:
7396

    
7397
      # Check that the new instance doesn't have less disks than the export
7398
      instance_disks = len(self.disks)
7399
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7400
      if instance_disks < export_disks:
7401
        raise errors.OpPrereqError("Not enough disks to import."
7402
                                   " (instance: %d, export: %d)" %
7403
                                   (instance_disks, export_disks),
7404
                                   errors.ECODE_INVAL)
7405

    
7406
      disk_images = []
7407
      for idx in range(export_disks):
7408
        option = 'disk%d_dump' % idx
7409
        if export_info.has_option(constants.INISECT_INS, option):
7410
          # FIXME: are the old os-es, disk sizes, etc. useful?
7411
          export_name = export_info.get(constants.INISECT_INS, option)
7412
          image = utils.PathJoin(self.op.src_path, export_name)
7413
          disk_images.append(image)
7414
        else:
7415
          disk_images.append(False)
7416

    
7417
      self.src_images = disk_images
7418

    
7419
      old_name = export_info.get(constants.INISECT_INS, 'name')
7420
      try:
7421
        exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
7422
      except (TypeError, ValueError), err:
7423
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
7424
                                   " an integer: %s" % str(err),
7425
                                   errors.ECODE_STATE)
7426
      if self.op.instance_name == old_name:
7427
        for idx, nic in enumerate(self.nics):
7428
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
7429
            nic_mac_ini = 'nic%d_mac' % idx
7430
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
7431

    
7432
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
7433

    
7434
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
7435
    if self.op.ip_check:
7436
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
7437
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
7438
                                   (self.check_ip, self.op.instance_name),
7439
                                   errors.ECODE_NOTUNIQUE)
7440

    
7441
    #### mac address generation
7442
    # By generating here the mac address both the allocator and the hooks get
7443
    # the real final mac address rather than the 'auto' or 'generate' value.
7444
    # There is a race condition between the generation and the instance object
7445
    # creation, which means that we know the mac is valid now, but we're not
7446
    # sure it will be when we actually add the instance. If things go bad
7447
    # adding the instance will abort because of a duplicate mac, and the
7448
    # creation job will fail.
7449
    for nic in self.nics:
7450
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7451
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
7452

    
7453
    #### allocator run
7454

    
7455
    if self.op.iallocator is not None:
7456
      self._RunAllocator()
7457

    
7458
    #### node related checks
7459

    
7460
    # check primary node
7461
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
7462
    assert self.pnode is not None, \
7463
      "Cannot retrieve locked node %s" % self.op.pnode
7464
    if pnode.offline:
7465
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
7466
                                 pnode.name, errors.ECODE_STATE)
7467
    if pnode.drained:
7468
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
7469
                                 pnode.name, errors.ECODE_STATE)
7470
    if not pnode.vm_capable:
7471
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
7472
                                 " '%s'" % pnode.name, errors.ECODE_STATE)
7473

    
7474
    self.secondaries = []
7475

    
7476
    # mirror node verification
7477
    if self.op.disk_template in constants.DTS_NET_MIRROR:
7478
      if self.op.snode == pnode.name:
7479
        raise errors.OpPrereqError("The secondary node cannot be the"
7480
                                   " primary node.", errors.ECODE_INVAL)
7481
      _CheckNodeOnline(self, self.op.snode)
7482
      _CheckNodeNotDrained(self, self.op.snode)
7483
      _CheckNodeVmCapable(self, self.op.snode)
7484
      self.secondaries.append(self.op.snode)
7485

    
7486
    nodenames = [pnode.name] + self.secondaries
7487

    
7488
    req_size = _ComputeDiskSize(self.op.disk_template,
7489
                                self.disks)
7490

    
7491
    # Check lv size requirements, if not adopting
7492
    if req_size is not None and not self.adopt_disks:
7493
      _CheckNodesFreeDisk(self, nodenames, req_size)
7494

    
7495
    if self.adopt_disks: # instead, we must check the adoption data
7496
      all_lvs = set([i["adopt"] for i in self.disks])
7497
      if len(all_lvs) != len(self.disks):
7498
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
7499
                                   errors.ECODE_INVAL)
7500
      for lv_name in all_lvs:
7501
        try:
7502
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7503
        except errors.ReservationError:
7504
          raise errors.OpPrereqError("LV named %s used by another instance" %
7505
                                     lv_name, errors.ECODE_NOTUNIQUE)
7506

    
7507
      node_lvs = self.rpc.call_lv_list([pnode.name],
7508
                                       self.cfg.GetVGName())[pnode.name]
7509
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7510
      node_lvs = node_lvs.payload
7511
      delta = all_lvs.difference(node_lvs.keys())
7512
      if delta:
7513
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
7514
                                   utils.CommaJoin(delta),
7515
                                   errors.ECODE_INVAL)
7516
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
7517
      if online_lvs:
7518
        raise errors.OpPrereqError("Online logical volumes found, cannot"
7519
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
7520
                                   errors.ECODE_STATE)
7521
      # update the size of disk based on what is found
7522
      for dsk in self.disks:
7523
        dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
7524

    
7525
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
7526

    
7527
    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
7528
    # check OS parameters (remotely)
7529
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
7530

    
7531
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
7532

    
7533
    # memory check on primary node
7534
    if self.op.start:
7535
      _CheckNodeFreeMemory(self, self.pnode.name,
7536
                           "creating instance %s" % self.op.instance_name,
7537
                           self.be_full[constants.BE_MEMORY],
7538
                           self.op.hypervisor)
7539

    
7540
    self.dry_run_result = list(nodenames)
7541

    
7542
  def Exec(self, feedback_fn):
7543
    """Create and add the instance to the cluster.
7544

7545
    """
7546
    instance = self.op.instance_name
7547
    pnode_name = self.pnode.name
7548

    
7549
    ht_kind = self.op.hypervisor
7550
    if ht_kind in constants.HTS_REQ_PORT:
7551
      network_port = self.cfg.AllocatePort()
7552
    else:
7553
      network_port = None
7554

    
7555
    if constants.ENABLE_FILE_STORAGE:
7556
      # this is needed because os.path.join does not accept None arguments
7557
      if self.op.file_storage_dir is None:
7558
        string_file_storage_dir = ""
7559
      else:
7560
        string_file_storage_dir = self.op.file_storage_dir
7561

    
7562
      # build the full file storage dir path
7563
      file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
7564
                                        string_file_storage_dir, instance)
7565
    else:
7566
      file_storage_dir = ""
7567

    
7568
    disks = _GenerateDiskTemplate(self,
7569
                                  self.op.disk_template,
7570
                                  instance, pnode_name,
7571
                                  self.secondaries,
7572
                                  self.disks,
7573
                                  file_storage_dir,
7574
                                  self.op.file_driver,
7575
                                  0)
7576

    
7577
    iobj = objects.Instance(name=instance, os=self.op.os_type,
7578
                            primary_node=pnode_name,
7579
                            nics=self.nics, disks=disks,
7580
                            disk_template=self.op.disk_template,
7581
                            admin_up=False,
7582
                            network_port=network_port,
7583
                            beparams=self.op.beparams,
7584
                            hvparams=self.op.hvparams,
7585
                            hypervisor=self.op.hypervisor,
7586
                            osparams=self.op.osparams,
7587
                            )
7588

    
7589
    if self.adopt_disks:
7590
      # rename LVs to the newly-generated names; we need to construct
7591
      # 'fake' LV disks with the old data, plus the new unique_id
7592
      tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
7593
      rename_to = []
7594
      for t_dsk, a_dsk in zip (tmp_disks, self.disks):
7595
        rename_to.append(t_dsk.logical_id)
7596
        t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
7597
        self.cfg.SetDiskID(t_dsk, pnode_name)
7598
      result = self.rpc.call_blockdev_rename(pnode_name,
7599
                                             zip(tmp_disks, rename_to))
7600
      result.Raise("Failed to rename adoped LVs")
7601
    else:
7602
      feedback_fn("* creating instance disks...")
7603
      try:
7604
        _CreateDisks(self, iobj)
7605
      except errors.OpExecError:
7606
        self.LogWarning("Device creation failed, reverting...")
7607
        try:
7608
          _RemoveDisks(self, iobj)
7609
        finally:
7610
          self.cfg.ReleaseDRBDMinors(instance)
7611
          raise
7612

    
7613
      if self.cfg.GetClusterInfo().prealloc_wipe_disks:
7614
        feedback_fn("* wiping instance disks...")
7615
        try:
7616
          _WipeDisks(self, iobj)
7617
        except errors.OpExecError:
7618
          self.LogWarning("Device wiping failed, reverting...")
7619
          try:
7620
            _RemoveDisks(self, iobj)
7621
          finally:
7622
            self.cfg.ReleaseDRBDMinors(instance)
7623
            raise
7624

    
7625
    feedback_fn("adding instance %s to cluster config" % instance)
7626

    
7627
    self.cfg.AddInstance(iobj, self.proc.GetECId())
7628

    
7629
    # Declare that we don't want to remove the instance lock anymore, as we've
7630
    # added the instance to the config
7631
    del self.remove_locks[locking.LEVEL_INSTANCE]
7632
    # Unlock all the nodes
7633
    if self.op.mode == constants.INSTANCE_IMPORT:
7634
      nodes_keep = [self.op.src_node]
7635
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
7636
                       if node != self.op.src_node]
7637
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
7638
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
7639
    else:
7640
      self.context.glm.release(locking.LEVEL_NODE)
7641
      del self.acquired_locks[locking.LEVEL_NODE]
7642

    
7643
    if self.op.wait_for_sync:
7644
      disk_abort = not _WaitForSync(self, iobj)
7645
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
7646
      # make sure the disks are not degraded (still sync-ing is ok)
7647
      time.sleep(15)
7648
      feedback_fn("* checking mirrors status")
7649
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
7650
    else:
7651
      disk_abort = False
7652

    
7653
    if disk_abort:
7654
      _RemoveDisks(self, iobj)
7655
      self.cfg.RemoveInstance(iobj.name)
7656
      # Make sure the instance lock gets removed
7657
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7658
      raise errors.OpExecError("There are some degraded disks for"
7659
                               " this instance")
7660

    
7661
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7662
      if self.op.mode == constants.INSTANCE_CREATE:
7663
        if not self.op.no_install:
7664
          feedback_fn("* running the instance OS create scripts...")
7665
          # FIXME: pass debug option from opcode to backend
7666
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
7667
                                                 self.op.debug_level)
7668
          result.Raise("Could not add os for instance %s"
7669
                       " on node %s" % (instance, pnode_name))
7670

    
7671
      elif self.op.mode == constants.INSTANCE_IMPORT:
7672
        feedback_fn("* running the instance OS import scripts...")
7673

    
7674
        transfers = []
7675

    
7676
        for idx, image in enumerate(self.src_images):
7677
          if not image:
7678
            continue
7679

    
7680
          # FIXME: pass debug option from opcode to backend
7681
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7682
                                             constants.IEIO_FILE, (image, ),
7683
                                             constants.IEIO_SCRIPT,
7684
                                             (iobj.disks[idx], idx),
7685
                                             None)
7686
          transfers.append(dt)
7687

    
7688
        import_result = \
7689
          masterd.instance.TransferInstanceData(self, feedback_fn,
7690
                                                self.op.src_node, pnode_name,
7691
                                                self.pnode.secondary_ip,
7692
                                                iobj, transfers)
7693
        if not compat.all(import_result):
7694
          self.LogWarning("Some disks for instance %s on node %s were not"
7695
                          " imported successfully" % (instance, pnode_name))
7696

    
7697
      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7698
        feedback_fn("* preparing remote import...")
7699
        connect_timeout = constants.RIE_CONNECT_TIMEOUT
7700
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7701

    
7702
        disk_results = masterd.instance.RemoteImport(self, feedback_fn, iobj,
7703
                                                     self.source_x509_ca,
7704
                                                     self._cds, timeouts)
7705
        if not compat.all(disk_results):
7706
          # TODO: Should the instance still be started, even if some disks
7707
          # failed to import (valid for local imports, too)?
7708
          self.LogWarning("Some disks for instance %s on node %s were not"
7709
                          " imported successfully" % (instance, pnode_name))
7710

    
7711
        # Run rename script on newly imported instance
7712
        assert iobj.name == instance
7713
        feedback_fn("Running rename script for %s" % instance)
7714
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7715
                                                   self.source_instance_name,
7716
                                                   self.op.debug_level)
7717
        if result.fail_msg:
7718
          self.LogWarning("Failed to run rename script for %s on node"
7719
                          " %s: %s" % (instance, pnode_name, result.fail_msg))
7720

    
7721
      else:
7722
        # also checked in the prereq part
7723
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
7724
                                     % self.op.mode)
7725

    
7726
    if self.op.start:
7727
      iobj.admin_up = True
7728
      self.cfg.Update(iobj, feedback_fn)
7729
      logging.info("Starting instance %s on node %s", instance, pnode_name)
7730
      feedback_fn("* starting instance...")
7731
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
7732
      result.Raise("Could not start instance")
7733

    
7734
    return list(iobj.all_nodes)
7735

    
7736

    
7737
class LUConnectConsole(NoHooksLU):
7738
  """Connect to an instance's console.
7739

7740
  This is somewhat special in that it returns the command line that
7741
  you need to run on the master node in order to connect to the
7742
  console.
7743

7744
  """
7745
  _OP_PARAMS = [
7746
    _PInstanceName
7747
    ]
7748
  REQ_BGL = False
7749

    
7750
  def ExpandNames(self):
7751
    self._ExpandAndLockInstance()
7752

    
7753
  def CheckPrereq(self):
7754
    """Check prerequisites.
7755

7756
    This checks that the instance is in the cluster.
7757

7758
    """
7759
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7760
    assert self.instance is not None, \
7761
      "Cannot retrieve locked instance %s" % self.op.instance_name
7762
    _CheckNodeOnline(self, self.instance.primary_node)
7763

    
7764
  def Exec(self, feedback_fn):
7765
    """Connect to the console of an instance
7766

7767
    """
7768
    instance = self.instance
7769
    node = instance.primary_node
7770

    
7771
    node_insts = self.rpc.call_instance_list([node],
7772
                                             [instance.hypervisor])[node]
7773
    node_insts.Raise("Can't get node information from %s" % node)
7774

    
7775
    if instance.name not in node_insts.payload:
7776
      if instance.admin_up:
7777
        state = "ERROR_down"
7778
      else:
7779
        state = "ADMIN_down"
7780
      raise errors.OpExecError("Instance %s is not running (state %s)" %
7781
                               (instance.name, state))
7782

    
7783
    logging.debug("Connecting to console of %s on %s", instance.name, node)
7784

    
7785
    hyper = hypervisor.GetHypervisor(instance.hypervisor)
7786
    cluster = self.cfg.GetClusterInfo()
7787
    # beparams and hvparams are passed separately, to avoid editing the
7788
    # instance and then saving the defaults in the instance itself.
7789
    hvparams = cluster.FillHV(instance)
7790
    beparams = cluster.FillBE(instance)
7791
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
7792

    
7793
    # build ssh cmdline
7794
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
7795

    
7796

    
7797
class LUReplaceDisks(LogicalUnit):
7798
  """Replace the disks of an instance.
7799

7800
  """
7801
  HPATH = "mirrors-replace"
7802
  HTYPE = constants.HTYPE_INSTANCE
7803
  _OP_PARAMS = [
7804
    _PInstanceName,
7805
    ("mode", ht.NoDefault, ht.TElemOf(constants.REPLACE_MODES)),
7806
    ("disks", ht.EmptyList, ht.TListOf(ht.TPositiveInt)),
7807
    ("remote_node", None, ht.TMaybeString),
7808
    ("iallocator", None, ht.TMaybeString),
7809
    ("early_release", False, ht.TBool),
7810
    ]
7811
  REQ_BGL = False
7812

    
7813
  def CheckArguments(self):
7814
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
7815
                                  self.op.iallocator)
7816

    
7817
  def ExpandNames(self):
7818
    self._ExpandAndLockInstance()
7819

    
7820
    if self.op.iallocator is not None:
7821
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7822

    
7823
    elif self.op.remote_node is not None:
7824
      remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7825
      self.op.remote_node = remote_node
7826

    
7827
      # Warning: do not remove the locking of the new secondary here
7828
      # unless DRBD8.AddChildren is changed to work in parallel;
7829
      # currently it doesn't since parallel invocations of
7830
      # FindUnusedMinor will conflict
7831
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
7832
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7833

    
7834
    else:
7835
      self.needed_locks[locking.LEVEL_NODE] = []
7836
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7837

    
7838
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
7839
                                   self.op.iallocator, self.op.remote_node,
7840
                                   self.op.disks, False, self.op.early_release)
7841

    
7842
    self.tasklets = [self.replacer]
7843

    
7844
  def DeclareLocks(self, level):
7845
    # If we're not already locking all nodes in the set we have to declare the
7846
    # instance's primary/secondary nodes.
7847
    if (level == locking.LEVEL_NODE and
7848
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7849
      self._LockInstancesNodes()
7850

    
7851
  def BuildHooksEnv(self):
7852
    """Build hooks env.
7853

7854
    This runs on the master, the primary and all the secondaries.
7855

7856
    """
7857
    instance = self.replacer.instance
7858
    env = {
7859
      "MODE": self.op.mode,
7860
      "NEW_SECONDARY": self.op.remote_node,
7861
      "OLD_SECONDARY": instance.secondary_nodes[0],
7862
      }
7863
    env.update(_BuildInstanceHookEnvByObject(self, instance))
7864
    nl = [
7865
      self.cfg.GetMasterNode(),
7866
      instance.primary_node,
7867
      ]
7868
    if self.op.remote_node is not None:
7869
      nl.append(self.op.remote_node)
7870
    return env, nl, nl
7871

    
7872

    
7873
class TLReplaceDisks(Tasklet):
7874
  """Replaces disks for an instance.
7875

7876
  Note: Locking is not within the scope of this class.
7877

7878
  """
7879
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7880
               disks, delay_iallocator, early_release):
7881
    """Initializes this class.
7882

7883
    """
7884
    Tasklet.__init__(self, lu)
7885

    
7886
    # Parameters
7887
    self.instance_name = instance_name
7888
    self.mode = mode
7889
    self.iallocator_name = iallocator_name
7890
    self.remote_node = remote_node
7891
    self.disks = disks
7892
    self.delay_iallocator = delay_iallocator
7893
    self.early_release = early_release
7894

    
7895
    # Runtime data
7896
    self.instance = None
7897
    self.new_node = None
7898
    self.target_node = None
7899
    self.other_node = None
7900
    self.remote_node_info = None
7901
    self.node_secondary_ip = None
7902

    
7903
  @staticmethod
7904
  def CheckArguments(mode, remote_node, iallocator):
7905
    """Helper function for users of this class.
7906

7907
    """
7908
    # check for valid parameter combination
7909
    if mode == constants.REPLACE_DISK_CHG:
7910
      if remote_node is None and iallocator is None:
7911
        raise errors.OpPrereqError("When changing the secondary either an"
7912
                                   " iallocator script must be used or the"
7913
                                   " new node given", errors.ECODE_INVAL)
7914

    
7915
      if remote_node is not None and iallocator is not None:
7916
        raise errors.OpPrereqError("Give either the iallocator or the new"
7917
                                   " secondary, not both", errors.ECODE_INVAL)
7918

    
7919
    elif remote_node is not None or iallocator is not None:
7920
      # Not replacing the secondary
7921
      raise errors.OpPrereqError("The iallocator and new node options can"
7922
                                 " only be used when changing the"
7923
                                 " secondary node", errors.ECODE_INVAL)
7924

    
7925
  @staticmethod
7926
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7927
    """Compute a new secondary node using an IAllocator.
7928

7929
    """
7930
    ial = IAllocator(lu.cfg, lu.rpc,
7931
                     mode=constants.IALLOCATOR_MODE_RELOC,
7932
                     name=instance_name,
7933
                     relocate_from=relocate_from)
7934

    
7935
    ial.Run(iallocator_name)
7936

    
7937
    if not ial.success:
7938
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7939
                                 " %s" % (iallocator_name, ial.info),
7940
                                 errors.ECODE_NORES)
7941

    
7942
    if len(ial.result) != ial.required_nodes:
7943
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7944
                                 " of nodes (%s), required %s" %
7945
                                 (iallocator_name,
7946
                                  len(ial.result), ial.required_nodes),
7947
                                 errors.ECODE_FAULT)
7948

    
7949
    remote_node_name = ial.result[0]
7950

    
7951
    lu.LogInfo("Selected new secondary for instance '%s': %s",
7952
               instance_name, remote_node_name)
7953

    
7954
    return remote_node_name
7955

    
7956
  def _FindFaultyDisks(self, node_name):
7957
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7958
                                    node_name, True)
7959

    
7960
  def CheckPrereq(self):
7961
    """Check prerequisites.
7962

7963
    This checks that the instance is in the cluster.
7964

7965
    """
7966
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7967
    assert instance is not None, \
7968
      "Cannot retrieve locked instance %s" % self.instance_name
7969

    
7970
    if instance.disk_template != constants.DT_DRBD8:
7971
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7972
                                 " instances", errors.ECODE_INVAL)
7973

    
7974
    if len(instance.secondary_nodes) != 1:
7975
      raise errors.OpPrereqError("The instance has a strange layout,"
7976
                                 " expected one secondary but found %d" %
7977
                                 len(instance.secondary_nodes),
7978
                                 errors.ECODE_FAULT)
7979

    
7980
    if not self.delay_iallocator:
7981
      self._CheckPrereq2()
7982

    
7983
  def _CheckPrereq2(self):
7984
    """Check prerequisites, second part.
7985

7986
    This function should always be part of CheckPrereq. It was separated and is
7987
    now called from Exec because during node evacuation iallocator was only
7988
    called with an unmodified cluster model, not taking planned changes into
7989
    account.
7990

7991
    """
7992
    instance = self.instance
7993
    secondary_node = instance.secondary_nodes[0]
7994

    
7995
    if self.iallocator_name is None:
7996
      remote_node = self.remote_node
7997
    else:
7998
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7999
                                       instance.name, instance.secondary_nodes)
8000

    
8001
    if remote_node is not None:
8002
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
8003
      assert self.remote_node_info is not None, \
8004
        "Cannot retrieve locked node %s" % remote_node
8005
    else:
8006
      self.remote_node_info = None
8007

    
8008
    if remote_node == self.instance.primary_node:
8009
      raise errors.OpPrereqError("The specified node is the primary node of"
8010
                                 " the instance.", errors.ECODE_INVAL)
8011

    
8012
    if remote_node == secondary_node:
8013
      raise errors.OpPrereqError("The specified node is already the"
8014
                                 " secondary node of the instance.",
8015
                                 errors.ECODE_INVAL)
8016

    
8017
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
8018
                                    constants.REPLACE_DISK_CHG):
8019
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
8020
                                 errors.ECODE_INVAL)
8021

    
8022
    if self.mode == constants.REPLACE_DISK_AUTO:
8023
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
8024
      faulty_secondary = self._FindFaultyDisks(secondary_node)
8025

    
8026
      if faulty_primary and faulty_secondary:
8027
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
8028
                                   " one node and can not be repaired"
8029
                                   " automatically" % self.instance_name,
8030
                                   errors.ECODE_STATE)
8031

    
8032
      if faulty_primary:
8033
        self.disks = faulty_primary
8034
        self.target_node = instance.primary_node
8035
        self.other_node = secondary_node
8036
        check_nodes = [self.target_node, self.other_node]
8037
      elif faulty_secondary:
8038
        self.disks = faulty_secondary
8039
        self.target_node = secondary_node
8040
        self.other_node = instance.primary_node
8041
        check_nodes = [self.target_node, self.other_node]
8042
      else:
8043
        self.disks = []
8044
        check_nodes = []
8045

    
8046
    else:
8047
      # Non-automatic modes
8048
      if self.mode == constants.REPLACE_DISK_PRI:
8049
        self.target_node = instance.primary_node
8050
        self.other_node = secondary_node
8051
        check_nodes = [self.target_node, self.other_node]
8052

    
8053
      elif self.mode == constants.REPLACE_DISK_SEC:
8054
        self.target_node = secondary_node
8055
        self.other_node = instance.primary_node
8056
        check_nodes = [self.target_node, self.other_node]
8057

    
8058
      elif self.mode == constants.REPLACE_DISK_CHG:
8059
        self.new_node = remote_node
8060
        self.other_node = instance.primary_node
8061
        self.target_node = secondary_node
8062
        check_nodes = [self.new_node, self.other_node]
8063

    
8064
        _CheckNodeNotDrained(self.lu, remote_node)
8065
        _CheckNodeVmCapable(self.lu, remote_node)
8066

    
8067
        old_node_info = self.cfg.GetNodeInfo(secondary_node)
8068
        assert old_node_info is not None
8069
        if old_node_info.offline and not self.early_release:
8070
          # doesn't make sense to delay the release
8071
          self.early_release = True
8072
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
8073
                          " early-release mode", secondary_node)
8074

    
8075
      else:
8076
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
8077
                                     self.mode)
8078

    
8079
      # If not specified all disks should be replaced
8080
      if not self.disks:
8081
        self.disks = range(len(self.instance.disks))
8082

    
8083
    for node in check_nodes:
8084
      _CheckNodeOnline(self.lu, node)
8085

    
8086
    # Check whether disks are valid
8087
    for disk_idx in self.disks:
8088
      instance.FindDisk(disk_idx)
8089

    
8090
    # Get secondary node IP addresses
8091
    node_2nd_ip = {}
8092

    
8093
    for node_name in [self.target_node, self.other_node, self.new_node]:
8094
      if node_name is not None:
8095
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
8096

    
8097
    self.node_secondary_ip = node_2nd_ip
8098

    
8099
  def Exec(self, feedback_fn):
8100
    """Execute disk replacement.
8101

8102
    This dispatches the disk replacement to the appropriate handler.
8103

8104
    """
8105
    if self.delay_iallocator:
8106
      self._CheckPrereq2()
8107

    
8108
    if not self.disks:
8109
      feedback_fn("No disks need replacement")
8110
      return
8111

    
8112
    feedback_fn("Replacing disk(s) %s for %s" %
8113
                (utils.CommaJoin(self.disks), self.instance.name))
8114

    
8115
    activate_disks = (not self.instance.admin_up)
8116

    
8117
    # Activate the instance disks if we're replacing them on a down instance
8118
    if activate_disks:
8119
      _StartInstanceDisks(self.lu, self.instance, True)
8120

    
8121
    try:
8122
      # Should we replace the secondary node?
8123
      if self.new_node is not None:
8124
        fn = self._ExecDrbd8Secondary
8125
      else:
8126
        fn = self._ExecDrbd8DiskOnly
8127

    
8128
      return fn(feedback_fn)
8129

    
8130
    finally:
8131
      # Deactivate the instance disks if we're replacing them on a
8132
      # down instance
8133
      if activate_disks:
8134
        _SafeShutdownInstanceDisks(self.lu, self.instance)
8135

    
8136
  def _CheckVolumeGroup(self, nodes):
8137
    self.lu.LogInfo("Checking volume groups")
8138

    
8139
    vgname = self.cfg.GetVGName()
8140

    
8141
    # Make sure volume group exists on all involved nodes
8142
    results = self.rpc.call_vg_list(nodes)
8143
    if not results:
8144
      raise errors.OpExecError("Can't list volume groups on the nodes")
8145

    
8146
    for node in nodes:
8147
      res = results[node]
8148
      res.Raise("Error checking node %s" % node)
8149
      if vgname not in res.payload:
8150
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
8151
                                 (vgname, node))
8152

    
8153
  def _CheckDisksExistence(self, nodes):
8154
    # Check disk existence
8155
    for idx, dev in enumerate(self.instance.disks):
8156
      if idx not in self.disks:
8157
        continue
8158

    
8159
      for node in nodes:
8160
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
8161
        self.cfg.SetDiskID(dev, node)
8162

    
8163
        result = self.rpc.call_blockdev_find(node, dev)
8164

    
8165
        msg = result.fail_msg
8166
        if msg or not result.payload:
8167
          if not msg:
8168
            msg = "disk not found"
8169
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
8170
                                   (idx, node, msg))
8171

    
8172
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
8173
    for idx, dev in enumerate(self.instance.disks):
8174
      if idx not in self.disks:
8175
        continue
8176

    
8177
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
8178
                      (idx, node_name))
8179

    
8180
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
8181
                                   ldisk=ldisk):
8182
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
8183
                                 " replace disks for instance %s" %
8184
                                 (node_name, self.instance.name))
8185

    
8186
  def _CreateNewStorage(self, node_name):
8187
    vgname = self.cfg.GetVGName()
8188
    iv_names = {}
8189

    
8190
    for idx, dev in enumerate(self.instance.disks):
8191
      if idx not in self.disks:
8192
        continue
8193

    
8194
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
8195

    
8196
      self.cfg.SetDiskID(dev, node_name)
8197

    
8198
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
8199
      names = _GenerateUniqueNames(self.lu, lv_names)
8200

    
8201
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
8202
                             logical_id=(vgname, names[0]))
8203
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
8204
                             logical_id=(vgname, names[1]))
8205

    
8206
      new_lvs = [lv_data, lv_meta]
8207
      old_lvs = dev.children
8208
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
8209

    
8210
      # we pass force_create=True to force the LVM creation
8211
      for new_lv in new_lvs:
8212
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
8213
                        _GetInstanceInfoText(self.instance), False)
8214

    
8215
    return iv_names
8216

    
8217
  def _CheckDevices(self, node_name, iv_names):
8218
    for name, (dev, _, _) in iv_names.iteritems():
8219
      self.cfg.SetDiskID(dev, node_name)
8220

    
8221
      result = self.rpc.call_blockdev_find(node_name, dev)
8222

    
8223
      msg = result.fail_msg
8224
      if msg or not result.payload:
8225
        if not msg:
8226
          msg = "disk not found"
8227
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
8228
                                 (name, msg))
8229

    
8230
      if result.payload.is_degraded:
8231
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
8232

    
8233
  def _RemoveOldStorage(self, node_name, iv_names):
    for name, (_, old_lvs, _) in iv_names.iteritems():
      self.lu.LogInfo("Remove logical volumes for %s" % name)

      for lv in old_lvs:
        self.cfg.SetDiskID(lv, node_name)

        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
        if msg:
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
                             hint="remove unused LVs manually")

  def _ReleaseNodeLock(self, node_name):
    """Releases the lock for a given node."""
    self.lu.context.glm.release(locking.LEVEL_NODE, node_name)

  def _ExecDrbd8DiskOnly(self, feedback_fn):
    """Replace a disk on the primary or secondary for DRBD 8.

    The algorithm for replace is quite complicated:

      1. for each disk to be replaced:

        1. create new LVs on the target node with unique names
        1. detach old LVs from the drbd device
        1. rename old LVs to name_replaced.<time_t>
        1. rename new LVs to old LVs
        1. attach the new LVs (with the old names now) to the drbd device

      1. wait for sync across all devices

      1. for each modified disk:

        1. remove old LVs (which have the name name_replaced.<time_t>)

    Failures are not very well handled.

    """
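    # A concrete illustration of the rename cycle described above, for one
    # data LV; the LV and VG names here are hypothetical, the real ones come
    # from _GenerateUniqueNames and self.cfg.GetVGName():
    #
    #   old LV : xenvg/<uuid-old>.disk0_data   (currently under DRBD)
    #   new LV : xenvg/<uuid-new>.disk0_data   (empty, just created)
    #   1) detach the old LV from the DRBD device
    #   2) rename old LV -> xenvg/<uuid-old>.disk0_data_replaced-<time_t>
    #   3) rename new LV -> xenvg/<uuid-old>.disk0_data
    #   4) attach the renamed new LV to the DRBD device and let it resync
    #
    # The *_replaced-<time_t> volumes are removed later by _RemoveOldStorage.
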
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.other_node, self.target_node])
    self._CheckVolumeGroup([self.target_node, self.other_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.other_node,
                                self.other_node == self.instance.primary_node,
                                False)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    iv_names = self._CreateNewStorage(self.target_node)

    # Step: for each lv, detach+rename*2+attach
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    for dev, old_lvs, new_lvs in iv_names.itervalues():
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)

      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
                                                     old_lvs)
      result.Raise("Can't detach drbd from local storage on node"
                   " %s for device %s" % (self.target_node, dev.iv_name))
      #dev.children = []
      #cfg.Update(instance)

      # ok, we created the new LVs, so now we know we have the needed
      # storage; as such, we proceed on the target node to rename
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
      # using the assumption that logical_id == physical_id (which in
      # turn is the unique_id on that node)

      # FIXME(iustin): use a better name for the replaced LVs
      temp_suffix = int(time.time())
      ren_fn = lambda d, suff: (d.physical_id[0],
                                d.physical_id[1] + "_replaced-%s" % suff)

      # Build the rename list based on what LVs exist on the node
      rename_old_to_new = []
      for to_ren in old_lvs:
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
        if not result.fail_msg and result.payload:
          # device exists
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))

      self.lu.LogInfo("Renaming the old LVs on the target node")
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_old_to_new)
      result.Raise("Can't rename old LVs on node %s" % self.target_node)

      # Now we rename the new LVs to the old LVs
      self.lu.LogInfo("Renaming the new LVs on the target node")
      rename_new_to_old = [(new, old.physical_id)
                           for old, new in zip(old_lvs, new_lvs)]
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_new_to_old)
      result.Raise("Can't rename new LVs on node %s" % self.target_node)

      for old, new in zip(old_lvs, new_lvs):
        new.logical_id = old.logical_id
        self.cfg.SetDiskID(new, self.target_node)

      for disk in old_lvs:
        disk.logical_id = ren_fn(disk, temp_suffix)
        self.cfg.SetDiskID(disk, self.target_node)

      # Now that the new lvs have the old name, we can add them to the device
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
                                                  new_lvs)
      msg = result.fail_msg
      if msg:
        for new_lv in new_lvs:
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
                                               new_lv).fail_msg
          if msg2:
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
                                     " volumes"))
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)

      dev.children = new_lvs

      self.cfg.Update(self.instance, feedback_fn)

    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release both node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      self._ReleaseNodeLock([self.target_node, self.other_node])

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    cstep += 1
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)

  def _ExecDrbd8Secondary(self, feedback_fn):
    """Replace the secondary node for DRBD 8.

    The algorithm for replace is quite complicated:
      - for all disks of the instance:
        - create new LVs on the new node with same names
        - shutdown the drbd device on the old secondary
        - disconnect the drbd network on the primary
        - create the drbd device on the new secondary
        - network attach the drbd on the primary, using an artifice:
          the drbd code for Attach() will connect to the network if it
          finds a device which is connected to the good local disks but
          not network enabled
      - wait for sync across all devices
      - remove all disks from the old secondary

    Failures are not very well handled.

    """
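    # For reference: a DRBD8 disk's logical_id is the 6-tuple
    #   (nodeA, nodeB, port, minorA, minorB, secret)
    # as unpacked further down; the values below are purely illustrative:
    #   ("node1.example.com", "node3.example.com", 11000, 0, 3, "...")
    # "new_alone_id" keeps port=None so the device can be brought up on the
    # new secondary without networking first, while "new_net_id" carries the
    # real port and is what gets written back into the configuration for the
    # later network attach.
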
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.instance.primary_node])
    self._CheckVolumeGroup([self.instance.primary_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.instance.primary_node, True, True)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
                      (self.new_node, idx))
      # we pass force_create=True to force LVM creation
      for new_lv in dev.children:
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    # Step 4: drbd minors and drbd setup changes
    # after this, we must manually remove the drbd minors on both the
    # error and the success paths
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    minors = self.cfg.AllocateDRBDMinor([self.new_node
                                         for dev in self.instance.disks],
                                        self.instance.name)
    logging.debug("Allocated minors %r", minors)

    iv_names = {}
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
                      (self.new_node, idx))
      # create new devices on new_node; note that we create two IDs:
      # one without port, so the drbd will be activated without
      # networking information on the new node at this stage, and one
      # with network, for the latter activation in step 4
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
      if self.instance.primary_node == o_node1:
        p_minor = o_minor1
      else:
        assert self.instance.primary_node == o_node2, "Three-node instance?"
        p_minor = o_minor2

      new_alone_id = (self.instance.primary_node, self.new_node, None,
                      p_minor, new_minor, o_secret)
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
                    p_minor, new_minor, o_secret)

      iv_names[idx] = (dev, dev.children, new_net_id)
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
                    new_net_id)
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                              logical_id=new_alone_id,
                              children=dev.children,
                              size=dev.size)
      try:
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
                              _GetInstanceInfoText(self.instance), False)
      except errors.GenericError:
        self.cfg.ReleaseDRBDMinors(self.instance.name)
        raise

    # We have new devices, shutdown the drbd on the old secondary
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
      self.cfg.SetDiskID(dev, self.target_node)
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
      if msg:
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           " node: %s" % (idx, msg),
                           hint=("Please cleanup this device manually as"
                                 " soon as possible"))

    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
                                               self.node_secondary_ip,
                                               self.instance.disks)\
                                              [self.instance.primary_node]

    msg = result.fail_msg
    if msg:
      # detaches didn't succeed (unlikely)
      self.cfg.ReleaseDRBDMinors(self.instance.name)
      raise errors.OpExecError("Can't detach the disks from the network on"
                               " old node: %s" % (msg,))

    # if we managed to detach at least one, we update all the disks of
    # the instance to point to the new secondary
    self.lu.LogInfo("Updating instance configuration")
    for dev, _, new_logical_id in iv_names.itervalues():
      dev.logical_id = new_logical_id
      self.cfg.SetDiskID(dev, self.instance.primary_node)

    self.cfg.Update(self.instance, feedback_fn)

    # and now perform the drbd attach
    self.lu.LogInfo("Attaching primary drbds to new secondary"
                    " (standalone => connected)")
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
                                            self.new_node],
                                           self.node_secondary_ip,
                                           self.instance.disks,
                                           self.instance.name,
                                           False)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
                           to_node, msg,
                           hint=("please do a gnt-instance info to see the"
                                 " status of disks"))
    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release all node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      self._ReleaseNodeLock([self.instance.primary_node,
                             self.target_node,
                             self.new_node])

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    cstep += 1
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)


class LURepairNodeStorage(NoHooksLU):
  """Repairs the volume group on a node.

  """
  _OP_PARAMS = [
    _PNodeName,
    ("storage_type", ht.NoDefault, _CheckStorageType),
    ("name", ht.NoDefault, ht.TNonEmptyString),
    ("ignore_consistency", False, ht.TBool),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    if (constants.SO_FIX_CONSISTENCY not in
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " repaired" % storage_type,
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def _CheckFaultyDisks(self, instance, node_name):
    """Ensure faulty disks abort the opcode or at least warn."""
    try:
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
                                  node_name, True):
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
                                   " node '%s'" % (instance.name, node_name),
                                   errors.ECODE_STATE)
    except errors.OpPrereqError, err:
      if self.op.ignore_consistency:
        self.proc.LogWarning(str(err.args[0]))
      else:
        raise

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # Check whether any instance on this node has faulty disks
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
      if not inst.admin_up:
        continue
      check_nodes = set(inst.all_nodes)
      check_nodes.discard(self.op.node_name)
      for inst_node_name in check_nodes:
        self._CheckFaultyDisks(inst, inst_node_name)

  def Exec(self, feedback_fn):
    feedback_fn("Repairing storage unit '%s' on %s ..." %
                (self.op.name, self.op.node_name))

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_execute(self.op.node_name,
                                           self.op.storage_type, st_args,
                                           self.op.name,
                                           constants.SO_FIX_CONSISTENCY)
    result.Raise("Failed to repair storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


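# The LU below returns a list of [instance_name, new_secondary] pairs, either
# built directly (when a remote node is given) or taken from the iallocator
# result.  A hypothetical return value could look like:
#   [["inst1.example.com", "node4.example.com"],
#    ["inst2.example.com", "node5.example.com"]]
# (names are illustrative only; the real ones come from the cluster config).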
class LUNodeEvacuationStrategy(NoHooksLU):
  """Computes the node evacuation strategy.

  """
  _OP_PARAMS = [
    ("nodes", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
    ("remote_node", None, ht.TMaybeString),
    ("iallocator", None, ht.TMaybeString),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

  def ExpandNames(self):
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
    self.needed_locks = locks = {}
    if self.op.remote_node is None:
      locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]

  def Exec(self, feedback_fn):
    if self.op.remote_node is not None:
      instances = []
      for node in self.op.nodes:
        instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
      result = []
      for i in instances:
        if i.primary_node == self.op.remote_node:
          raise errors.OpPrereqError("Node %s is the primary node of"
                                     " instance %s, cannot use it as"
                                     " secondary" %
                                     (self.op.remote_node, i.name),
                                     errors.ECODE_INVAL)
        result.append([i.name, self.op.remote_node])
    else:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=constants.IALLOCATOR_MODE_MEVAC,
                       evac_nodes=self.op.nodes)
      ial.Run(self.op.iallocator, validate=True)
      if not ial.success:
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
                                 errors.ECODE_NORES)
      result = ial.result
    return result


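# The grow-disk LU below takes the instance name, a disk index, an integer
# amount to add and an optional wait_for_sync flag.  A minimal sketch of the
# corresponding opcode submission, assuming the usual opcode wrapper in
# opcodes.py and purely illustrative names and sizes:
#
#   op = opcodes.OpGrowDisk(instance_name="inst1.example.com",
#                           disk=0, amount=1024, wait_for_sync=True)
#
# which grows disk/0 of that instance by the given amount on all its nodes.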
class LUGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("disk", ht.NoDefault, ht.TInt),
    ("amount", ht.NoDefault, ht.TInt),
    ("wait_for_sync", True, ht.TBool),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    nodenames = list(instance.all_nodes)
    for node in nodenames:
      _CheckNodeOnline(self, node)

    self.instance = instance

    if instance.disk_template not in constants.DTS_GROWABLE:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing.", errors.ECODE_INVAL)

    self.disk = instance.FindDisk(self.op.disk)

    if instance.disk_template != constants.DT_FILE:
      # TODO: check the free disk space for file, when that feature will be
      # supported
      _CheckNodesFreeDisk(self, nodenames, self.op.amount)

  def Exec(self, feedback_fn):
    """Execute disk grow.

    """
    instance = self.instance
    disk = self.disk

    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block device to grow")

    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
      result.Raise("Grow request failed on node %s" % node)

      # TODO: Rewrite code to work properly
      # DRBD goes into sync mode for a short amount of time after executing the
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
      # calling "resize" in sync mode fails. Sleeping for a short amount of
      # time is a work-around.
      time.sleep(5)

    disk.RecordGrow(self.op.amount)
    self.cfg.Update(instance, feedback_fn)
    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
      if disk_abort:
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
                             " status.\nPlease check the instance.")
      if not instance.admin_up:
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
    elif not instance.admin_up:
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
                           " not supposed to be running because no wait for"
                           " sync mode was requested.")


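# The query LU below builds one dictionary per instance, keyed by instance
# name.  Shape of the result, trimmed to a few representative keys (values
# are illustrative only):
#
#   {"inst1.example.com": {"config_state": "up",
#                          "run_state": "up",        # None when static=True
#                          "pnode": "node1.example.com",
#                          "disks": [...],           # per-disk status dicts
#                          ...}}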
class LUQueryInstanceData(NoHooksLU):
  """Query runtime instance data.

  """
  _OP_PARAMS = [
    ("instances", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
    ("static", False, ht.TBool),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = _ExpandInstanceName(self.cfg, name)
        self.wanted_names.append(full_name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
    else:
      self.wanted_names = None
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_find(node, dev)
    if result.offline:
      return None

    result.Raise("Can't compute disk status for %s" % instance_name)

    status = result.payload
    if status is None:
      return None

    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]

    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                              instance.name, dev)
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)

    if dev.children:
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
                      for child in dev.children]
    else:
      dev_children = []

    data = {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
      "mode": dev.mode,
      "size": dev.size,
      }

    return data

  def Exec(self, feedback_fn):
    """Gather and return data"""
    result = {}

    cluster = self.cfg.GetClusterInfo()

    for instance in self.wanted_instances:
      if not self.op.static:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        if remote_info and "state" in remote_info:
          remote_state = "up"
        else:
          remote_state = "down"
      else:
        remote_state = None
      if instance.admin_up:
        config_state = "up"
      else:
        config_state = "down"

      disks = [self._ComputeDiskStatus(instance, None, device)
               for device in instance.disks]

      idict = {
        "name": instance.name,
        "config_state": config_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "os_instance": instance.osparams,
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

      result[instance.name] = idict

    return result


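# The modify LU below accepts "nics" and "disks" as lists of (op, settings)
# pairs, where op is constants.DDM_ADD, constants.DDM_REMOVE or the integer
# index of an existing device.  A hypothetical disk addition would be passed
# as:
#
#   disks=[(constants.DDM_ADD, {"size": 1024, "mode": constants.DISK_RDWR})]
#
# and only one add/remove per category is accepted per call (see
# CheckArguments below).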
class LUSetInstanceParams(LogicalUnit):
  """Modifies an instance's parameters.

  """
  HPATH = "instance-modify"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("nics", ht.EmptyList, ht.TList),
    ("disks", ht.EmptyList, ht.TList),
    ("beparams", ht.EmptyDict, ht.TDict),
    ("hvparams", ht.EmptyDict, ht.TDict),
    ("disk_template", None, ht.TMaybeString),
    ("remote_node", None, ht.TMaybeString),
    ("os_name", None, ht.TMaybeString),
    ("force_variant", False, ht.TBool),
    ("osparams", None, ht.TOr(ht.TDict, ht.TNone)),
    _PForce,
    ]
  REQ_BGL = False

  def CheckArguments(self):
    if not (self.op.nics or self.op.disks or self.op.disk_template or
            self.op.hvparams or self.op.beparams or self.op.os_name):
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)

    if self.op.hvparams:
      _CheckGlobalHvParams(self.op.hvparams)

    # Disk validation
    disk_addremove = 0
    for disk_op, disk_dict in self.op.disks:
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
      if disk_op == constants.DDM_REMOVE:
        disk_addremove += 1
        continue
      elif disk_op == constants.DDM_ADD:
        disk_addremove += 1
      else:
        if not isinstance(disk_op, int):
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
        if not isinstance(disk_dict, dict):
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

      if disk_op == constants.DDM_ADD:
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
        if mode not in constants.DISK_ACCESS_SET:
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
                                     errors.ECODE_INVAL)
        size = disk_dict.get('size', None)
        if size is None:
          raise errors.OpPrereqError("Required disk parameter size missing",
                                     errors.ECODE_INVAL)
        try:
          size = int(size)
        except (TypeError, ValueError), err:
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
                                     str(err), errors.ECODE_INVAL)
        disk_dict['size'] = size
      else:
        # modification of disk
        if 'size' in disk_dict:
          raise errors.OpPrereqError("Disk size change not possible, use"
                                     " grow-disk", errors.ECODE_INVAL)

    if disk_addremove > 1:
      raise errors.OpPrereqError("Only one disk add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)

    if self.op.disks and self.op.disk_template is not None:
      raise errors.OpPrereqError("Disk template conversion and other disk"
                                 " changes not supported at the same time",
                                 errors.ECODE_INVAL)

    if self.op.disk_template:
      _CheckDiskTemplate(self.op.disk_template)
      if (self.op.disk_template in constants.DTS_NET_MIRROR and
          self.op.remote_node is None):
        raise errors.OpPrereqError("Changing the disk template to a mirrored"
                                   " one requires specifying a secondary node",
                                   errors.ECODE_INVAL)

    # NIC validation
    nic_addremove = 0
    for nic_op, nic_dict in self.op.nics:
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
      if nic_op == constants.DDM_REMOVE:
        nic_addremove += 1
        continue
      elif nic_op == constants.DDM_ADD:
        nic_addremove += 1
      else:
        if not isinstance(nic_op, int):
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
        if not isinstance(nic_dict, dict):
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

      # nic_dict should be a dict
      nic_ip = nic_dict.get('ip', None)
      if nic_ip is not None:
        if nic_ip.lower() == constants.VALUE_NONE:
          nic_dict['ip'] = None
        else:
          if not netutils.IPAddress.IsValid(nic_ip):
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
                                       errors.ECODE_INVAL)

      nic_bridge = nic_dict.get('bridge', None)
      nic_link = nic_dict.get('link', None)
      if nic_bridge and nic_link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time", errors.ECODE_INVAL)
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
        nic_dict['bridge'] = None
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
        nic_dict['link'] = None

      if nic_op == constants.DDM_ADD:
        nic_mac = nic_dict.get('mac', None)
        if nic_mac is None:
          nic_dict['mac'] = constants.VALUE_AUTO

      if 'mac' in nic_dict:
        nic_mac = nic_dict['mac']
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)

        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
                                     " modifying an existing nic",
                                     errors.ECODE_INVAL)

    if nic_addremove > 1:
      raise errors.OpPrereqError("Only one NIC add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
      if self.op.disk_template and self.op.remote_node:
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, primary and secondaries.

    """
    args = dict()
    if constants.BE_MEMORY in self.be_new:
      args['memory'] = self.be_new[constants.BE_MEMORY]
    if constants.BE_VCPUS in self.be_new:
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
    # information at all.
    if self.op.nics:
      args['nics'] = []
      nic_override = dict(self.op.nics)
      for idx, nic in enumerate(self.instance.nics):
        if idx in nic_override:
          this_nic_override = nic_override[idx]
        else:
          this_nic_override = {}
        if 'ip' in this_nic_override:
          ip = this_nic_override['ip']
        else:
          ip = nic.ip
        if 'mac' in this_nic_override:
          mac = this_nic_override['mac']
        else:
          mac = nic.mac
        if idx in self.nic_pnew:
          nicparams = self.nic_pnew[idx]
        else:
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args['nics'].append((ip, mac, mode, link))
      if constants.DDM_ADD in nic_override:
        ip = nic_override[constants.DDM_ADD].get('ip', None)
        mac = nic_override[constants.DDM_ADD]['mac']
        nicparams = self.nic_pnew[constants.DDM_ADD]
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args['nics'].append((ip, mac, mode, link))
      elif constants.DDM_REMOVE in nic_override:
        del args['nics'][-1]

    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
    if self.op.disk_template:
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    # checking the new params on the primary/secondary nodes

    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    cluster = self.cluster = self.cfg.GetClusterInfo()
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    pnode = instance.primary_node
    nodelist = list(instance.all_nodes)

    # OS change
    if self.op.os_name and not self.op.force:
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
                      self.op.force_variant)
      instance_os = self.op.os_name
    else:
      instance_os = instance.os

    if self.op.disk_template:
      if instance.disk_template == self.op.disk_template:
        raise errors.OpPrereqError("Instance already has disk template %s" %
                                   instance.disk_template, errors.ECODE_INVAL)

      if (instance.disk_template,
          self.op.disk_template) not in self._DISK_CONVERSIONS:
        raise errors.OpPrereqError("Unsupported disk template conversion from"
                                   " %s to %s" % (instance.disk_template,
                                                  self.op.disk_template),
                                   errors.ECODE_INVAL)
      _CheckInstanceDown(self, instance, "cannot change disk template")
      if self.op.disk_template in constants.DTS_NET_MIRROR:
        if self.op.remote_node == pnode:
          raise errors.OpPrereqError("Given new secondary node %s is the same"
                                     " as the primary node of the instance" %
                                     self.op.remote_node, errors.ECODE_STATE)
        _CheckNodeOnline(self, self.op.remote_node)
        _CheckNodeNotDrained(self, self.op.remote_node)
        disks = [{"size": d.size} for d in instance.disks]
        required = _ComputeDiskSize(self.op.disk_template, disks)
        _CheckNodesFreeDisk(self, [self.op.remote_node], required)

    # hvparams processing
    if self.op.hvparams:
      hv_type = instance.hypervisor
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)

      # local check
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
      self.hv_new = hv_new # the new actual values
      self.hv_inst = i_hvdict # the new dict (without defaults)
    else:
      self.hv_new = self.hv_inst = {}

    # beparams processing
    if self.op.beparams:
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
                                   use_none=True)
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
      be_new = cluster.SimpleFillBE(i_bedict)
      self.be_new = be_new # the new actual values
      self.be_inst = i_bedict # the new dict (without defaults)
    else:
      self.be_new = self.be_inst = {}

    # osparams processing
    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = {}

    self.warn = []

    if constants.BE_MEMORY in self.op.beparams and not self.op.force:
      mem_check_list = [pnode]
      if be_new[constants.BE_AUTO_BALANCE]:
        # either we changed auto_balance to yes or it was from before
        mem_check_list.extend(instance.secondary_nodes)
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
                                                  instance.hypervisor)
      nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
                                         instance.hypervisor)
      pninfo = nodeinfo[pnode]
      msg = pninfo.fail_msg
      if msg:
        # Assume the primary node is unreachable and go ahead
        self.warn.append("Can't get info from primary node %s: %s" %
                         (pnode, msg))
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
        self.warn.append("Node data from primary node %s doesn't contain"
                         " free memory information" % pnode)
      elif instance_info.fail_msg:
        self.warn.append("Can't get instance runtime information: %s" %
                         instance_info.fail_msg)
      else:
        if instance_info.payload:
          current_mem = int(instance_info.payload['memory'])
        else:
          # Assume instance not running
          # (there is a slight race condition here, but it's not very probable,
          # and we have no other way to check)
          current_mem = 0
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
                    pninfo.payload['memory_free'])
        if miss_mem > 0:
          raise errors.OpPrereqError("This change will prevent the instance"
                                     " from starting, due to %d MB of memory"
                                     " missing on its primary node" % miss_mem,
                                     errors.ECODE_NORES)

      if be_new[constants.BE_AUTO_BALANCE]:
        for node, nres in nodeinfo.items():
          if node not in instance.secondary_nodes:
            continue
          msg = nres.fail_msg
          if msg:
            self.warn.append("Can't get info from secondary node %s: %s" %
                             (node, msg))
          elif not isinstance(nres.payload.get('memory_free', None), int):
            self.warn.append("Secondary node %s didn't return free"
                             " memory information" % node)
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
            self.warn.append("Not enough memory to failover instance to"
                             " secondary node %s" % node)

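    # Worked example for the memory check above (numbers are hypothetical):
    # asking for BE_MEMORY=2048 while the instance currently uses 512 and the
    # primary node reports 1024 MB free gives
    #   miss_mem = 2048 - 512 - 1024 = 512 > 0
    # so the request is rejected with ECODE_NORES.
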
    # NIC processing
    self.nic_pnew = {}
    self.nic_pinst = {}
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        if not instance.nics:
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
                                     errors.ECODE_INVAL)
        continue
      if nic_op != constants.DDM_ADD:
        # an existing nic
        if not instance.nics:
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
                                     " no NICs" % nic_op,
                                     errors.ECODE_INVAL)
        if nic_op < 0 or nic_op >= len(instance.nics):
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
                                     " are 0 to %d" %
                                     (nic_op, len(instance.nics) - 1),
                                     errors.ECODE_INVAL)
        old_nic_params = instance.nics[nic_op].nicparams
        old_nic_ip = instance.nics[nic_op].ip
      else:
        old_nic_params = {}
        old_nic_ip = None

      update_params_dict = dict([(key, nic_dict[key])
                                 for key in constants.NICS_PARAMETERS
                                 if key in nic_dict])

      if 'bridge' in nic_dict:
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']

      new_nic_params = _GetUpdatedParams(old_nic_params,
                                         update_params_dict)
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
      self.nic_pinst[nic_op] = new_nic_params
      self.nic_pnew[nic_op] = new_filled_nic_params
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]

      if new_nic_mode == constants.NIC_MODE_BRIDGED:
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
        if msg:
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
          if self.op.force:
            self.warn.append(msg)
          else:
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
      if new_nic_mode == constants.NIC_MODE_ROUTED:
        if 'ip' in nic_dict:
          nic_ip = nic_dict['ip']
        else:
          nic_ip = old_nic_ip
        if nic_ip is None:
          raise errors.OpPrereqError('Cannot set the nic ip to None'
                                     ' on a routed nic', errors.ECODE_INVAL)
      if 'mac' in nic_dict:
        nic_mac = nic_dict['mac']
        if nic_mac is None:
          raise errors.OpPrereqError('Cannot set the nic mac to None',
                                     errors.ECODE_INVAL)
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          # otherwise generate the mac
          nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
        else:
          # or validate/reserve the current one
          try:
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
          except errors.ReservationError:
            raise errors.OpPrereqError("MAC address %s already in use"
                                       " in cluster" % nic_mac,
                                       errors.ECODE_NOTUNIQUE)

9332
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
9333
      raise errors.OpPrereqError("Disk operations not supported for"
9334
                                 " diskless instances",
9335
                                 errors.ECODE_INVAL)
9336
    for disk_op, _ in self.op.disks:
9337
      if disk_op == constants.DDM_REMOVE:
9338
        if len(instance.disks) == 1:
9339
          raise errors.OpPrereqError("Cannot remove the last disk of"
9340
                                     " an instance", errors.ECODE_INVAL)
9341
        _CheckInstanceDown(self, instance, "cannot remove disks")
9342

    
9343
      if (disk_op == constants.DDM_ADD and
9344
          len(instance.nics) >= constants.MAX_DISKS):
9345
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
9346
                                   " add more" % constants.MAX_DISKS,
9347
                                   errors.ECODE_STATE)
9348
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
9349
        # an existing disk
9350
        if disk_op < 0 or disk_op >= len(instance.disks):
9351
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
9352
                                     " are 0 to %d" %
9353
                                     (disk_op, len(instance.disks)),
9354
                                     errors.ECODE_INVAL)
9355

    
9356
    return
9357

    
9358
  def _ConvertPlainToDrbd(self, feedback_fn):
    """Converts an instance from plain to drbd.

    """
    feedback_fn("Converting template to drbd")
    instance = self.instance
    pnode = instance.primary_node
    snode = self.op.remote_node

    # create a fake disk info for _GenerateDiskTemplate
    disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
                                      instance.name, pnode, [snode],
                                      disk_info, None, None, 0)
    info = _GetInstanceInfoText(instance)
    feedback_fn("Creating additional volumes...")
    # first, create the missing data and meta devices
    for disk in new_disks:
      # unfortunately this is... not too nice
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
                            info, True)
      for child in disk.children:
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
    # at this stage, all new LVs have been created, we can rename the
    # old ones
    feedback_fn("Renaming original volumes...")
    rename_list = [(o, n.children[0].logical_id)
                   for (o, n) in zip(instance.disks, new_disks)]
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
    result.Raise("Failed to rename original LVs")

    feedback_fn("Initializing DRBD devices...")
    # all child devices are in place, we can now create the DRBD devices
    for disk in new_disks:
      for node in [pnode, snode]:
        f_create = node == pnode
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)

    # at this point, the instance has been modified
    instance.disk_template = constants.DT_DRBD8
    instance.disks = new_disks
    self.cfg.Update(instance, feedback_fn)

    # disks are created, waiting for sync
    disk_abort = not _WaitForSync(self, instance)
    if disk_abort:
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance, please cleanup manually")

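  # Both conversion helpers above and below operate on the same disk tree: a
  # DRBD8 disk whose children[0] is the data LV and children[1] the metadata
  # LV.  Roughly (names are illustrative only):
  #
  #   DRBD8 disk/0
  #     +- LV <name>.disk0_data   (the instance's payload)
  #     +- LV <name>.disk0_meta   (DRBD metadata)
  #
  # so converting back to plain simply promotes children[0] and removes the
  # rest.
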
  def _ConvertDrbdToPlain(self, feedback_fn):
    """Converts an instance from drbd to plain.

    """
    instance = self.instance
    assert len(instance.secondary_nodes) == 1
    pnode = instance.primary_node
    snode = instance.secondary_nodes[0]
    feedback_fn("Converting template to plain")

    old_disks = instance.disks
    new_disks = [d.children[0] for d in old_disks]

    # copy over size and mode
    for parent, child in zip(old_disks, new_disks):
      child.size = parent.size
      child.mode = parent.mode

    # update instance structure
    instance.disks = new_disks
    instance.disk_template = constants.DT_PLAIN
    self.cfg.Update(instance, feedback_fn)

    feedback_fn("Removing volumes on the secondary node...")
    for disk in old_disks:
      self.cfg.SetDiskID(disk, snode)
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
      if msg:
        self.LogWarning("Could not remove block device %s on node %s,"
                        " continuing anyway: %s", disk.iv_name, snode, msg)

    feedback_fn("Removing unneeded volumes on the primary node...")
    for idx, disk in enumerate(old_disks):
      meta = disk.children[1]
      self.cfg.SetDiskID(meta, pnode)
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
      if msg:
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
                        " continuing anyway: %s", idx, pnode, msg)


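  # Exec() below reports its changes as a list of (parameter, new value)
  # pairs, e.g. (illustrative values only):
  #   [("disk/1", "add:size=1024,mode=rw"), ("hv/kernel_path", "...")]
  # which the caller renders as feedback for the user.
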
  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    result = []
    instance = self.instance
    # disk changes
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        # remove the last disk
        device = instance.disks.pop()
        device_idx = len(instance.disks)
        for node, disk in device.ComputeNodeTree(instance.primary_node):
          self.cfg.SetDiskID(disk, node)
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
          if msg:
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
                            " continuing anyway", device_idx, node, msg)
        result.append(("disk/%d" % device_idx, "remove"))
      elif disk_op == constants.DDM_ADD:
        # add a new disk
        if instance.disk_template == constants.DT_FILE:
          file_driver, file_path = instance.disks[0].logical_id
          file_path = os.path.dirname(file_path)
        else:
          file_driver = file_path = None
        disk_idx_base = len(instance.disks)
        new_disk = _GenerateDiskTemplate(self,
                                         instance.disk_template,
                                         instance.name, instance.primary_node,
                                         instance.secondary_nodes,
                                         [disk_dict],
                                         file_path,
                                         file_driver,
                                         disk_idx_base)[0]
        instance.disks.append(new_disk)
        info = _GetInstanceInfoText(instance)

        logging.info("Creating volume %s for instance %s",
                     new_disk.iv_name, instance.name)
        # Note: this needs to be kept in sync with _CreateDisks
        #HARDCODE
        for node in instance.all_nodes:
          f_create = node == instance.primary_node
          try:
            _CreateBlockDev(self, node, instance, new_disk,
                            f_create, info, f_create)
          except errors.OpExecError, err:
            self.LogWarning("Failed to create volume %s (%s) on"
                            " node %s: %s",
                            new_disk.iv_name, new_disk, node, err)
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
                       (new_disk.size, new_disk.mode)))
      else:
        # change a given disk
        instance.disks[disk_op].mode = disk_dict['mode']
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))

    if self.op.disk_template:
      r_shut = _ShutdownInstanceDisks(self, instance)
      if not r_shut:
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
                                 " proceed with disk template conversion")
      mode = (instance.disk_template, self.op.disk_template)
      try:
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
      except:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise
      result.append(("disk_template", self.op.disk_template))

    # NIC changes
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        # remove the last nic
        del instance.nics[-1]
        result.append(("nic.%d" % len(instance.nics), "remove"))
      elif nic_op == constants.DDM_ADD:
        # mac and bridge should be set by now
        mac = nic_dict['mac']
        ip = nic_dict.get('ip', None)
        nicparams = self.nic_pinst[constants.DDM_ADD]
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
        instance.nics.append(new_nic)
        result.append(("nic.%d" % (len(instance.nics) - 1),
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
                       (new_nic.mac, new_nic.ip,
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
                       )))
      else:
        for key in 'mac', 'ip':
          if key in nic_dict:
            setattr(instance.nics[nic_op], key, nic_dict[key])
        if nic_op in self.nic_pinst:
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
        for key, val in nic_dict.iteritems():
          result.append(("nic.%s/%d" % (key, nic_op), val))

    # hvparams changes
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    # OS change
    if self.op.os_name:
      instance.os = self.op.os_name

    # osparams changes
    if self.op.osparams:
      instance.osparams = self.os_inst
      for key, val in self.op.osparams.iteritems():
        result.append(("os/%s" % key, val))

    self.cfg.Update(instance, feedback_fn)

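    # The returned value is a list of (parameter, change) pairs describing
    # what was modified, e.g. [("disk/1", "add:size=1024,mode=rw"),
    # ("be/memory", 512)] (illustrative values only).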
    return result
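  # Dispatch table used by Exec above: maps (current disk template, requested
  # disk template) to the conversion helper; only plain<->drbd conversions are
  # supported here.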
  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }


class LUQueryExports(NoHooksLU):
  """Query the exports list

  """
  _OP_PARAMS = [
    ("nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
    ("use_locking", False, ht.TBool),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
    rpcresult = self.rpc.call_export_list(self.nodes)
    result = {}
    for node in rpcresult:
      if rpcresult[node].fail_msg:
        result[node] = False
      else:
        result[node] = rpcresult[node].payload

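    # Illustrative result shape (names made up):
    #   {"node1.example.com": ["instance1.example.com"],
    #    "node2.example.com": False}
    # where False marks a node whose export list RPC failed.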
    return result


class LUPrepareExport(NoHooksLU):
  """Prepares an instance for an export and returns useful information.

  """
  _OP_PARAMS = [
    _PInstanceName,
    ("mode", ht.NoDefault, ht.TElemOf(constants.EXPORT_MODES)),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self._cds = _GetClusterDomainSecret()

  def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    """
    instance = self.instance

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      salt = utils.GenerateSecret(8)

      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
      result = self.rpc.call_x509_cert_create(instance.primary_node,
                                              constants.RIE_CERT_VALIDITY)
      result.Raise("Can't create X509 key and certificate on %s" % result.node)

      (name, cert_pem) = result.payload

      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                             cert_pem)

      return {
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
                          salt),
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
        }

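    # Local exports need no preparation data, so callers only get something
    # back for remote-mode exports (handled above).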
    return None


class LUExportInstance(LogicalUnit):
  """Export an instance to an image in the cluster.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("target_node", ht.NoDefault, ht.TOr(ht.TNonEmptyString, ht.TList)),
    ("shutdown", True, ht.TBool),
    _PShutdownTimeout,
    ("remove_instance", False, ht.TBool),
    ("ignore_remove_failures", False, ht.TBool),
    ("mode", constants.EXPORT_MODE_LOCAL, ht.TElemOf(constants.EXPORT_MODES)),
    ("x509_key_name", None, ht.TOr(ht.TList, ht.TNone)),
    ("destination_x509_ca", None, ht.TMaybeString),
    ]
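  # Note: "target_node" is a single node name for local exports, while for
  # remote exports it is expected to be a list with one entry per instance
  # disk (validated in CheckPrereq via CheckRemoteExportDiskInfo).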
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.x509_key_name = self.op.x509_key_name
    self.dest_x509_ca_pem = self.op.destination_x509_ca

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      if not self.x509_key_name:
        raise errors.OpPrereqError("Missing X509 key name for encryption",
                                   errors.ECODE_INVAL)

      if not self.dest_x509_ca_pem:
        raise errors.OpPrereqError("Missing destination X509 CA",
                                   errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    # Lock all nodes for local exports
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      # FIXME: lock only instance primary and destination node
      #
      # Sad but true, for now we have to lock all nodes, as we don't know where
      # the previous export might be, and in this LU we search for it and
      # remove it from its current node. In the future we could fix this by:
      #  - making a tasklet to search (share-lock all), then create the
      #    new one, then one to remove, after
      #  - removing the removal operation altogether
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # All nodes are locked anyway, so nothing to do here.

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    """
    env = {
      "EXPORT_MODE": self.op.mode,
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      # TODO: Generic function for boolean env variables
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      nl.append(self.op.target_node)

    return env, nl, nl

  def CheckPrereq(self):
9760
    """Check prerequisites.
9761

9762
    This checks that the instance and node names are valid.
9763

9764
    """
9765
    instance_name = self.op.instance_name
9766

    
9767
    self.instance = self.cfg.GetInstanceInfo(instance_name)
9768
    assert self.instance is not None, \
9769
          "Cannot retrieve locked instance %s" % self.op.instance_name
9770
    _CheckNodeOnline(self, self.instance.primary_node)
9771

    
9772
    if (self.op.remove_instance and self.instance.admin_up and
9773
        not self.op.shutdown):
9774
      raise errors.OpPrereqError("Cannot remove instance without shutting it"
                                 " down first")
9776

    
9777
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9778
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9779
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9780
      assert self.dst_node is not None
9781

    
9782
      _CheckNodeOnline(self, self.dst_node.name)
9783
      _CheckNodeNotDrained(self, self.dst_node.name)
9784

    
9785
      self._cds = None
9786
      self.dest_disk_info = None
9787
      self.dest_x509_ca = None
9788

    
9789
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9790
      self.dst_node = None
9791

    
9792
      if len(self.op.target_node) != len(self.instance.disks):
9793
        raise errors.OpPrereqError(("Received destination information for %s"
9794
                                    " disks, but instance %s has %s disks") %
9795
                                   (len(self.op.target_node), instance_name,
9796
                                    len(self.instance.disks)),
9797
                                   errors.ECODE_INVAL)
9798

    
9799
      cds = _GetClusterDomainSecret()
9800

    
9801
      # Check X509 key name
9802
      try:
9803
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9804
      except (TypeError, ValueError), err:
9805
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9806

    
9807
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9808
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9809
                                   errors.ECODE_INVAL)
9810

    
9811
      # Load and verify CA
9812
      try:
9813
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9814
      except OpenSSL.crypto.Error, err:
9815
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9816
                                   (err, ), errors.ECODE_INVAL)
9817

    
9818
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9819
      if errcode is not None:
9820
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
9821
                                   (msg, ), errors.ECODE_INVAL)
9822

    
9823
      self.dest_x509_ca = cert
9824

    
9825
      # Verify target information
9826
      disk_info = []
9827
      for idx, disk_data in enumerate(self.op.target_node):
9828
        try:
9829
          (host, port, magic) = \
9830
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9831
        except errors.GenericError, err:
9832
          raise errors.OpPrereqError("Target info for disk %s: %s" %
9833
                                     (idx, err), errors.ECODE_INVAL)
9834

    
9835
        disk_info.append((host, port, magic))
9836

    
9837
      assert len(disk_info) == len(self.op.target_node)
9838
      self.dest_disk_info = disk_info
9839

    
9840
    else:
9841
      raise errors.ProgrammerError("Unhandled export mode %r" %
9842
                                   self.op.mode)
9843

    
9844
    # instance disk type verification
9845
    # TODO: Implement export support for file-based disks
9846
    for disk in self.instance.disks:
9847
      if disk.dev_type == constants.LD_FILE:
9848
        raise errors.OpPrereqError("Export not supported for instances with"
9849
                                   " file-based disks", errors.ECODE_INVAL)
9850

    
  def _CleanupExports(self, feedback_fn):
    """Removes exports of current instance from all other nodes.

    If an instance in a cluster with nodes A..D was exported to node C, its
    exports will be removed from the nodes A, B and D.

    """
    assert self.op.mode != constants.EXPORT_MODE_REMOTE

    nodelist = self.cfg.GetNodeList()
    nodelist.remove(self.dst_node.name)

    # on one-node clusters nodelist will be empty after the removal
    # if we proceed the backup would be removed because OpQueryExports
    # substitutes an empty list with the full cluster node list.
    iname = self.instance.name
    if nodelist:
      feedback_fn("Removing old exports for instance %s" % iname)
      exportlist = self.rpc.call_export_list(nodelist)
      for node in exportlist:
        if exportlist[node].fail_msg:
          continue
        if iname in exportlist[node].payload:
          msg = self.rpc.call_export_remove(node, iname).fail_msg
          if msg:
            self.LogWarning("Could not remove older export for instance %s"
                            " on node %s: %s", iname, node, msg)

  def Exec(self, feedback_fn):
9880
    """Export an instance to an image in the cluster.
9881

9882
    """
9883
    assert self.op.mode in constants.EXPORT_MODES
9884

    
9885
    instance = self.instance
9886
    src_node = instance.primary_node
9887

    
9888
    if self.op.shutdown:
9889
      # shutdown the instance, but not the disks
9890
      feedback_fn("Shutting down instance %s" % instance.name)
9891
      result = self.rpc.call_instance_shutdown(src_node, instance,
9892
                                               self.op.shutdown_timeout)
9893
      # TODO: Maybe ignore failures if ignore_remove_failures is set
9894
      result.Raise("Could not shutdown instance %s on"
9895
                   " node %s" % (instance.name, src_node))
9896

    
9897
    # set the disks ID correctly since call_instance_start needs the
9898
    # correct drbd minor to create the symlinks
9899
    for disk in instance.disks:
9900
      self.cfg.SetDiskID(disk, src_node)
9901

    
9902
    activate_disks = (not instance.admin_up)
9903

    
9904
    if activate_disks:
9905
      # Activate the instance disks if we're exporting a stopped instance
9906
      feedback_fn("Activating disks for %s" % instance.name)
9907
      _StartInstanceDisks(self, instance, None)
9908

    
9909
    try:
9910
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
9911
                                                     instance)
9912

    
9913
      helper.CreateSnapshots()
9914
      try:
9915
        if (self.op.shutdown and instance.admin_up and
9916
            not self.op.remove_instance):
9917
          assert not activate_disks
9918
          feedback_fn("Starting instance %s" % instance.name)
9919
          result = self.rpc.call_instance_start(src_node, instance, None, None)
9920
          msg = result.fail_msg
9921
          if msg:
9922
            feedback_fn("Failed to start instance: %s" % msg)
9923
            _ShutdownInstanceDisks(self, instance)
9924
            raise errors.OpExecError("Could not start instance: %s" % msg)
9925

    
9926
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
9927
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
9928
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9929
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
9930
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9931

    
9932
          (key_name, _, _) = self.x509_key_name
9933

    
9934
          dest_ca_pem = \
9935
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
9936
                                            self.dest_x509_ca)
9937

    
9938
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
9939
                                                     key_name, dest_ca_pem,
9940
                                                     timeouts)
9941
      finally:
9942
        helper.Cleanup()
9943

    
9944
      # Check for backwards compatibility
9945
      assert len(dresults) == len(instance.disks)
9946
      assert compat.all(isinstance(i, bool) for i in dresults), \
9947
             "Not all results are boolean: %r" % dresults
9948

    
9949
    finally:
9950
      if activate_disks:
9951
        feedback_fn("Deactivating disks for %s" % instance.name)
9952
        _ShutdownInstanceDisks(self, instance)
9953

    
9954
    if not (compat.all(dresults) and fin_resu):
9955
      failures = []
9956
      if not fin_resu:
9957
        failures.append("export finalization")
9958
      if not compat.all(dresults):
9959
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
9960
                               if not dsk)
9961
        failures.append("disk export: disk(s) %s" % fdsk)
9962

    
9963
      raise errors.OpExecError("Export failed, errors in %s" %
9964
                               utils.CommaJoin(failures))
9965

    
9966
    # At this point, the export was successful, we can cleanup/finish
9967

    
9968
    # Remove instance if requested
9969
    if self.op.remove_instance:
9970
      feedback_fn("Removing instance %s" % instance.name)
9971
      _RemoveInstance(self, feedback_fn, instance,
9972
                      self.op.ignore_remove_failures)
9973

    
9974
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9975
      self._CleanupExports(feedback_fn)
9976

    
9977
    return fin_resu, dresults
9978

    
9979

    
9980
class LURemoveExport(NoHooksLU):
  """Remove exports related to the named instance.

  """
  _OP_PARAMS = [
    _PInstanceName,
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name

    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")


class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """

  def ExpandNames(self):
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)
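    # After CheckPrereq, self.target holds the taggable config object
    # (cluster, node or instance) that the concrete tag LUs below operate on.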


class LUGetTags(TagsLU):
  """Returns the tags of a given object.

  """
  _OP_PARAMS = [
    ("kind", ht.NoDefault, ht.TElemOf(constants.VALID_TAG_TYPES)),
    # Name is only meaningful for nodes and instances
    ("name", ht.NoDefault, ht.TMaybeString),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    TagsLU.ExpandNames(self)

    # Share locks as this is only a read operation
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())


class LUSearchTags(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  _OP_PARAMS = [
    ("pattern", ht.NoDefault, ht.TNonEmptyString),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err), errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Returns the (path, tag) pairs matching the pattern.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results


class LUAddTags(TagsLU):
  """Sets one or more tags on a given object.

  """
  _OP_PARAMS = [
    ("kind", ht.NoDefault, ht.TElemOf(constants.VALID_TAG_TYPES)),
    # Name is only meaningful for nodes and instances
    ("name", ht.NoDefault, ht.TMaybeString),
    ("tags", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
    ]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tags.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)


class LUDelTags(TagsLU):
  """Delete a list of tags from a given object.

  """
  _OP_PARAMS = [
    ("kind", ht.NoDefault, ht.TElemOf(constants.VALID_TAG_TYPES)),
    # Name is only meaningful for nodes and instances
    ("name", ht.NoDefault, ht.TMaybeString),
    ("tags", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
    ]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()

    diff_tags = del_tags - cur_tags
    if diff_tags:
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (utils.CommaJoin(diff_names), ),
                                 errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tags from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)


class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  _OP_PARAMS = [
    ("duration", ht.NoDefault, ht.TFloat),
    ("on_master", True, ht.TBool),
    ("on_nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
    ("repeat", 0, ht.TPositiveInt)
    ]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
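    # A repeat of 0 still performs a single delay; repeat == N runs the delay
    # N times, logging each iteration.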
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()


class LUTestJobqueue(NoHooksLU):
10259
  """Utility LU to test some aspects of the job queue.
10260

10261
  """
10262
  _OP_PARAMS = [
10263
    ("notify_waitlock", False, ht.TBool),
10264
    ("notify_exec", False, ht.TBool),
10265
    ("log_messages", ht.EmptyList, ht.TListOf(ht.TString)),
10266
    ("fail", False, ht.TBool),
10267
    ]
10268
  REQ_BGL = False
10269

    
10270
  # Must be lower than default timeout for WaitForJobChange to see whether it
10271
  # notices changed jobs
10272
  _CLIENT_CONNECT_TIMEOUT = 20.0
10273
  _CLIENT_CONFIRM_TIMEOUT = 60.0
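  # Protocol sketch (derived from the code below): the LU publishes a socket
  # path via a test-specific log entry, the test client connects within
  # _CLIENT_CONNECT_TIMEOUT and then confirms by sending one byte within
  # _CLIENT_CONFIRM_TIMEOUT.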
10274

    
10275
  @classmethod
10276
  def _NotifyUsingSocket(cls, cb, errcls):
10277
    """Opens a Unix socket and waits for another program to connect.
10278

10279
    @type cb: callable
10280
    @param cb: Callback to send socket name to client
10281
    @type errcls: class
10282
    @param errcls: Exception class to use for errors
10283

10284
    """
10285
    # Using a temporary directory as there's no easy way to create temporary
10286
    # sockets without writing a custom loop around tempfile.mktemp and
10287
    # socket.bind
10288
    tmpdir = tempfile.mkdtemp()
10289
    try:
10290
      tmpsock = utils.PathJoin(tmpdir, "sock")
10291

    
10292
      logging.debug("Creating temporary socket at %s", tmpsock)
10293
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
10294
      try:
10295
        sock.bind(tmpsock)
10296
        sock.listen(1)
10297

    
10298
        # Send details to client
10299
        cb(tmpsock)
10300

    
10301
        # Wait for client to connect before continuing
10302
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
10303
        try:
10304
          (conn, _) = sock.accept()
10305
        except socket.error, err:
10306
          raise errcls("Client didn't connect in time (%s)" % err)
10307
      finally:
10308
        sock.close()
10309
    finally:
10310
      # Remove as soon as client is connected
10311
      shutil.rmtree(tmpdir)
10312

    
10313
    # Wait for client to close
10314
    try:
10315
      try:
10316
        # pylint: disable-msg=E1101
10317
        # Instance of '_socketobject' has no ... member
10318
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
10319
        conn.recv(1)
10320
      except socket.error, err:
10321
        raise errcls("Client failed to confirm notification (%s)" % err)
10322
    finally:
10323
      conn.close()
10324

    
10325
  def _SendNotification(self, test, arg, sockname):
10326
    """Sends a notification to the client.
10327

10328
    @type test: string
10329
    @param test: Test name
10330
    @param arg: Test argument (depends on test)
10331
    @type sockname: string
10332
    @param sockname: Socket path
10333

10334
    """
10335
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
10336

    
10337
  def _Notify(self, prereq, test, arg):
10338
    """Notifies the client of a test.
10339

10340
    @type prereq: bool
10341
    @param prereq: Whether this is a prereq-phase test
10342
    @type test: string
10343
    @param test: Test name
10344
    @param arg: Test argument (depends on test)
10345

10346
    """
10347
    if prereq:
10348
      errcls = errors.OpPrereqError
10349
    else:
10350
      errcls = errors.OpExecError
10351

    
10352
    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
10353
                                                  test, arg),
10354
                                   errcls)
10355

    
10356
  def CheckArguments(self):
10357
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
10358
    self.expandnames_calls = 0
10359

    
10360
  def ExpandNames(self):
10361
    checkargs_calls = getattr(self, "checkargs_calls", 0)
10362
    if checkargs_calls < 1:
10363
      raise errors.ProgrammerError("CheckArguments was not called")
10364

    
10365
    self.expandnames_calls += 1
10366

    
10367
    if self.op.notify_waitlock:
10368
      self._Notify(True, constants.JQT_EXPANDNAMES, None)
10369

    
10370
    self.LogInfo("Expanding names")
10371

    
10372
    # Get lock on master node (just to get a lock, not for a particular reason)
10373
    self.needed_locks = {
10374
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
10375
      }
10376

    
10377
  def Exec(self, feedback_fn):
10378
    if self.expandnames_calls < 1:
10379
      raise errors.ProgrammerError("ExpandNames was not called")
10380

    
10381
    if self.op.notify_exec:
10382
      self._Notify(False, constants.JQT_EXEC, None)
10383

    
10384
    self.LogInfo("Executing")
10385

    
10386
    if self.op.log_messages:
10387
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
10388
      for idx, msg in enumerate(self.op.log_messages):
10389
        self.LogInfo("Sending log message %s", idx + 1)
10390
        feedback_fn(constants.JQT_MSGPREFIX + msg)
10391
        # Report how many test messages have been sent
10392
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)
10393

    
10394
    if self.op.fail:
10395
      raise errors.OpExecError("Opcode failure was requested")
10396

    
10397
    return True
10398

    
10399

    
10400
class IAllocator(object):
10401
  """IAllocator framework.
10402

10403
  An IAllocator instance has several sets of attributes:
10404
    - cfg that is needed to query the cluster
10405
    - input data (all members of the _KEYS class attribute are required)
10406
    - four buffer attributes (in|out_data|text), that represent the
10407
      input (to the external script) in text and data structure format,
10408
      and the output from it, again in two formats
10409
    - the result variables from the script (success, info, nodes) for
10410
      easy usage
10411

10412
  """
10413
  # pylint: disable-msg=R0902
10414
  # lots of instance attributes
10415
  _ALLO_KEYS = [
10416
    "name", "mem_size", "disks", "disk_template",
10417
    "os", "tags", "nics", "vcpus", "hypervisor",
10418
    ]
10419
  _RELO_KEYS = [
10420
    "name", "relocate_from",
10421
    ]
10422
  _EVAC_KEYS = [
10423
    "evac_nodes",
10424
    ]
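  # Exactly one of the key sets above applies, depending on the mode passed to
  # __init__: _ALLO_KEYS for allocation, _RELO_KEYS for relocation and
  # _EVAC_KEYS for multi-node evacuation; the corresponding values must be
  # supplied as keyword arguments.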
10425

    
10426
  def __init__(self, cfg, rpc, mode, **kwargs):
10427
    self.cfg = cfg
10428
    self.rpc = rpc
10429
    # init buffer variables
10430
    self.in_text = self.out_text = self.in_data = self.out_data = None
10431
    # init all input fields so that pylint is happy
10432
    self.mode = mode
10433
    self.mem_size = self.disks = self.disk_template = None
10434
    self.os = self.tags = self.nics = self.vcpus = None
10435
    self.hypervisor = None
10436
    self.relocate_from = None
10437
    self.name = None
10438
    self.evac_nodes = None
10439
    # computed fields
10440
    self.required_nodes = None
10441
    # init result fields
10442
    self.success = self.info = self.result = None
10443
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10444
      keyset = self._ALLO_KEYS
10445
      fn = self._AddNewInstance
10446
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10447
      keyset = self._RELO_KEYS
10448
      fn = self._AddRelocateInstance
10449
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10450
      keyset = self._EVAC_KEYS
10451
      fn = self._AddEvacuateNodes
10452
    else:
10453
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
10454
                                   " IAllocator" % self.mode)
10455
    for key in kwargs:
10456
      if key not in keyset:
10457
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
10458
                                     " IAllocator" % key)
10459
      setattr(self, key, kwargs[key])
10460

    
10461
    for key in keyset:
10462
      if key not in kwargs:
10463
        raise errors.ProgrammerError("Missing input parameter '%s' to"
10464
                                     " IAllocator" % key)
10465
    self._BuildInputData(fn)
10466

    
10467
  def _ComputeClusterData(self):
10468
    """Compute the generic allocator input data.
10469

10470
    This is the data that is independent of the actual operation.
10471

10472
    """
10473
    cfg = self.cfg
10474
    cluster_info = cfg.GetClusterInfo()
10475
    # cluster data
10476
    data = {
10477
      "version": constants.IALLOCATOR_VERSION,
10478
      "cluster_name": cfg.GetClusterName(),
10479
      "cluster_tags": list(cluster_info.GetTags()),
10480
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
10481
      # we don't have job IDs
10482
      }
10483
    iinfo = cfg.GetAllInstancesInfo().values()
10484
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
10485

    
10486
    # node data
10487
    node_list = cfg.GetNodeList()
10488

    
10489
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10490
      hypervisor_name = self.hypervisor
10491
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10492
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
10493
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10494
      hypervisor_name = cluster_info.enabled_hypervisors[0]
10495

    
10496
    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
10497
                                        hypervisor_name)
10498
    node_iinfo = \
10499
      self.rpc.call_all_instances_info(node_list,
10500
                                       cluster_info.enabled_hypervisors)
10501

    
10502
    data["nodegroups"] = self._ComputeNodeGroupData(cfg)
10503

    
10504
    data["nodes"] = self._ComputeNodeData(cfg, node_data, node_iinfo, i_list)
10505

    
10506
    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
10507

    
10508
    self.in_data = data
10509

    
10510
  @staticmethod
10511
  def _ComputeNodeGroupData(cfg):
10512
    """Compute node groups data.
10513

10514
    """
10515
    ng = {}
10516
    for guuid, gdata in cfg.GetAllNodeGroupsInfo().items():
10517
      ng[guuid] = { "name": gdata.name }
10518
    return ng
10519

    
10520
  @staticmethod
10521
  def _ComputeNodeData(cfg, node_data, node_iinfo, i_list):
10522
    """Compute global node data.
10523

10524
    """
10525
    node_results = {}
10526
    for nname, nresult in node_data.items():
10527
      # first fill in static (config-based) values
10528
      ninfo = cfg.GetNodeInfo(nname)
10529
      pnr = {
10530
        "tags": list(ninfo.GetTags()),
10531
        "primary_ip": ninfo.primary_ip,
10532
        "secondary_ip": ninfo.secondary_ip,
10533
        "offline": ninfo.offline,
10534
        "drained": ninfo.drained,
10535
        "master_candidate": ninfo.master_candidate,
10536
        "group": ninfo.group,
10537
        "master_capable": ninfo.master_capable,
10538
        "vm_capable": ninfo.vm_capable,
10539
        }
10540

    
10541
      if not (ninfo.offline or ninfo.drained):
10542
        nresult.Raise("Can't get data for node %s" % nname)
10543
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
10544
                                nname)
10545
        remote_info = nresult.payload
10546

    
10547
        for attr in ['memory_total', 'memory_free', 'memory_dom0',
10548
                     'vg_size', 'vg_free', 'cpu_total']:
10549
          if attr not in remote_info:
10550
            raise errors.OpExecError("Node '%s' didn't return attribute"
10551
                                     " '%s'" % (nname, attr))
10552
          if not isinstance(remote_info[attr], int):
10553
            raise errors.OpExecError("Node '%s' returned invalid value"
10554
                                     " for '%s': %s" %
10555
                                     (nname, attr, remote_info[attr]))
10556
        # compute memory used by primary instances
10557
        i_p_mem = i_p_up_mem = 0
10558
        for iinfo, beinfo in i_list:
10559
          if iinfo.primary_node == nname:
10560
            i_p_mem += beinfo[constants.BE_MEMORY]
10561
            if iinfo.name not in node_iinfo[nname].payload:
10562
              i_used_mem = 0
10563
            else:
10564
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
10565
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
10566
            remote_info['memory_free'] -= max(0, i_mem_diff)
10567

    
10568
            if iinfo.admin_up:
10569
              i_p_up_mem += beinfo[constants.BE_MEMORY]
10570

    
10571
        # compute memory used by instances
10572
        pnr_dyn = {
10573
          "total_memory": remote_info['memory_total'],
10574
          "reserved_memory": remote_info['memory_dom0'],
10575
          "free_memory": remote_info['memory_free'],
10576
          "total_disk": remote_info['vg_size'],
10577
          "free_disk": remote_info['vg_free'],
10578
          "total_cpus": remote_info['cpu_total'],
10579
          "i_pri_memory": i_p_mem,
10580
          "i_pri_up_memory": i_p_up_mem,
10581
          }
10582
        pnr.update(pnr_dyn)
10583

    
10584
      node_results[nname] = pnr
10585

    
10586
    return node_results
10587

    
10588
  @staticmethod
10589
  def _ComputeInstanceData(cluster_info, i_list):
10590
    """Compute global instance data.
10591

10592
    """
10593
    instance_data = {}
10594
    for iinfo, beinfo in i_list:
10595
      nic_data = []
10596
      for nic in iinfo.nics:
10597
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
10598
        nic_dict = {"mac": nic.mac,
10599
                    "ip": nic.ip,
10600
                    "mode": filled_params[constants.NIC_MODE],
10601
                    "link": filled_params[constants.NIC_LINK],
10602
                   }
10603
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
10604
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
10605
        nic_data.append(nic_dict)
10606
      pir = {
10607
        "tags": list(iinfo.GetTags()),
10608
        "admin_up": iinfo.admin_up,
10609
        "vcpus": beinfo[constants.BE_VCPUS],
10610
        "memory": beinfo[constants.BE_MEMORY],
10611
        "os": iinfo.os,
10612
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
10613
        "nics": nic_data,
10614
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
10615
        "disk_template": iinfo.disk_template,
10616
        "hypervisor": iinfo.hypervisor,
10617
        }
10618
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
10619
                                                 pir["disks"])
10620
      instance_data[iinfo.name] = pir
10621

    
10622
    return instance_data
10623

    
10624
  def _AddNewInstance(self):
10625
    """Add new instance data to allocator structure.
10626

10627
    This in combination with _AllocatorGetClusterData will create the
10628
    correct structure needed as input for the allocator.
10629

10630
    The checks for the completeness of the opcode must have already been
10631
    done.
10632

10633
    """
10634
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)
10635

    
10636
    if self.disk_template in constants.DTS_NET_MIRROR:
10637
      self.required_nodes = 2
10638
    else:
10639
      self.required_nodes = 1
10640
    request = {
10641
      "name": self.name,
10642
      "disk_template": self.disk_template,
10643
      "tags": self.tags,
10644
      "os": self.os,
10645
      "vcpus": self.vcpus,
10646
      "memory": self.mem_size,
10647
      "disks": self.disks,
10648
      "disk_space_total": disk_space,
10649
      "nics": self.nics,
10650
      "required_nodes": self.required_nodes,
10651
      }
10652
    return request
10653

    
10654
  def _AddRelocateInstance(self):
10655
    """Add relocate instance data to allocator structure.
10656

10657
    This in combination with _IAllocatorGetClusterData will create the
10658
    correct structure needed as input for the allocator.
10659

10660
    The checks for the completeness of the opcode must have already been
10661
    done.
10662

10663
    """
10664
    instance = self.cfg.GetInstanceInfo(self.name)
10665
    if instance is None:
10666
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
10667
                                   " IAllocator" % self.name)
10668

    
10669
    if instance.disk_template not in constants.DTS_NET_MIRROR:
10670
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
10671
                                 errors.ECODE_INVAL)
10672

    
10673
    if len(instance.secondary_nodes) != 1:
10674
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
10675
                                 errors.ECODE_STATE)
10676

    
10677
    self.required_nodes = 1
10678
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
10679
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
10680

    
10681
    request = {
10682
      "name": self.name,
10683
      "disk_space_total": disk_space,
10684
      "required_nodes": self.required_nodes,
10685
      "relocate_from": self.relocate_from,
10686
      }
10687
    return request
10688

    
10689
  def _AddEvacuateNodes(self):
10690
    """Add evacuate nodes data to allocator structure.
10691

10692
    """
10693
    request = {
10694
      "evac_nodes": self.evac_nodes
10695
      }
10696
    return request
10697

    
10698
  def _BuildInputData(self, fn):
10699
    """Build input data structures.
10700

10701
    """
10702
    self._ComputeClusterData()
10703

    
10704
    request = fn()
10705
    request["type"] = self.mode
10706
    self.in_data["request"] = request
10707

    
10708
    self.in_text = serializer.Dump(self.in_data)
10709

    
10710
  def Run(self, name, validate=True, call_fn=None):
10711
    """Run an instance allocator and return the results.
10712

10713
    """
10714
    if call_fn is None:
10715
      call_fn = self.rpc.call_iallocator_runner
10716

    
10717
    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
10718
    result.Raise("Failure while running the iallocator script")
10719

    
10720
    self.out_text = result.payload
10721
    if validate:
10722
      self._ValidateResult()
10723

    
10724
  def _ValidateResult(self):
10725
    """Process the allocator results.
10726

10727
    This will process and if successful save the result in
10728
    self.out_data and the other parameters.
10729

10730
    """
10731
    try:
10732
      rdict = serializer.Load(self.out_text)
10733
    except Exception, err:
10734
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
10735

    
10736
    if not isinstance(rdict, dict):
10737
      raise errors.OpExecError("Can't parse iallocator results: not a dict")
10738

    
10739
    # TODO: remove backwards compatibility in later versions
10740
    if "nodes" in rdict and "result" not in rdict:
10741
      rdict["result"] = rdict["nodes"]
10742
      del rdict["nodes"]
10743

    
10744
    for key in "success", "info", "result":
10745
      if key not in rdict:
10746
        raise errors.OpExecError("Can't parse iallocator results:"
10747
                                 " missing key '%s'" % key)
10748
      setattr(self, key, rdict[key])
10749

    
10750
    if not isinstance(rdict["result"], list):
10751
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
10752
                               " is not a list")
10753
    self.out_data = rdict
10754

    
10755

    
10756
class LUTestAllocator(NoHooksLU):
10757
  """Run allocator tests.
10758

10759
  This LU runs the allocator tests
10760

10761
  """
10762
  _OP_PARAMS = [
10763
    ("direction", ht.NoDefault,
10764
     ht.TElemOf(constants.VALID_IALLOCATOR_DIRECTIONS)),
10765
    ("mode", ht.NoDefault, ht.TElemOf(constants.VALID_IALLOCATOR_MODES)),
10766
    ("name", ht.NoDefault, ht.TNonEmptyString),
10767
    ("nics", ht.NoDefault, ht.TOr(ht.TNone, ht.TListOf(
10768
      ht.TDictOf(ht.TElemOf(["mac", "ip", "bridge"]),
10769
               ht.TOr(ht.TNone, ht.TNonEmptyString))))),
10770
    ("disks", ht.NoDefault, ht.TOr(ht.TNone, ht.TList)),
10771
    ("hypervisor", None, ht.TMaybeString),
10772
    ("allocator", None, ht.TMaybeString),
10773
    ("tags", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
10774
    ("mem_size", None, ht.TOr(ht.TNone, ht.TPositiveInt)),
10775
    ("vcpus", None, ht.TOr(ht.TNone, ht.TPositiveInt)),
10776
    ("os", None, ht.TMaybeString),
10777
    ("disk_template", None, ht.TMaybeString),
10778
    ("evac_nodes", None, ht.TOr(ht.TNone, ht.TListOf(ht.TNonEmptyString))),
10779
    ]
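  # With direction "in" the LU only returns the generated allocator input
  # text; with direction "out" it actually runs the named allocator and
  # returns its output (see Exec below).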
10780

    
10781
  def CheckPrereq(self):
10782
    """Check prerequisites.
10783

10784
    This checks the opcode parameters depending on the direction and mode test.
10785

10786
    """
10787
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
10788
      for attr in ["mem_size", "disks", "disk_template",
10789
                   "os", "tags", "nics", "vcpus"]:
10790
        if not hasattr(self.op, attr):
10791
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
10792
                                     attr, errors.ECODE_INVAL)
10793
      iname = self.cfg.ExpandInstanceName(self.op.name)
10794
      if iname is not None:
10795
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
10796
                                   iname, errors.ECODE_EXISTS)
10797
      if not isinstance(self.op.nics, list):
10798
        raise errors.OpPrereqError("Invalid parameter 'nics'",
10799
                                   errors.ECODE_INVAL)
10800
      if not isinstance(self.op.disks, list):
10801
        raise errors.OpPrereqError("Invalid parameter 'disks'",
10802
                                   errors.ECODE_INVAL)
10803
      for row in self.op.disks:
10804
        if (not isinstance(row, dict) or
10805
            "size" not in row or
10806
            not isinstance(row["size"], int) or
10807
            "mode" not in row or
10808
            row["mode"] not in ['r', 'w']):
10809
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
10810
                                     " parameter", errors.ECODE_INVAL)
10811
      if self.op.hypervisor is None:
10812
        self.op.hypervisor = self.cfg.GetHypervisorType()
10813
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
10814
      fname = _ExpandInstanceName(self.cfg, self.op.name)
10815
      self.op.name = fname
10816
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
10817
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
10818
      if not hasattr(self.op, "evac_nodes"):
10819
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
10820
                                   " opcode input", errors.ECODE_INVAL)
10821
    else:
10822
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
10823
                                 self.op.mode, errors.ECODE_INVAL)
10824

    
10825
    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
10826
      if self.op.allocator is None:
10827
        raise errors.OpPrereqError("Missing allocator name",
10828
                                   errors.ECODE_INVAL)
10829
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
10830
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
10831
                                 self.op.direction, errors.ECODE_INVAL)
10832

    
10833
  def Exec(self, feedback_fn):
10834
    """Run the allocator test.
10835

10836
    """
10837
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
10838
      ial = IAllocator(self.cfg, self.rpc,
10839
                       mode=self.op.mode,
10840
                       name=self.op.name,
10841
                       mem_size=self.op.mem_size,
10842
                       disks=self.op.disks,
10843
                       disk_template=self.op.disk_template,
10844
                       os=self.op.os,
10845
                       tags=self.op.tags,
10846
                       nics=self.op.nics,
10847
                       vcpus=self.op.vcpus,
10848
                       hypervisor=self.op.hypervisor,
10849
                       )
10850
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
10851
      ial = IAllocator(self.cfg, self.rpc,
10852
                       mode=self.op.mode,
10853
                       name=self.op.name,
10854
                       relocate_from=list(self.relocate_from),
10855
                       )
10856
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
10857
      ial = IAllocator(self.cfg, self.rpc,
10858
                       mode=self.op.mode,
10859
                       evac_nodes=self.op.evac_nodes)
10860
    else:
10861
      raise errors.ProgrammerError("Unhandled mode %s in"
10862
                                   " LUTestAllocator.Exec", self.op.mode)
10863

    
10864
    if self.op.direction == constants.IALLOCATOR_DIR_IN:
10865
      result = ial.in_text
10866
    else:
10867
      ial.Run(self.op.allocator, validate=False)
10868
      result = ial.out_text
10869
    return result