
lib/cmdlib.py @ 4edc512c


1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008, 2009, 2010 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable-msg=W0201,C0302
25

    
26
# W0201 since most LU attributes are defined in CheckPrereq or similar
27
# functions
28

    
29
# C0302: since we have waaaay too many lines in this module
30

    
31
import os
32
import os.path
33
import time
34
import re
35
import platform
36
import logging
37
import copy
38
import OpenSSL
39
import socket
40
import tempfile
41
import shutil
42

    
43
from ganeti import ssh
44
from ganeti import utils
45
from ganeti import errors
46
from ganeti import hypervisor
47
from ganeti import locking
48
from ganeti import constants
49
from ganeti import objects
50
from ganeti import serializer
51
from ganeti import ssconf
52
from ganeti import uidpool
53
from ganeti import compat
54
from ganeti import masterd
55
from ganeti import netutils
56
from ganeti import ht
57
from ganeti import query
58
from ganeti import qlang
59

    
60
import ganeti.masterd.instance # pylint: disable-msg=W0611
61

    
62
# Common opcode attributes
63

    
64
#: output fields for a query operation
65
_POutputFields = ("output_fields", ht.NoDefault, ht.TListOf(ht.TNonEmptyString))
66

    
67

    
68
#: the shutdown timeout
69
_PShutdownTimeout = ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT,
70
                     ht.TPositiveInt)
71

    
72
#: the force parameter
73
_PForce = ("force", False, ht.TBool)
74

    
75
#: a required instance name (for single-instance LUs)
76
_PInstanceName = ("instance_name", ht.NoDefault, ht.TNonEmptyString)
77

    
78
#: Whether to ignore offline nodes
79
_PIgnoreOfflineNodes = ("ignore_offline_nodes", False, ht.TBool)
80

    
81
#: a required node name (for single-node LUs)
82
_PNodeName = ("node_name", ht.NoDefault, ht.TNonEmptyString)
83

    
84
#: the migration type (live/non-live)
85
_PMigrationMode = ("mode", None,
86
                   ht.TOr(ht.TNone, ht.TElemOf(constants.HT_MIGRATION_MODES)))
87

    
88
#: the obsolete 'live' mode (boolean)
89
_PMigrationLive = ("live", None, ht.TMaybeBool)
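
# Illustrative sketch (not part of the upstream module): how the
# (attribute name, default value, type check) triples above are typically
# consumed.  LogicalUnit.__init__ below walks _OP_PARAMS, fills in missing
# opcode attributes with the default and validates them with the check
# function.  The LU and the "retries" parameter are hypothetical.
#
#   class LUExampleNoop(NoHooksLU):
#     _OP_PARAMS = [
#       _PInstanceName,                   # required, no default
#       _PForce,                          # optional, defaults to False
#       ("retries", 3, ht.TPositiveInt),  # hypothetical extra parameter
#       ]
#     REQ_BGL = False
#
#     def ExpandNames(self):
#       self._ExpandAndLockInstance()
#
#     def Exec(self, feedback_fn):
#       feedback_fn("Would act on %s" % self.op.instance_name)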
90

    
91

    
92
# End types
93
class LogicalUnit(object):
94
  """Logical Unit base class.
95

96
  Subclasses must follow these rules:
97
    - implement ExpandNames
98
    - implement CheckPrereq (except when tasklets are used)
99
    - implement Exec (except when tasklets are used)
100
    - implement BuildHooksEnv
101
    - redefine HPATH and HTYPE
102
    - optionally redefine their run requirements:
103
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
104

105
  Note that all commands require root permissions.
106

107
  @ivar dry_run_result: the value (if any) that will be returned to the caller
108
      in dry-run mode (signalled by opcode dry_run parameter)
109
  @cvar _OP_PARAMS: a list of opcode attributes, the default values
110
      they should get if not already defined, and types they must match
111

112
  """
113
  HPATH = None
114
  HTYPE = None
115
  _OP_PARAMS = []
116
  REQ_BGL = True
117

    
118
  def __init__(self, processor, op, context, rpc):
119
    """Constructor for LogicalUnit.
120

121
    This needs to be overridden in derived classes in order to check op
122
    validity.
123

124
    """
125
    self.proc = processor
126
    self.op = op
127
    self.cfg = context.cfg
128
    self.context = context
129
    self.rpc = rpc
130
    # Dicts used to declare locking needs to mcpu
131
    self.needed_locks = None
132
    self.acquired_locks = {}
133
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
134
    self.add_locks = {}
135
    self.remove_locks = {}
136
    # Used to force good behavior when calling helper functions
137
    self.recalculate_locks = {}
138
    self.__ssh = None
139
    # logging
140
    self.Log = processor.Log # pylint: disable-msg=C0103
141
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
142
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
143
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
144
    # support for dry-run
145
    self.dry_run_result = None
146
    # support for generic debug attribute
147
    if (not hasattr(self.op, "debug_level") or
148
        not isinstance(self.op.debug_level, int)):
149
      self.op.debug_level = 0
150

    
151
    # Tasklets
152
    self.tasklets = None
153

    
154
    # The new kind-of-type-system
155
    op_id = self.op.OP_ID
156
    for attr_name, aval, test in self._OP_PARAMS:
157
      if not hasattr(op, attr_name):
158
        if aval == ht.NoDefault:
159
          raise errors.OpPrereqError("Required parameter '%s.%s' missing" %
160
                                     (op_id, attr_name), errors.ECODE_INVAL)
161
        else:
162
          if callable(aval):
163
            dval = aval()
164
          else:
165
            dval = aval
166
          setattr(self.op, attr_name, dval)
167
      attr_val = getattr(op, attr_name)
168
      if test == ht.NoType:
169
        # no tests here
170
        continue
171
      if not callable(test):
172
        raise errors.ProgrammerError("Validation for parameter '%s.%s' failed,"
173
                                     " given type is not a proper type (%s)" %
174
                                     (op_id, attr_name, test))
175
      if not test(attr_val):
176
        logging.error("OpCode %s, parameter %s, has invalid type %s/value %s",
177
                      self.op.OP_ID, attr_name, type(attr_val), attr_val)
178
        raise errors.OpPrereqError("Parameter '%s.%s' fails validation" %
179
                                   (op_id, attr_name), errors.ECODE_INVAL)
180

    
181
    self.CheckArguments()
182

    
183
  def __GetSSH(self):
184
    """Returns the SshRunner object
185

186
    """
187
    if not self.__ssh:
188
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
189
    return self.__ssh
190

    
191
  ssh = property(fget=__GetSSH)
192

    
193
  def CheckArguments(self):
194
    """Check syntactic validity for the opcode arguments.
195

196
    This method is for doing a simple syntactic check and ensuring the
197
    validity of opcode parameters, without any cluster-related
198
    checks. While the same can be accomplished in ExpandNames and/or
199
    CheckPrereq, doing these separately is better because:
200

201
      - ExpandNames is left as purely a lock-related function
202
      - CheckPrereq is run after we have acquired locks (and possibly
203
        waited for them)
204

205
    The function is allowed to change the self.op attribute so that
206
    later methods no longer need to worry about missing parameters.
207

208
    """
209
    pass
210

    
211
  def ExpandNames(self):
212
    """Expand names for this LU.
213

214
    This method is called before starting to execute the opcode, and it should
215
    update all the parameters of the opcode to their canonical form (e.g. a
216
    short node name must be fully expanded after this method has successfully
217
    completed). This way locking, hooks, logging, etc. can work correctly.
218

219
    LUs which implement this method must also populate the self.needed_locks
220
    member, as a dict with lock levels as keys, and a list of needed lock names
221
    as values. Rules:
222

223
      - use an empty dict if you don't need any lock
224
      - if you don't need any lock at a particular level omit that level
225
      - don't put anything for the BGL level
226
      - if you want all locks at a level use locking.ALL_SET as a value
227

228
    If you need to share locks (rather than acquire them exclusively) at one
229
    level you can modify self.share_locks, setting a true value (usually 1) for
230
    that level. By default locks are not shared.
231

232
    This function can also define a list of tasklets, which then will be
233
    executed in order instead of the usual LU-level CheckPrereq and Exec
234
    functions, if those are not defined by the LU.
235

236
    Examples::
237

238
      # Acquire all nodes and one instance
239
      self.needed_locks = {
240
        locking.LEVEL_NODE: locking.ALL_SET,
241
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
242
      }
243
      # Acquire just two nodes
244
      self.needed_locks = {
245
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
246
      }
247
      # Acquire no locks
248
      self.needed_locks = {} # No, you can't leave it to the default value None
249

250
    """
251
    # The implementation of this method is mandatory only if the new LU is
252
    # concurrent, so that old LUs don't need to be changed all at the same
253
    # time.
254
    if self.REQ_BGL:
255
      self.needed_locks = {} # Exclusive LUs don't need locks.
256
    else:
257
      raise NotImplementedError
258

    
259
  def DeclareLocks(self, level):
260
    """Declare LU locking needs for a level
261

262
    While most LUs can just declare their locking needs at ExpandNames time,
263
    sometimes there's the need to calculate some locks after having acquired
264
    the ones before. This function is called just before acquiring locks at a
265
    particular level, but after acquiring the ones at lower levels, and permits
266
    such calculations. It can be used to modify self.needed_locks, and by
267
    default it does nothing.
268

269
    This function is only called if you have something already set in
270
    self.needed_locks for the level.
271

272
    @param level: Locking level which is going to be locked
273
    @type level: member of ganeti.locking.LEVELS
274

275
    """
276

    
277
  def CheckPrereq(self):
278
    """Check prerequisites for this LU.
279

280
    This method should check that the prerequisites for the execution
281
    of this LU are fulfilled. It can do internode communication, but
282
    it should be idempotent - no cluster or system changes are
283
    allowed.
284

285
    The method should raise errors.OpPrereqError in case something is
286
    not fulfilled. Its return value is ignored.
287

288
    This method should also update all the parameters of the opcode to
289
    their canonical form if it hasn't been done by ExpandNames before.
290

291
    """
292
    if self.tasklets is not None:
293
      for (idx, tl) in enumerate(self.tasklets):
294
        logging.debug("Checking prerequisites for tasklet %s/%s",
295
                      idx + 1, len(self.tasklets))
296
        tl.CheckPrereq()
297
    else:
298
      pass
299

    
300
  def Exec(self, feedback_fn):
301
    """Execute the LU.
302

303
    This method should implement the actual work. It should raise
304
    errors.OpExecError for failures that are somewhat dealt with in
305
    code, or expected.
306

307
    """
308
    if self.tasklets is not None:
309
      for (idx, tl) in enumerate(self.tasklets):
310
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
311
        tl.Exec(feedback_fn)
312
    else:
313
      raise NotImplementedError
314

    
315
  def BuildHooksEnv(self):
316
    """Build hooks environment for this LU.
317

318
    This method should return a three-node tuple consisting of: a dict
319
    containing the environment that will be used for running the
320
    specific hook for this LU, a list of node names on which the hook
321
    should run before the execution, and a list of node names on which
322
    the hook should run after the execution.
323

324
    The keys of the dict must not have 'GANETI_' prefixed as this will
325
    be handled in the hooks runner. Also note additional keys will be
326
    added by the hooks runner. If the LU doesn't define any
327
    environment, an empty dict (and not None) should be returned.
328

329
    No nodes should be returned as an empty list (and not None).
330

331
    Note that if the HPATH for a LU class is None, this function will
332
    not be called.
333

334
    """
335
    raise NotImplementedError
336

    
337
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
338
    """Notify the LU about the results of its hooks.
339

340
    This method is called every time a hooks phase is executed, and notifies
341
    the Logical Unit about the hooks' result. The LU can then use it to alter
342
    its result based on the hooks.  By default the method does nothing and the
343
    previous result is passed back unchanged but any LU can define it if it
344
    wants to use the local cluster hook-scripts somehow.
345

346
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
347
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
348
    @param hook_results: the results of the multi-node hooks rpc call
349
    @param feedback_fn: function used to send feedback back to the caller
350
    @param lu_result: the previous Exec result this LU had, or None
351
        in the PRE phase
352
    @return: the new Exec result, based on the previous result
353
        and hook results
354

355
    """
356
    # API must be kept, thus we ignore the unused-argument and
357
    # could-be-a-function warnings
358
    # pylint: disable-msg=W0613,R0201
359
    return lu_result
360

    
361
  def _ExpandAndLockInstance(self):
362
    """Helper function to expand and lock an instance.
363

364
    Many LUs that work on an instance take its name in self.op.instance_name
365
    and need to expand it and then declare the expanded name for locking. This
366
    function does it, and then updates self.op.instance_name to the expanded
367
    name. It also initializes needed_locks as a dict, if this hasn't been done
368
    before.
369

370
    """
371
    if self.needed_locks is None:
372
      self.needed_locks = {}
373
    else:
374
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
375
        "_ExpandAndLockInstance called with instance-level locks set"
376
    self.op.instance_name = _ExpandInstanceName(self.cfg,
377
                                                self.op.instance_name)
378
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
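
  # Illustrative sketch (hypothetical LU code, not from the module): a
  # typical single-instance LU combines _ExpandAndLockInstance with the
  # node-lock recalculation helper below:
  #
  #   def ExpandNames(self):
  #     self._ExpandAndLockInstance()
  #     self.needed_locks[locking.LEVEL_NODE] = []
  #     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
  #
  #   def DeclareLocks(self, level):
  #     if level == locking.LEVEL_NODE:
  #       self._LockInstancesNodes()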
379

    
380
  def _LockInstancesNodes(self, primary_only=False):
381
    """Helper function to declare instances' nodes for locking.
382

383
    This function should be called after locking one or more instances to lock
384
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
385
    with all primary or secondary nodes for instances already locked and
386
    present in self.needed_locks[locking.LEVEL_INSTANCE].
387

388
    It should be called from DeclareLocks, and for safety only works if
389
    self.recalculate_locks[locking.LEVEL_NODE] is set.
390

391
    In the future it may grow parameters to just lock some instance's nodes, or
392
    to just lock primaries or secondary nodes, if needed.
393

394
    It should be called in DeclareLocks in a way similar to::
395

396
      if level == locking.LEVEL_NODE:
397
        self._LockInstancesNodes()
398

399
    @type primary_only: boolean
400
    @param primary_only: only lock primary nodes of locked instances
401

402
    """
403
    assert locking.LEVEL_NODE in self.recalculate_locks, \
404
      "_LockInstancesNodes helper function called with no nodes to recalculate"
405

    
406
    # TODO: check if we've really been called with the instance locks held
407

    
408
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
409
    # future we might want to have different behaviors depending on the value
410
    # of self.recalculate_locks[locking.LEVEL_NODE]
411
    wanted_nodes = []
412
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
413
      instance = self.context.cfg.GetInstanceInfo(instance_name)
414
      wanted_nodes.append(instance.primary_node)
415
      if not primary_only:
416
        wanted_nodes.extend(instance.secondary_nodes)
417

    
418
    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
419
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
420
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
421
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
422

    
423
    del self.recalculate_locks[locking.LEVEL_NODE]
424

    
425

    
426
class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
427
  """Simple LU which runs no hooks.
428

429
  This LU is intended as a parent for other LogicalUnits which will
430
  run no hooks, in order to reduce duplicate code.
431

432
  """
433
  HPATH = None
434
  HTYPE = None
435

    
436
  def BuildHooksEnv(self):
437
    """Empty BuildHooksEnv for NoHooksLu.
438

439
    This just raises an error.
440

441
    """
442
    assert False, "BuildHooksEnv called for NoHooksLUs"
443

    
444

    
445
class Tasklet:
446
  """Tasklet base class.
447

448
  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
449
  they can mix legacy code with tasklets. Locking needs to be done in the LU;
450
  tasklets know nothing about locks.
451

452
  Subclasses must follow these rules:
453
    - Implement CheckPrereq
454
    - Implement Exec
455

456
  """
457
  def __init__(self, lu):
458
    self.lu = lu
459

    
460
    # Shortcuts
461
    self.cfg = lu.cfg
462
    self.rpc = lu.rpc
463

    
464
  def CheckPrereq(self):
465
    """Check prerequisites for this tasklets.
466

467
    This method should check whether the prerequisites for the execution of
468
    this tasklet are fulfilled. It can do internode communication, but it
469
    should be idempotent - no cluster or system changes are allowed.
470

471
    The method should raise errors.OpPrereqError in case something is not
472
    fulfilled. Its return value is ignored.
473

474
    This method should also update all parameters to their canonical form if it
475
    hasn't been done before.
476

477
    """
478
    pass
479

    
480
  def Exec(self, feedback_fn):
481
    """Execute the tasklet.
482

483
    This method should implement the actual work. It should raise
484
    errors.OpExecError for failures that are somewhat dealt with in code, or
485
    expected.
486

487
    """
488
    raise NotImplementedError
489

    
490

    
491
class _QueryBase:
492
  """Base for query utility classes.
493

494
  """
495
  #: Attribute holding field definitions
496
  FIELDS = None
497

    
498
  def __init__(self, names, fields, use_locking):
499
    """Initializes this class.
500

501
    """
502
    self.names = names
503
    self.use_locking = use_locking
504

    
505
    self.query = query.Query(self.FIELDS, fields)
506
    self.requested_data = self.query.RequestedData()
507

    
508
  @classmethod
509
  def FieldsQuery(cls, fields):
510
    """Returns list of available fields.
511

512
    @return: List of L{objects.QueryFieldDefinition}
513

514
    """
515
    if fields is None:
516
      # Client requests all fields
517
      fdefs = query.GetAllFields(cls.FIELDS.values())
518
    else:
519
      fdefs = query.Query(cls.FIELDS, fields).GetFields()
520

    
521
    return {
522
      "fields": [fdef.ToDict() for fdef in fdefs],
523
      }
524

    
525
  def ExpandNames(self, lu):
526
    """Expand names for this query.
527

528
    See L{LogicalUnit.ExpandNames}.
529

530
    """
531
    raise NotImplementedError()
532

    
533
  def DeclareLocks(self, level):
534
    """Declare locks for this query.
535

536
    See L{LogicalUnit.DeclareLocks}.
537

538
    """
539
    raise NotImplementedError()
540

    
541
  def _GetQueryData(self, lu):
542
    """Collects all data for this query.
543

544
    @return: Query data object
545

546
    """
547
    raise NotImplementedError()
548

    
549
  def NewStyleQuery(self, lu):
550
    """Collect data and execute query.
551

552
    """
553
    data = self._GetQueryData(lu)
554

    
555
    return {
556
      "data": self.query.Query(data),
557
      "fields": [fdef.ToDict()
558
                 for fdef in self.query.GetFields()],
559
      }
560

    
561
  def OldStyleQuery(self, lu):
562
    """Collect data and execute query.
563

564
    """
565
    return self.query.OldStyleQuery(self._GetQueryData(lu))
566

    
567

    
568
def _GetWantedNodes(lu, nodes):
569
  """Returns list of checked and expanded node names.
570

571
  @type lu: L{LogicalUnit}
572
  @param lu: the logical unit on whose behalf we execute
573
  @type nodes: list
574
  @param nodes: list of node names or None for all nodes
575
  @rtype: list
576
  @return: the list of nodes, sorted
577
  @raise errors.ProgrammerError: if the nodes parameter is wrong type
578

579
  """
580
  if nodes:
581
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]
582

    
583
  return utils.NiceSort(lu.cfg.GetNodeList())
584

    
585

    
586
def _GetWantedInstances(lu, instances):
587
  """Returns list of checked and expanded instance names.
588

589
  @type lu: L{LogicalUnit}
590
  @param lu: the logical unit on whose behalf we execute
591
  @type instances: list
592
  @param instances: list of instance names or None for all instances
593
  @rtype: list
594
  @return: the list of instances, sorted
595
  @raise errors.OpPrereqError: if the instances parameter is wrong type
596
  @raise errors.OpPrereqError: if any of the passed instances is not found
597

598
  """
599
  if instances:
600
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
601
  else:
602
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
603
  return wanted
604

    
605

    
606
def _GetUpdatedParams(old_params, update_dict,
607
                      use_default=True, use_none=False):
608
  """Return the new version of a parameter dictionary.
609

610
  @type old_params: dict
611
  @param old_params: old parameters
612
  @type update_dict: dict
613
  @param update_dict: dict containing new parameter values, or
614
      constants.VALUE_DEFAULT to reset the parameter to its default
615
      value
616
  @type use_default: boolean
617
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
618
      values as 'to be deleted' values
619
  @type use_none: boolean
620
  @param use_none: whether to recognise C{None} values as 'to be
621
      deleted' values
622
  @rtype: dict
623
  @return: the new parameter dictionary
624

625
  """
626
  params_copy = copy.deepcopy(old_params)
627
  for key, val in update_dict.iteritems():
628
    if ((use_default and val == constants.VALUE_DEFAULT) or
629
        (use_none and val is None)):
630
      try:
631
        del params_copy[key]
632
      except KeyError:
633
        pass
634
    else:
635
      params_copy[key] = val
636
  return params_copy
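
# Illustrative example (hypothetical parameter values, not from the module):
# constants.VALUE_DEFAULT removes a key so it falls back to the cluster
# default, while other keys are simply overwritten:
#
#   old = {"kernel_path": "/boot/vmlinuz", "root_path": "/dev/vda1"}
#   upd = {"kernel_path": constants.VALUE_DEFAULT, "root_path": "/dev/vda2"}
#   _GetUpdatedParams(old, upd)
#   # -> {"root_path": "/dev/vda2"}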
637

    
638

    
639
def _CheckOutputFields(static, dynamic, selected):
640
  """Checks whether all selected fields are valid.
641

642
  @type static: L{utils.FieldSet}
643
  @param static: static fields set
644
  @type dynamic: L{utils.FieldSet}
645
  @param dynamic: dynamic fields set
646

647
  """
648
  f = utils.FieldSet()
649
  f.Extend(static)
650
  f.Extend(dynamic)
651

    
652
  delta = f.NonMatching(selected)
653
  if delta:
654
    raise errors.OpPrereqError("Unknown output fields selected: %s"
655
                               % ",".join(delta), errors.ECODE_INVAL)
656

    
657

    
658
def _CheckGlobalHvParams(params):
659
  """Validates that given hypervisor params are not global ones.
660

661
  This will ensure that instances don't get customised versions of
662
  global params.
663

664
  """
665
  used_globals = constants.HVC_GLOBALS.intersection(params)
666
  if used_globals:
667
    msg = ("The following hypervisor parameters are global and cannot"
668
           " be customized at instance level, please modify them at"
669
           " cluster level: %s" % utils.CommaJoin(used_globals))
670
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
671

    
672

    
673
def _CheckNodeOnline(lu, node, msg=None):
674
  """Ensure that a given node is online.
675

676
  @param lu: the LU on behalf of which we make the check
677
  @param node: the node to check
678
  @param msg: if passed, should be a message to replace the default one
679
  @raise errors.OpPrereqError: if the node is offline
680

681
  """
682
  if msg is None:
683
    msg = "Can't use offline node"
684
  if lu.cfg.GetNodeInfo(node).offline:
685
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
686

    
687

    
688
def _CheckNodeNotDrained(lu, node):
689
  """Ensure that a given node is not drained.
690

691
  @param lu: the LU on behalf of which we make the check
692
  @param node: the node to check
693
  @raise errors.OpPrereqError: if the node is drained
694

695
  """
696
  if lu.cfg.GetNodeInfo(node).drained:
697
    raise errors.OpPrereqError("Can't use drained node %s" % node,
698
                               errors.ECODE_STATE)
699

    
700

    
701
def _CheckNodeVmCapable(lu, node):
702
  """Ensure that a given node is vm capable.
703

704
  @param lu: the LU on behalf of which we make the check
705
  @param node: the node to check
706
  @raise errors.OpPrereqError: if the node is not vm capable
707

708
  """
709
  if not lu.cfg.GetNodeInfo(node).vm_capable:
710
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
711
                               errors.ECODE_STATE)
712

    
713

    
714
def _CheckNodeHasOS(lu, node, os_name, force_variant):
715
  """Ensure that a node supports a given OS.
716

717
  @param lu: the LU on behalf of which we make the check
718
  @param node: the node to check
719
  @param os_name: the OS to query about
720
  @param force_variant: whether to ignore variant errors
721
  @raise errors.OpPrereqError: if the node is not supporting the OS
722

723
  """
724
  result = lu.rpc.call_os_get(node, os_name)
725
  result.Raise("OS '%s' not in supported OS list for node %s" %
726
               (os_name, node),
727
               prereq=True, ecode=errors.ECODE_INVAL)
728
  if not force_variant:
729
    _CheckOSVariant(result.payload, os_name)
730

    
731

    
732
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
733
  """Ensure that a node has the given secondary ip.
734

735
  @type lu: L{LogicalUnit}
736
  @param lu: the LU on behalf of which we make the check
737
  @type node: string
738
  @param node: the node to check
739
  @type secondary_ip: string
740
  @param secondary_ip: the ip to check
741
  @type prereq: boolean
742
  @param prereq: whether to throw a prerequisite or an execute error
743
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
744
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
745

746
  """
747
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
748
  result.Raise("Failure checking secondary ip on node %s" % node,
749
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
750
  if not result.payload:
751
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
752
           " please fix and re-run this command" % secondary_ip)
753
    if prereq:
754
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
755
    else:
756
      raise errors.OpExecError(msg)
757

    
758

    
759
def _RequireFileStorage():
760
  """Checks that file storage is enabled.
761

762
  @raise errors.OpPrereqError: when file storage is disabled
763

764
  """
765
  if not constants.ENABLE_FILE_STORAGE:
766
    raise errors.OpPrereqError("File storage disabled at configure time",
767
                               errors.ECODE_INVAL)
768

    
769

    
770
def _CheckDiskTemplate(template):
771
  """Ensure a given disk template is valid.
772

773
  """
774
  if template not in constants.DISK_TEMPLATES:
775
    msg = ("Invalid disk template name '%s', valid templates are: %s" %
776
           (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
777
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
778
  if template == constants.DT_FILE:
779
    _RequireFileStorage()
780
  return True
781

    
782

    
783
def _CheckStorageType(storage_type):
784
  """Ensure a given storage type is valid.
785

786
  """
787
  if storage_type not in constants.VALID_STORAGE_TYPES:
788
    raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
789
                               errors.ECODE_INVAL)
790
  if storage_type == constants.ST_FILE:
791
    _RequireFileStorage()
792
  return True
793

    
794

    
795
def _GetClusterDomainSecret():
796
  """Reads the cluster domain secret.
797

798
  """
799
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
800
                               strict=True)
801

    
802

    
803
def _CheckInstanceDown(lu, instance, reason):
804
  """Ensure that an instance is not running."""
805
  if instance.admin_up:
806
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
807
                               (instance.name, reason), errors.ECODE_STATE)
808

    
809
  pnode = instance.primary_node
810
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
811
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
812
              prereq=True, ecode=errors.ECODE_ENVIRON)
813

    
814
  if instance.name in ins_l.payload:
815
    raise errors.OpPrereqError("Instance %s is running, %s" %
816
                               (instance.name, reason), errors.ECODE_STATE)
817

    
818

    
819
def _ExpandItemName(fn, name, kind):
820
  """Expand an item name.
821

822
  @param fn: the function to use for expansion
823
  @param name: requested item name
824
  @param kind: text description ('Node' or 'Instance')
825
  @return: the resolved (full) name
826
  @raise errors.OpPrereqError: if the item is not found
827

828
  """
829
  full_name = fn(name)
830
  if full_name is None:
831
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
832
                               errors.ECODE_NOENT)
833
  return full_name
834

    
835

    
836
def _ExpandNodeName(cfg, name):
837
  """Wrapper over L{_ExpandItemName} for nodes."""
838
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
839

    
840

    
841
def _ExpandInstanceName(cfg, name):
842
  """Wrapper over L{_ExpandItemName} for instance."""
843
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
844

    
845

    
846
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
847
                          memory, vcpus, nics, disk_template, disks,
848
                          bep, hvp, hypervisor_name):
849
  """Builds instance related env variables for hooks
850

851
  This builds the hook environment from individual variables.
852

853
  @type name: string
854
  @param name: the name of the instance
855
  @type primary_node: string
856
  @param primary_node: the name of the instance's primary node
857
  @type secondary_nodes: list
858
  @param secondary_nodes: list of secondary nodes as strings
859
  @type os_type: string
860
  @param os_type: the name of the instance's OS
861
  @type status: boolean
862
  @param status: the should_run status of the instance
863
  @type memory: string
864
  @param memory: the memory size of the instance
865
  @type vcpus: string
866
  @param vcpus: the count of VCPUs the instance has
867
  @type nics: list
868
  @param nics: list of tuples (ip, mac, mode, link) representing
869
      the NICs the instance has
870
  @type disk_template: string
871
  @param disk_template: the disk template of the instance
872
  @type disks: list
873
  @param disks: the list of (size, mode) pairs
874
  @type bep: dict
875
  @param bep: the backend parameters for the instance
876
  @type hvp: dict
877
  @param hvp: the hypervisor parameters for the instance
878
  @type hypervisor_name: string
879
  @param hypervisor_name: the hypervisor for the instance
880
  @rtype: dict
881
  @return: the hook environment for this instance
882

883
  """
884
  if status:
885
    str_status = "up"
886
  else:
887
    str_status = "down"
888
  env = {
889
    "OP_TARGET": name,
890
    "INSTANCE_NAME": name,
891
    "INSTANCE_PRIMARY": primary_node,
892
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
893
    "INSTANCE_OS_TYPE": os_type,
894
    "INSTANCE_STATUS": str_status,
895
    "INSTANCE_MEMORY": memory,
896
    "INSTANCE_VCPUS": vcpus,
897
    "INSTANCE_DISK_TEMPLATE": disk_template,
898
    "INSTANCE_HYPERVISOR": hypervisor_name,
899
  }
900

    
901
  if nics:
902
    nic_count = len(nics)
903
    for idx, (ip, mac, mode, link) in enumerate(nics):
904
      if ip is None:
905
        ip = ""
906
      env["INSTANCE_NIC%d_IP" % idx] = ip
907
      env["INSTANCE_NIC%d_MAC" % idx] = mac
908
      env["INSTANCE_NIC%d_MODE" % idx] = mode
909
      env["INSTANCE_NIC%d_LINK" % idx] = link
910
      if mode == constants.NIC_MODE_BRIDGED:
911
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
912
  else:
913
    nic_count = 0
914

    
915
  env["INSTANCE_NIC_COUNT"] = nic_count
916

    
917
  if disks:
918
    disk_count = len(disks)
919
    for idx, (size, mode) in enumerate(disks):
920
      env["INSTANCE_DISK%d_SIZE" % idx] = size
921
      env["INSTANCE_DISK%d_MODE" % idx] = mode
922
  else:
923
    disk_count = 0
924

    
925
  env["INSTANCE_DISK_COUNT"] = disk_count
926

    
927
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
928
    for key, value in source.items():
929
      env["INSTANCE_%s_%s" % (kind, key)] = value
930

    
931
  return env
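
# Illustrative example (hypothetical instance, not from the module): for an
# instance with one bridged NIC and one disk, the environment built above
# contains keys such as INSTANCE_NAME, INSTANCE_PRIMARY, INSTANCE_SECONDARIES,
# INSTANCE_STATUS, INSTANCE_NIC_COUNT, INSTANCE_NIC0_MAC, INSTANCE_NIC0_MODE,
# INSTANCE_NIC0_LINK, INSTANCE_NIC0_BRIDGE, INSTANCE_DISK_COUNT,
# INSTANCE_DISK0_SIZE, INSTANCE_DISK0_MODE, plus one INSTANCE_BE_* and
# INSTANCE_HV_* entry per backend/hypervisor parameter (the hooks runner
# later adds the GANETI_ prefix, as noted in BuildHooksEnv).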
932

    
933

    
934
def _NICListToTuple(lu, nics):
935
  """Build a list of nic information tuples.
936

937
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
938
  value in LUQueryInstanceData.
939

940
  @type lu:  L{LogicalUnit}
941
  @param lu: the logical unit on whose behalf we execute
942
  @type nics: list of L{objects.NIC}
943
  @param nics: list of nics to convert to hooks tuples
944

945
  """
946
  hooks_nics = []
947
  cluster = lu.cfg.GetClusterInfo()
948
  for nic in nics:
949
    ip = nic.ip
950
    mac = nic.mac
951
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
952
    mode = filled_params[constants.NIC_MODE]
953
    link = filled_params[constants.NIC_LINK]
954
    hooks_nics.append((ip, mac, mode, link))
955
  return hooks_nics
956

    
957

    
958
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
959
  """Builds instance related env variables for hooks from an object.
960

961
  @type lu: L{LogicalUnit}
962
  @param lu: the logical unit on whose behalf we execute
963
  @type instance: L{objects.Instance}
964
  @param instance: the instance for which we should build the
965
      environment
966
  @type override: dict
967
  @param override: dictionary with key/values that will override
968
      our values
969
  @rtype: dict
970
  @return: the hook environment dictionary
971

972
  """
973
  cluster = lu.cfg.GetClusterInfo()
974
  bep = cluster.FillBE(instance)
975
  hvp = cluster.FillHV(instance)
976
  args = {
977
    'name': instance.name,
978
    'primary_node': instance.primary_node,
979
    'secondary_nodes': instance.secondary_nodes,
980
    'os_type': instance.os,
981
    'status': instance.admin_up,
982
    'memory': bep[constants.BE_MEMORY],
983
    'vcpus': bep[constants.BE_VCPUS],
984
    'nics': _NICListToTuple(lu, instance.nics),
985
    'disk_template': instance.disk_template,
986
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
987
    'bep': bep,
988
    'hvp': hvp,
989
    'hypervisor_name': instance.hypervisor,
990
  }
991
  if override:
992
    args.update(override)
993
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
994

    
995

    
996
def _AdjustCandidatePool(lu, exceptions):
997
  """Adjust the candidate pool after node operations.
998

999
  """
1000
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1001
  if mod_list:
1002
    lu.LogInfo("Promoted nodes to master candidate role: %s",
1003
               utils.CommaJoin(node.name for node in mod_list))
1004
    for name in mod_list:
1005
      lu.context.ReaddNode(name)
1006
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1007
  if mc_now > mc_max:
1008
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1009
               (mc_now, mc_max))
1010

    
1011

    
1012
def _DecideSelfPromotion(lu, exceptions=None):
1013
  """Decide whether I should promote myself as a master candidate.
1014

1015
  """
1016
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1017
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1018
  # the new node will increase mc_max by one, so:
1019
  mc_should = min(mc_should + 1, cp_size)
1020
  return mc_now < mc_should
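
# Worked example (hypothetical numbers, not from the module): if
# GetMasterCandidateStats reports mc_now = 3 and mc_should = 5 while
# candidate_pool_size = 10, then mc_should is raised to min(5 + 1, 10) = 6
# and the function returns True (3 < 6), i.e. the new node should promote
# itself.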
1021

    
1022

    
1023
def _CheckNicsBridgesExist(lu, target_nics, target_node):
1024
  """Check that the brigdes needed by a list of nics exist.
1025

1026
  """
1027
  cluster = lu.cfg.GetClusterInfo()
1028
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1029
  brlist = [params[constants.NIC_LINK] for params in paramslist
1030
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1031
  if brlist:
1032
    result = lu.rpc.call_bridges_exist(target_node, brlist)
1033
    result.Raise("Error checking bridges on destination node '%s'" %
1034
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1035

    
1036

    
1037
def _CheckInstanceBridgesExist(lu, instance, node=None):
1038
  """Check that the brigdes needed by an instance exist.
1039

1040
  """
1041
  if node is None:
1042
    node = instance.primary_node
1043
  _CheckNicsBridgesExist(lu, instance.nics, node)
1044

    
1045

    
1046
def _CheckOSVariant(os_obj, name):
1047
  """Check whether an OS name conforms to the os variants specification.
1048

1049
  @type os_obj: L{objects.OS}
1050
  @param os_obj: OS object to check
1051
  @type name: string
1052
  @param name: OS name passed by the user, to check for validity
1053

1054
  """
1055
  if not os_obj.supported_variants:
1056
    return
1057
  variant = objects.OS.GetVariant(name)
1058
  if not variant:
1059
    raise errors.OpPrereqError("OS name must include a variant",
1060
                               errors.ECODE_INVAL)
1061

    
1062
  if variant not in os_obj.supported_variants:
1063
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1064

    
1065

    
1066
def _GetNodeInstancesInner(cfg, fn):
1067
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1068

    
1069

    
1070
def _GetNodeInstances(cfg, node_name):
1071
  """Returns a list of all primary and secondary instances on a node.
1072

1073
  """
1074

    
1075
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1076

    
1077

    
1078
def _GetNodePrimaryInstances(cfg, node_name):
1079
  """Returns primary instances on a node.
1080

1081
  """
1082
  return _GetNodeInstancesInner(cfg,
1083
                                lambda inst: node_name == inst.primary_node)
1084

    
1085

    
1086
def _GetNodeSecondaryInstances(cfg, node_name):
1087
  """Returns secondary instances on a node.
1088

1089
  """
1090
  return _GetNodeInstancesInner(cfg,
1091
                                lambda inst: node_name in inst.secondary_nodes)
1092

    
1093

    
1094
def _GetStorageTypeArgs(cfg, storage_type):
1095
  """Returns the arguments for a storage type.
1096

1097
  """
1098
  # Special case for file storage
1099
  if storage_type == constants.ST_FILE:
1100
    # storage.FileStorage wants a list of storage directories
1101
    return [[cfg.GetFileStorageDir()]]
1102

    
1103
  return []
1104

    
1105

    
1106
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1107
  faulty = []
1108

    
1109
  for dev in instance.disks:
1110
    cfg.SetDiskID(dev, node_name)
1111

    
1112
  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1113
  result.Raise("Failed to get disk status from node %s" % node_name,
1114
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
1115

    
1116
  for idx, bdev_status in enumerate(result.payload):
1117
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1118
      faulty.append(idx)
1119

    
1120
  return faulty
1121

    
1122

    
1123
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1124
  """Check the sanity of iallocator and node arguments and use the
1125
  cluster-wide iallocator if appropriate.
1126

1127
  Check that at most one of (iallocator, node) is specified. If none is
1128
  specified, then the LU's opcode's iallocator slot is filled with the
1129
  cluster-wide default iallocator.
1130

1131
  @type iallocator_slot: string
1132
  @param iallocator_slot: the name of the opcode iallocator slot
1133
  @type node_slot: string
1134
  @param node_slot: the name of the opcode target node slot
1135

1136
  """
1137
  node = getattr(lu.op, node_slot, None)
1138
  iallocator = getattr(lu.op, iallocator_slot, None)
1139

    
1140
  if node is not None and iallocator is not None:
1141
    raise errors.OpPrereqError("Do not specify both, iallocator and node.",
1142
                               errors.ECODE_INVAL)
1143
  elif node is None and iallocator is None:
1144
    default_iallocator = lu.cfg.GetDefaultIAllocator()
1145
    if default_iallocator:
1146
      setattr(lu.op, iallocator_slot, default_iallocator)
1147
    else:
1148
      raise errors.OpPrereqError("No iallocator or node given and no"
1149
                                 " cluster-wide default iallocator found."
1150
                                 " Please specify either an iallocator or a"
1151
                                 " node, or set a cluster-wide default"
1152
                                 " iallocator.")
1153

    
1154

    
1155
class LUPostInitCluster(LogicalUnit):
1156
  """Logical unit for running hooks after cluster initialization.
1157

1158
  """
1159
  HPATH = "cluster-init"
1160
  HTYPE = constants.HTYPE_CLUSTER
1161

    
1162
  def BuildHooksEnv(self):
1163
    """Build hooks env.
1164

1165
    """
1166
    env = {"OP_TARGET": self.cfg.GetClusterName()}
1167
    mn = self.cfg.GetMasterNode()
1168
    return env, [], [mn]
1169

    
1170
  def Exec(self, feedback_fn):
1171
    """Nothing to do.
1172

1173
    """
1174
    return True
1175

    
1176

    
1177
class LUDestroyCluster(LogicalUnit):
1178
  """Logical unit for destroying the cluster.
1179

1180
  """
1181
  HPATH = "cluster-destroy"
1182
  HTYPE = constants.HTYPE_CLUSTER
1183

    
1184
  def BuildHooksEnv(self):
1185
    """Build hooks env.
1186

1187
    """
1188
    env = {"OP_TARGET": self.cfg.GetClusterName()}
1189
    return env, [], []
1190

    
1191
  def CheckPrereq(self):
1192
    """Check prerequisites.
1193

1194
    This checks whether the cluster is empty.
1195

1196
    Any errors are signaled by raising errors.OpPrereqError.
1197

1198
    """
1199
    master = self.cfg.GetMasterNode()
1200

    
1201
    nodelist = self.cfg.GetNodeList()
1202
    if len(nodelist) != 1 or nodelist[0] != master:
1203
      raise errors.OpPrereqError("There are still %d node(s) in"
1204
                                 " this cluster." % (len(nodelist) - 1),
1205
                                 errors.ECODE_INVAL)
1206
    instancelist = self.cfg.GetInstanceList()
1207
    if instancelist:
1208
      raise errors.OpPrereqError("There are still %d instance(s) in"
1209
                                 " this cluster." % len(instancelist),
1210
                                 errors.ECODE_INVAL)
1211

    
1212
  def Exec(self, feedback_fn):
1213
    """Destroys the cluster.
1214

1215
    """
1216
    master = self.cfg.GetMasterNode()
1217

    
1218
    # Run post hooks on master node before it's removed
1219
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
1220
    try:
1221
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
1222
    except:
1223
      # pylint: disable-msg=W0702
1224
      self.LogWarning("Errors occurred running hooks on %s" % master)
1225

    
1226
    result = self.rpc.call_node_stop_master(master, False)
1227
    result.Raise("Could not disable the master role")
1228

    
1229
    return master
1230

    
1231

    
1232
def _VerifyCertificate(filename):
1233
  """Verifies a certificate for LUVerifyCluster.
1234

1235
  @type filename: string
1236
  @param filename: Path to PEM file
1237

1238
  """
1239
  try:
1240
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1241
                                           utils.ReadFile(filename))
1242
  except Exception, err: # pylint: disable-msg=W0703
1243
    return (LUVerifyCluster.ETYPE_ERROR,
1244
            "Failed to load X509 certificate %s: %s" % (filename, err))
1245

    
1246
  (errcode, msg) = \
1247
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1248
                                constants.SSL_CERT_EXPIRATION_ERROR)
1249

    
1250
  if msg:
1251
    fnamemsg = "While verifying %s: %s" % (filename, msg)
1252
  else:
1253
    fnamemsg = None
1254

    
1255
  if errcode is None:
1256
    return (None, fnamemsg)
1257
  elif errcode == utils.CERT_WARNING:
1258
    return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
1259
  elif errcode == utils.CERT_ERROR:
1260
    return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)
1261

    
1262
  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1263

    
1264

    
1265
class LUVerifyCluster(LogicalUnit):
1266
  """Verifies the cluster status.
1267

1268
  """
1269
  HPATH = "cluster-verify"
1270
  HTYPE = constants.HTYPE_CLUSTER
1271
  _OP_PARAMS = [
1272
    ("skip_checks", ht.EmptyList,
1273
     ht.TListOf(ht.TElemOf(constants.VERIFY_OPTIONAL_CHECKS))),
1274
    ("verbose", False, ht.TBool),
1275
    ("error_codes", False, ht.TBool),
1276
    ("debug_simulate_errors", False, ht.TBool),
1277
    ]
1278
  REQ_BGL = False
1279

    
1280
  TCLUSTER = "cluster"
1281
  TNODE = "node"
1282
  TINSTANCE = "instance"
1283

    
1284
  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1285
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1286
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1287
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1288
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1289
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1290
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1291
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1292
  ENODEDRBD = (TNODE, "ENODEDRBD")
1293
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1294
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1295
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
1296
  ENODEHV = (TNODE, "ENODEHV")
1297
  ENODELVM = (TNODE, "ENODELVM")
1298
  ENODEN1 = (TNODE, "ENODEN1")
1299
  ENODENET = (TNODE, "ENODENET")
1300
  ENODEOS = (TNODE, "ENODEOS")
1301
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1302
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1303
  ENODERPC = (TNODE, "ENODERPC")
1304
  ENODESSH = (TNODE, "ENODESSH")
1305
  ENODEVERSION = (TNODE, "ENODEVERSION")
1306
  ENODESETUP = (TNODE, "ENODESETUP")
1307
  ENODETIME = (TNODE, "ENODETIME")
1308

    
1309
  ETYPE_FIELD = "code"
1310
  ETYPE_ERROR = "ERROR"
1311
  ETYPE_WARNING = "WARNING"
1312

    
1313
  _HOOKS_INDENT_RE = re.compile("^", re.M)
1314

    
1315
  class NodeImage(object):
1316
    """A class representing the logical and physical status of a node.
1317

1318
    @type name: string
1319
    @ivar name: the node name to which this object refers
1320
    @ivar volumes: a structure as returned from
1321
        L{ganeti.backend.GetVolumeList} (runtime)
1322
    @ivar instances: a list of running instances (runtime)
1323
    @ivar pinst: list of configured primary instances (config)
1324
    @ivar sinst: list of configured secondary instances (config)
1325
    @ivar sbp: dictionary of {secondary-node: list of instances} of all peers
1326
        of this node (config)
1327
    @ivar mfree: free memory, as reported by hypervisor (runtime)
1328
    @ivar dfree: free disk, as reported by the node (runtime)
1329
    @ivar offline: the offline status (config)
1330
    @type rpc_fail: boolean
1331
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
1332
        not whether the individual keys were correct) (runtime)
1333
    @type lvm_fail: boolean
1334
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1335
    @type hyp_fail: boolean
1336
    @ivar hyp_fail: whether the RPC call didn't return the instance list
1337
    @type ghost: boolean
1338
    @ivar ghost: whether this is a known node or not (config)
1339
    @type os_fail: boolean
1340
    @ivar os_fail: whether the RPC call didn't return valid OS data
1341
    @type oslist: list
1342
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1343
    @type vm_capable: boolean
1344
    @ivar vm_capable: whether the node can host instances
1345

1346
    """
1347
    def __init__(self, offline=False, name=None, vm_capable=True):
1348
      self.name = name
1349
      self.volumes = {}
1350
      self.instances = []
1351
      self.pinst = []
1352
      self.sinst = []
1353
      self.sbp = {}
1354
      self.mfree = 0
1355
      self.dfree = 0
1356
      self.offline = offline
1357
      self.vm_capable = vm_capable
1358
      self.rpc_fail = False
1359
      self.lvm_fail = False
1360
      self.hyp_fail = False
1361
      self.ghost = False
1362
      self.os_fail = False
1363
      self.oslist = {}
1364

    
1365
  def ExpandNames(self):
1366
    self.needed_locks = {
1367
      locking.LEVEL_NODE: locking.ALL_SET,
1368
      locking.LEVEL_INSTANCE: locking.ALL_SET,
1369
    }
1370
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1371

    
1372
  def _Error(self, ecode, item, msg, *args, **kwargs):
1373
    """Format an error message.
1374

1375
    Based on the opcode's error_codes parameter, either format a
1376
    parseable error code, or a simpler error string.
1377

1378
    This must be called only from Exec and functions called from Exec.
1379

1380
    """
1381
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1382
    itype, etxt = ecode
1383
    # first complete the msg
1384
    if args:
1385
      msg = msg % args
1386
    # then format the whole message
1387
    if self.op.error_codes:
1388
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1389
    else:
1390
      if item:
1391
        item = " " + item
1392
      else:
1393
        item = ""
1394
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1395
    # and finally report it via the feedback_fn
1396
    self._feedback_fn("  - %s" % msg)
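
  # Illustrative output (hypothetical values, not from the module): for
  # ecode = self.ENODELVM, item = "node1.example.com" and msg = "no volume
  # group", the reported line is
  #   "  - ERROR:ENODELVM:node:node1.example.com:no volume group"
  # when the opcode's error_codes parameter is set, and
  #   "  - ERROR: node node1.example.com: no volume group"
  # otherwise.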
1397

    
1398
  def _ErrorIf(self, cond, *args, **kwargs):
1399
    """Log an error message if the passed condition is True.
1400

1401
    """
1402
    cond = bool(cond) or self.op.debug_simulate_errors
1403
    if cond:
1404
      self._Error(*args, **kwargs)
1405
    # do not mark the operation as failed for WARN cases only
1406
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1407
      self.bad = self.bad or cond
1408

    
1409
  def _VerifyNode(self, ninfo, nresult):
1410
    """Perform some basic validation on data returned from a node.
1411

1412
      - check the result data structure is well formed and has all the
1413
        mandatory fields
1414
      - check ganeti version
1415

1416
    @type ninfo: L{objects.Node}
1417
    @param ninfo: the node to check
1418
    @param nresult: the results from the node
1419
    @rtype: boolean
1420
    @return: whether overall this call was successful (and we can expect
1421
         reasonable values in the response)
1422

1423
    """
1424
    node = ninfo.name
1425
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1426

    
1427
    # main result, nresult should be a non-empty dict
1428
    test = not nresult or not isinstance(nresult, dict)
1429
    _ErrorIf(test, self.ENODERPC, node,
1430
                  "unable to verify node: no data returned")
1431
    if test:
1432
      return False
1433

    
1434
    # compares ganeti version
1435
    local_version = constants.PROTOCOL_VERSION
1436
    remote_version = nresult.get("version", None)
1437
    test = not (remote_version and
1438
                isinstance(remote_version, (list, tuple)) and
1439
                len(remote_version) == 2)
1440
    _ErrorIf(test, self.ENODERPC, node,
1441
             "connection to node returned invalid data")
1442
    if test:
1443
      return False
1444

    
1445
    test = local_version != remote_version[0]
1446
    _ErrorIf(test, self.ENODEVERSION, node,
1447
             "incompatible protocol versions: master %s,"
1448
             " node %s", local_version, remote_version[0])
1449
    if test:
1450
      return False
1451

    
1452
    # node seems compatible, we can actually try to look into its results
1453

    
1454
    # full package version
1455
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1456
                  self.ENODEVERSION, node,
1457
                  "software version mismatch: master %s, node %s",
1458
                  constants.RELEASE_VERSION, remote_version[1],
1459
                  code=self.ETYPE_WARNING)
1460

    
1461
    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1462
    if ninfo.vm_capable and isinstance(hyp_result, dict):
1463
      for hv_name, hv_result in hyp_result.iteritems():
1464
        test = hv_result is not None
1465
        _ErrorIf(test, self.ENODEHV, node,
1466
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1467

    
1468
    test = nresult.get(constants.NV_NODESETUP,
1469
                           ["Missing NODESETUP results"])
1470
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1471
             "; ".join(test))
1472

    
1473
    return True
1474

    
1475
  def _VerifyNodeTime(self, ninfo, nresult,
1476
                      nvinfo_starttime, nvinfo_endtime):
1477
    """Check the node time.
1478

1479
    @type ninfo: L{objects.Node}
1480
    @param ninfo: the node to check
1481
    @param nresult: the remote results for the node
1482
    @param nvinfo_starttime: the start time of the RPC call
1483
    @param nvinfo_endtime: the end time of the RPC call
1484

1485
    """
1486
    node = ninfo.name
1487
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1488

    
1489
    ntime = nresult.get(constants.NV_TIME, None)
1490
    try:
1491
      ntime_merged = utils.MergeTime(ntime)
1492
    except (ValueError, TypeError):
1493
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1494
      return
1495

    
1496
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1497
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1498
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1499
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1500
    else:
1501
      ntime_diff = None
1502

    
1503
    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1504
             "Node time diverges by at least %s from master node time",
1505
             ntime_diff)
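
  # Worked example (hypothetical numbers, not from the module): if the RPC
  # ran between nvinfo_starttime = 1000.0 and nvinfo_endtime = 1002.0, the
  # node reports ntime_merged = 1010.0 and the allowed skew
  # (constants.NODE_MAX_CLOCK_SKEW) is assumed to be 5 seconds, the second
  # branch above triggers and ntime_diff = "8.0s" (1010.0 - 1002.0), which
  # is reported as an ENODETIME error.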
1506

    
1507
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1508
    """Check the node time.
1509

1510
    @type ninfo: L{objects.Node}
1511
    @param ninfo: the node to check
1512
    @param nresult: the remote results for the node
1513
    @param vg_name: the configured VG name
1514

1515
    """
1516
    if vg_name is None:
1517
      return
1518

    
1519
    node = ninfo.name
1520
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1521

    
1522
    # checks vg existence and size > 20G
1523
    vglist = nresult.get(constants.NV_VGLIST, None)
1524
    test = not vglist
1525
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1526
    if not test:
1527
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1528
                                            constants.MIN_VG_SIZE)
1529
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1530

    
1531
    # check pv names
1532
    pvlist = nresult.get(constants.NV_PVLIST, None)
1533
    test = pvlist is None
1534
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1535
    if not test:
1536
      # check that ':' is not present in PV names, since it's a
1537
      # special character for lvcreate (denotes the range of PEs to
1538
      # use on the PV)
1539
      for _, pvname, owner_vg in pvlist:
1540
        test = ":" in pvname
1541
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1542
                 " '%s' of VG '%s'", pvname, owner_vg)
1543

    
1544
  def _VerifyNodeNetwork(self, ninfo, nresult):
1545
    """Check the node time.
1546

1547
    @type ninfo: L{objects.Node}
1548
    @param ninfo: the node to check
1549
    @param nresult: the remote results for the node
1550

1551
    """
1552
    node = ninfo.name
1553
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1554

    
1555
    test = constants.NV_NODELIST not in nresult
1556
    _ErrorIf(test, self.ENODESSH, node,
1557
             "node hasn't returned node ssh connectivity data")
1558
    if not test:
1559
      if nresult[constants.NV_NODELIST]:
1560
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1561
          _ErrorIf(True, self.ENODESSH, node,
1562
                   "ssh communication with node '%s': %s", a_node, a_msg)
1563

    
1564
    test = constants.NV_NODENETTEST not in nresult
1565
    _ErrorIf(test, self.ENODENET, node,
1566
             "node hasn't returned node tcp connectivity data")
1567
    if not test:
1568
      if nresult[constants.NV_NODENETTEST]:
1569
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1570
        for anode in nlist:
1571
          _ErrorIf(True, self.ENODENET, node,
1572
                   "tcp communication with node '%s': %s",
1573
                   anode, nresult[constants.NV_NODENETTEST][anode])
1574

    
1575
    test = constants.NV_MASTERIP not in nresult
1576
    _ErrorIf(test, self.ENODENET, node,
1577
             "node hasn't returned node master IP reachability data")
1578
    if not test:
1579
      if not nresult[constants.NV_MASTERIP]:
1580
        if node == self.master_node:
1581
          msg = "the master node cannot reach the master IP (not configured?)"
1582
        else:
1583
          msg = "cannot reach the master IP"
1584
        _ErrorIf(True, self.ENODENET, node, msg)
1585

    
1586
  def _VerifyInstance(self, instance, instanceconfig, node_image,
1587
                      diskstatus):
1588
    """Verify an instance.
1589

1590
    This function checks to see if the required block devices are
1591
    available on the instance's node.
1592

1593
    """
1594
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1595
    node_current = instanceconfig.primary_node
1596

    
1597
    node_vol_should = {}
1598
    instanceconfig.MapLVsByNode(node_vol_should)
1599

    
1600
    for node in node_vol_should:
1601
      n_img = node_image[node]
1602
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1603
        # ignore missing volumes on offline or broken nodes
1604
        continue
1605
      for volume in node_vol_should[node]:
1606
        test = volume not in n_img.volumes
1607
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1608
                 "volume %s missing on node %s", volume, node)
1609

    
1610
    if instanceconfig.admin_up:
1611
      pri_img = node_image[node_current]
1612
      test = instance not in pri_img.instances and not pri_img.offline
1613
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
1614
               "instance not running on its primary node %s",
1615
               node_current)
1616

    
1617
    for node, n_img in node_image.items():
1618
      if node != node_current:
1619
        test = instance in n_img.instances
1620
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1621
                 "instance should not run on node %s", node)
1622

    
1623
    diskdata = [(nname, success, status, idx)
1624
                for (nname, disks) in diskstatus.items()
1625
                for idx, (success, status) in enumerate(disks)]
1626

    
1627
    for nname, success, bdev_status, idx in diskdata:
1628
      _ErrorIf(instanceconfig.admin_up and not success,
1629
               self.EINSTANCEFAULTYDISK, instance,
1630
               "couldn't retrieve status for disk/%s on %s: %s",
1631
               idx, nname, bdev_status)
1632
      _ErrorIf((instanceconfig.admin_up and success and
1633
                bdev_status.ldisk_status == constants.LDS_FAULTY),
1634
               self.EINSTANCEFAULTYDISK, instance,
1635
               "disk/%s on %s is faulty", idx, nname)
1636

    
1637
  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1638
    """Verify if there are any unknown volumes in the cluster.
1639

1640
    The .os, .swap and backup volumes are ignored. All other volumes are
1641
    reported as unknown.
1642

1643
    @type reserved: L{ganeti.utils.FieldSet}
1644
    @param reserved: a FieldSet of reserved volume names
1645

1646
    """
1647
    for node, n_img in node_image.items():
1648
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1649
        # skip non-healthy nodes
1650
        continue
1651
      for volume in n_img.volumes:
1652
        test = ((node not in node_vol_should or
1653
                volume not in node_vol_should[node]) and
1654
                not reserved.Matches(volume))
1655
        self._ErrorIf(test, self.ENODEORPHANLV, node,
1656
                      "volume %s is unknown", volume)
1657

    
1658
  def _VerifyOrphanInstances(self, instancelist, node_image):
1659
    """Verify the list of running instances.
1660

1661
    This checks what instances are running but unknown to the cluster.
1662

1663
    """
1664
    for node, n_img in node_image.items():
1665
      for o_inst in n_img.instances:
1666
        test = o_inst not in instancelist
1667
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1668
                      "instance %s on node %s should not exist", o_inst, node)
1669

    
1670
  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1671
    """Verify N+1 Memory Resilience.
1672

1673
    Check that if one single node dies we can still start all the
1674
    instances it was primary for.
1675

1676
    """
1677
    for node, n_img in node_image.items():
1678
      # This code checks that every node which is now listed as
1679
      # secondary has enough memory to host all instances it is
1680
      # supposed to should a single other node in the cluster fail.
1681
      # FIXME: not ready for failover to an arbitrary node
1682
      # FIXME: does not support file-backed instances
1683
      # WARNING: we currently take into account down instances as well
1684
      # as up ones, considering that even if they're down someone
1685
      # might want to start them even in the event of a node failure.
1686
      for prinode, instances in n_img.sbp.items():
1687
        needed_mem = 0
1688
        for instance in instances:
1689
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1690
          if bep[constants.BE_AUTO_BALANCE]:
1691
            needed_mem += bep[constants.BE_MEMORY]
1692
        test = n_img.mfree < needed_mem
1693
        self._ErrorIf(test, self.ENODEN1, node,
1694
                      "not enough memory on to accommodate"
1695
                      " failovers should peer node %s fail", prinode)
1696

    
1697
  def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1698
                       master_files):
1699
    """Verifies and computes the node required file checksums.
1700

1701
    @type ninfo: L{objects.Node}
1702
    @param ninfo: the node to check
1703
    @param nresult: the remote results for the node
1704
    @param file_list: required list of files
1705
    @param local_cksum: dictionary of local files and their checksums
1706
    @param master_files: list of files that only masters should have
1707

1708
    """
1709
    node = ninfo.name
1710
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1711

    
1712
    remote_cksum = nresult.get(constants.NV_FILELIST, None)
1713
    test = not isinstance(remote_cksum, dict)
1714
    _ErrorIf(test, self.ENODEFILECHECK, node,
1715
             "node hasn't returned file checksum data")
1716
    if test:
1717
      return
1718

    
1719
    for file_name in file_list:
1720
      node_is_mc = ninfo.master_candidate
1721
      must_have = (file_name not in master_files) or node_is_mc
1722
      # missing
1723
      test1 = file_name not in remote_cksum
1724
      # invalid checksum
1725
      test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1726
      # existing and good
1727
      test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1728
      _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1729
               "file '%s' missing", file_name)
1730
      _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1731
               "file '%s' has wrong checksum", file_name)
1732
      # not candidate and this is not a must-have file
1733
      _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1734
               "file '%s' should not exist on non master"
1735
               " candidates (and the file is outdated)", file_name)
1736
      # all good, except non-master/non-must have combination
1737
      _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1738
               "file '%s' should not exist"
1739
               " on non master candidates", file_name)
1740

    
1741
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
1742
                      drbd_map):
1743
    """Verifies and the node DRBD status.
1744

1745
    @type ninfo: L{objects.Node}
1746
    @param ninfo: the node to check
1747
    @param nresult: the remote results for the node
1748
    @param instanceinfo: the dict of instances
1749
    @param drbd_helper: the configured DRBD usermode helper
1750
    @param drbd_map: the DRBD map as returned by
1751
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1752

1753
    """
1754
    node = ninfo.name
1755
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1756

    
1757
    if drbd_helper:
1758
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
1759
      test = (helper_result is None)
1760
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
1761
               "no drbd usermode helper returned")
1762
      if helper_result:
1763
        status, payload = helper_result
1764
        test = not status
1765
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
1766
                 "drbd usermode helper check unsuccessful: %s", payload)
1767
        test = status and (payload != drbd_helper)
1768
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
1769
                 "wrong drbd usermode helper: %s", payload)
1770

    
1771
    # compute the DRBD minors
1772
    node_drbd = {}
1773
    for minor, instance in drbd_map[node].items():
1774
      test = instance not in instanceinfo
1775
      _ErrorIf(test, self.ECLUSTERCFG, None,
1776
               "ghost instance '%s' in temporary DRBD map", instance)
1777
        # ghost instance should not be running, but otherwise we
1778
        # don't give double warnings (both ghost instance and
1779
        # unallocated minor in use)
1780
      if test:
1781
        node_drbd[minor] = (instance, False)
1782
      else:
1783
        instance = instanceinfo[instance]
1784
        node_drbd[minor] = (instance.name, instance.admin_up)
1785

    
1786
    # and now check them
1787
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
1788
    test = not isinstance(used_minors, (tuple, list))
1789
    _ErrorIf(test, self.ENODEDRBD, node,
1790
             "cannot parse drbd status file: %s", str(used_minors))
1791
    if test:
1792
      # we cannot check drbd status
1793
      return
1794

    
1795
    for minor, (iname, must_exist) in node_drbd.items():
1796
      test = minor not in used_minors and must_exist
1797
      _ErrorIf(test, self.ENODEDRBD, node,
1798
               "drbd minor %d of instance %s is not active", minor, iname)
1799
    for minor in used_minors:
1800
      test = minor not in node_drbd
1801
      _ErrorIf(test, self.ENODEDRBD, node,
1802
               "unallocated drbd minor %d is in use", minor)
1803

    
1804
  def _UpdateNodeOS(self, ninfo, nresult, nimg):
1805
    """Builds the node OS structures.
1806

1807
    @type ninfo: L{objects.Node}
1808
    @param ninfo: the node to check
1809
    @param nresult: the remote results for the node
1810
    @param nimg: the node image object
1811

1812
    """
1813
    node = ninfo.name
1814
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1815

    
1816
    remote_os = nresult.get(constants.NV_OSLIST, None)
1817
    test = (not isinstance(remote_os, list) or
1818
            not compat.all(isinstance(v, list) and len(v) == 7
1819
                           for v in remote_os))
1820

    
1821
    _ErrorIf(test, self.ENODEOS, node,
1822
             "node hasn't returned valid OS data")
1823

    
1824
    nimg.os_fail = test
1825

    
1826
    if test:
1827
      return
1828

    
1829
    os_dict = {}
1830

    
1831
    for (name, os_path, status, diagnose,
1832
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1833

    
1834
      if name not in os_dict:
1835
        os_dict[name] = []
1836

    
1837
      # parameters is a list of lists instead of list of tuples due to
1838
      # JSON lacking a real tuple type, fix it:
1839
      parameters = [tuple(v) for v in parameters]
1840
      os_dict[name].append((os_path, status, diagnose,
1841
                            set(variants), set(parameters), set(api_ver)))
1842

    
1843
    nimg.oslist = os_dict
1844

    
1845
  def _VerifyNodeOS(self, ninfo, nimg, base):
1846
    """Verifies the node OS list.
1847

1848
    @type ninfo: L{objects.Node}
1849
    @param ninfo: the node to check
1850
    @param nimg: the node image object
1851
    @param base: the 'template' node we match against (e.g. from the master)
1852

1853
    """
1854
    node = ninfo.name
1855
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1856

    
1857
    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1858

    
1859
    for os_name, os_data in nimg.oslist.items():
1860
      assert os_data, "Empty OS status for OS %s?!" % os_name
1861
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1862
      _ErrorIf(not f_status, self.ENODEOS, node,
1863
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1864
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1865
               "OS '%s' has multiple entries (first one shadows the rest): %s",
1866
               os_name, utils.CommaJoin([v[0] for v in os_data]))
1867
      # this will be caught in the backend too
1868
      _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1869
               and not f_var, self.ENODEOS, node,
1870
               "OS %s with API at least %d does not declare any variant",
1871
               os_name, constants.OS_API_V15)
1872
      # comparisons with the 'base' image
1873
      test = os_name not in base.oslist
1874
      _ErrorIf(test, self.ENODEOS, node,
1875
               "Extra OS %s not present on reference node (%s)",
1876
               os_name, base.name)
1877
      if test:
1878
        continue
1879
      assert base.oslist[os_name], "Base node has empty OS status?"
1880
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1881
      if not b_status:
1882
        # base OS is invalid, skipping
1883
        continue
1884
      for kind, a, b in [("API version", f_api, b_api),
1885
                         ("variants list", f_var, b_var),
1886
                         ("parameters", f_param, b_param)]:
1887
        _ErrorIf(a != b, self.ENODEOS, node,
1888
                 "OS %s %s differs from reference node %s: %s vs. %s",
1889
                 kind, os_name, base.name,
1890
                 utils.CommaJoin(a), utils.CommaJoin(b))
1891

    
1892
    # check any missing OSes
1893
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1894
    _ErrorIf(missing, self.ENODEOS, node,
1895
             "OSes present on reference node %s but missing on this node: %s",
1896
             base.name, utils.CommaJoin(missing))
1897

    
1898
  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1899
    """Verifies and updates the node volume data.
1900

1901
    This function will update a L{NodeImage}'s internal structures
1902
    with data from the remote call.
1903

1904
    @type ninfo: L{objects.Node}
1905
    @param ninfo: the node to check
1906
    @param nresult: the remote results for the node
1907
    @param nimg: the node image object
1908
    @param vg_name: the configured VG name
1909

1910
    """
1911
    node = ninfo.name
1912
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1913

    
1914
    nimg.lvm_fail = True
1915
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1916
    if vg_name is None:
1917
      pass
1918
    elif isinstance(lvdata, basestring):
1919
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1920
               utils.SafeEncode(lvdata))
1921
    elif not isinstance(lvdata, dict):
1922
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1923
    else:
1924
      nimg.volumes = lvdata
1925
      nimg.lvm_fail = False
1926

    
1927
  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1928
    """Verifies and updates the node instance list.
1929

1930
    If the listing was successful, then updates this node's instance
1931
    list. Otherwise, it marks the RPC call as failed for the instance
1932
    list key.
1933

1934
    @type ninfo: L{objects.Node}
1935
    @param ninfo: the node to check
1936
    @param nresult: the remote results for the node
1937
    @param nimg: the node image object
1938

1939
    """
1940
    idata = nresult.get(constants.NV_INSTANCELIST, None)
1941
    test = not isinstance(idata, list)
1942
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1943
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
1944
    if test:
1945
      nimg.hyp_fail = True
1946
    else:
1947
      nimg.instances = idata
1948

    
1949
  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1950
    """Verifies and computes a node information map
1951

1952
    @type ninfo: L{objects.Node}
1953
    @param ninfo: the node to check
1954
    @param nresult: the remote results for the node
1955
    @param nimg: the node image object
1956
    @param vg_name: the configured VG name
1957

1958
    """
1959
    node = ninfo.name
1960
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1961

    
1962
    # try to read free memory (from the hypervisor)
1963
    hv_info = nresult.get(constants.NV_HVINFO, None)
1964
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1965
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1966
    if not test:
1967
      try:
1968
        nimg.mfree = int(hv_info["memory_free"])
1969
      except (ValueError, TypeError):
1970
        _ErrorIf(True, self.ENODERPC, node,
1971
                 "node returned invalid nodeinfo, check hypervisor")
1972

    
1973
    # FIXME: devise a free space model for file based instances as well
1974
    if vg_name is not None:
1975
      test = (constants.NV_VGLIST not in nresult or
1976
              vg_name not in nresult[constants.NV_VGLIST])
1977
      _ErrorIf(test, self.ENODELVM, node,
1978
               "node didn't return data for the volume group '%s'"
1979
               " - it is either missing or broken", vg_name)
1980
      if not test:
1981
        try:
1982
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
1983
        except (ValueError, TypeError):
1984
          _ErrorIf(True, self.ENODERPC, node,
1985
                   "node returned invalid LVM info, check LVM status")
1986

    
1987
  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
1988
    """Gets per-disk status information for all instances.
1989

1990
    @type nodelist: list of strings
1991
    @param nodelist: Node names
1992
    @type node_image: dict of (name, L{objects.Node})
1993
    @param node_image: Node objects
1994
    @type instanceinfo: dict of (name, L{objects.Instance})
1995
    @param instanceinfo: Instance objects
1996

1997
    """
1998
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1999

    
2000
    node_disks = {}
2001
    node_disks_devonly = {}
2002

    
2003
    for nname in nodelist:
2004
      disks = [(inst, disk)
2005
               for instlist in [node_image[nname].pinst,
2006
                                node_image[nname].sinst]
2007
               for inst in instlist
2008
               for disk in instanceinfo[inst].disks]
2009

    
2010
      if not disks:
2011
        # No need to collect data
2012
        continue
2013

    
2014
      node_disks[nname] = disks
2015

    
2016
      # Creating copies as SetDiskID below will modify the objects and that can
2017
      # lead to incorrect data returned from nodes
2018
      devonly = [dev.Copy() for (_, dev) in disks]
2019

    
2020
      for dev in devonly:
2021
        self.cfg.SetDiskID(dev, nname)
2022

    
2023
      node_disks_devonly[nname] = devonly
2024

    
2025
    assert len(node_disks) == len(node_disks_devonly)
2026

    
2027
    # Collect data from all nodes with disks
2028
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2029
                                                          node_disks_devonly)
2030

    
2031
    assert len(result) == len(node_disks)
2032

    
2033
    instdisk = {}
2034

    
2035
    for (nname, nres) in result.items():
2036
      if nres.offline:
2037
        # Ignore offline node
2038
        continue
2039

    
2040
      disks = node_disks[nname]
2041

    
2042
      msg = nres.fail_msg
2043
      _ErrorIf(msg, self.ENODERPC, nname,
2044
               "while getting disk information: %s", nres.fail_msg)
2045
      if msg:
2046
        # No data from this node
2047
        data = len(disks) * [None]
2048
      else:
2049
        data = nres.payload
2050

    
2051
      for ((inst, _), status) in zip(disks, data):
2052
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2053

    
2054
    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2055
                      len(nnames) <= len(instanceinfo[inst].all_nodes)
2056
                      for inst, nnames in instdisk.items()
2057
                      for nname, statuses in nnames.items())
2058

    
2059
    return instdisk
2060

    
2061
  def BuildHooksEnv(self):
2062
    """Build hooks env.
2063

2064
    Cluster-Verify hooks just ran in the post phase and their failure makes
2065
    the output be logged in the verify output and the verification to fail.
2066

2067
    """
2068
    all_nodes = self.cfg.GetNodeList()
2069
    env = {
2070
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2071
      }
2072
    for node in self.cfg.GetAllNodesInfo().values():
2073
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
2074

    
2075
    return env, [], all_nodes
2076

    
2077
  def Exec(self, feedback_fn):
2078
    """Verify integrity of cluster, performing various test on nodes.
2079

2080
    """
2081
    self.bad = False
2082
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2083
    verbose = self.op.verbose
2084
    self._feedback_fn = feedback_fn
2085
    feedback_fn("* Verifying global settings")
2086
    for msg in self.cfg.VerifyConfig():
2087
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2088

    
2089
    # Check the cluster certificates
2090
    for cert_filename in constants.ALL_CERT_FILES:
2091
      (errcode, msg) = _VerifyCertificate(cert_filename)
2092
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2093

    
2094
    vg_name = self.cfg.GetVGName()
2095
    drbd_helper = self.cfg.GetDRBDHelper()
2096
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2097
    cluster = self.cfg.GetClusterInfo()
2098
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
2099
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
2100
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
2101
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
2102
                        for iname in instancelist)
2103
    i_non_redundant = [] # Non redundant instances
2104
    i_non_a_balanced = [] # Non auto-balanced instances
2105
    n_offline = 0 # Count of offline nodes
2106
    n_drained = 0 # Count of nodes being drained
2107
    node_vol_should = {}
2108

    
2109
    # FIXME: verify OS list
2110
    # do local checksums
2111
    master_files = [constants.CLUSTER_CONF_FILE]
2112
    master_node = self.master_node = self.cfg.GetMasterNode()
2113
    master_ip = self.cfg.GetMasterIP()
2114

    
2115
    file_names = ssconf.SimpleStore().GetFileList()
2116
    file_names.extend(constants.ALL_CERT_FILES)
2117
    file_names.extend(master_files)
2118
    if cluster.modify_etc_hosts:
2119
      file_names.append(constants.ETC_HOSTS)
2120

    
2121
    local_checksums = utils.FingerprintFiles(file_names)
2122

    
2123
    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2124
    node_verify_param = {
2125
      constants.NV_FILELIST: file_names,
2126
      constants.NV_NODELIST: [node.name for node in nodeinfo
2127
                              if not node.offline],
2128
      constants.NV_HYPERVISOR: hypervisors,
2129
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2130
                                  node.secondary_ip) for node in nodeinfo
2131
                                 if not node.offline],
2132
      constants.NV_INSTANCELIST: hypervisors,
2133
      constants.NV_VERSION: None,
2134
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2135
      constants.NV_NODESETUP: None,
2136
      constants.NV_TIME: None,
2137
      constants.NV_MASTERIP: (master_node, master_ip),
2138
      constants.NV_OSLIST: None,
2139
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2140
      }
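    # Added note: node_verify_param is the work list sent to every node via
    # rpc.call_node_verify below; LVM- and DRBD-specific checks are appended
    # conditionally in the blocks that follow.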
2141

    
2142
    if vg_name is not None:
2143
      node_verify_param[constants.NV_VGLIST] = None
2144
      node_verify_param[constants.NV_LVLIST] = vg_name
2145
      node_verify_param[constants.NV_PVLIST] = [vg_name]
2146
      node_verify_param[constants.NV_DRBDLIST] = None
2147

    
2148
    if drbd_helper:
2149
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2150

    
2151
    # Build our expected cluster state
2152
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
2153
                                                 name=node.name,
2154
                                                 vm_capable=node.vm_capable))
2155
                      for node in nodeinfo)
2156

    
2157
    for instance in instancelist:
2158
      inst_config = instanceinfo[instance]
2159

    
2160
      for nname in inst_config.all_nodes:
2161
        if nname not in node_image:
2162
          # ghost node
2163
          gnode = self.NodeImage(name=nname)
2164
          gnode.ghost = True
2165
          node_image[nname] = gnode
2166

    
2167
      inst_config.MapLVsByNode(node_vol_should)
2168

    
2169
      pnode = inst_config.primary_node
2170
      node_image[pnode].pinst.append(instance)
2171

    
2172
      for snode in inst_config.secondary_nodes:
2173
        nimg = node_image[snode]
2174
        nimg.sinst.append(instance)
2175
        if pnode not in nimg.sbp:
2176
          nimg.sbp[pnode] = []
2177
        nimg.sbp[pnode].append(instance)
2178

    
2179
    # At this point, we have the in-memory data structures complete,
2180
    # except for the runtime information, which we'll gather next
2181

    
2182
    # Due to the way our RPC system works, exact response times cannot be
2183
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2184
    # time before and after executing the request, we can at least have a time
2185
    # window.
2186
    nvinfo_starttime = time.time()
2187
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2188
                                           self.cfg.GetClusterName())
2189
    nvinfo_endtime = time.time()
2190

    
2191
    all_drbd_map = self.cfg.ComputeDRBDMap()
2192

    
2193
    feedback_fn("* Gathering disk information (%s nodes)" % len(nodelist))
2194
    instdisk = self._CollectDiskInfo(nodelist, node_image, instanceinfo)
2195

    
2196
    feedback_fn("* Verifying node status")
2197

    
2198
    refos_img = None
2199

    
2200
    for node_i in nodeinfo:
2201
      node = node_i.name
2202
      nimg = node_image[node]
2203

    
2204
      if node_i.offline:
2205
        if verbose:
2206
          feedback_fn("* Skipping offline node %s" % (node,))
2207
        n_offline += 1
2208
        continue
2209

    
2210
      if node == master_node:
2211
        ntype = "master"
2212
      elif node_i.master_candidate:
2213
        ntype = "master candidate"
2214
      elif node_i.drained:
2215
        ntype = "drained"
2216
        n_drained += 1
2217
      else:
2218
        ntype = "regular"
2219
      if verbose:
2220
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2221

    
2222
      msg = all_nvinfo[node].fail_msg
2223
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2224
      if msg:
2225
        nimg.rpc_fail = True
2226
        continue
2227

    
2228
      nresult = all_nvinfo[node].payload
2229

    
2230
      nimg.call_ok = self._VerifyNode(node_i, nresult)
2231
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2232
      self._VerifyNodeNetwork(node_i, nresult)
2233
      self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
2234
                            master_files)
2235

    
2236
      if nimg.vm_capable:
2237
        self._VerifyNodeLVM(node_i, nresult, vg_name)
2238
        self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2239
                             all_drbd_map)
2240

    
2241
        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2242
        self._UpdateNodeInstances(node_i, nresult, nimg)
2243
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2244
        self._UpdateNodeOS(node_i, nresult, nimg)
2245
        if not nimg.os_fail:
2246
          if refos_img is None:
2247
            refos_img = nimg
2248
          self._VerifyNodeOS(node_i, nimg, refos_img)
2249

    
2250
    feedback_fn("* Verifying instance status")
2251
    for instance in instancelist:
2252
      if verbose:
2253
        feedback_fn("* Verifying instance %s" % instance)
2254
      inst_config = instanceinfo[instance]
2255
      self._VerifyInstance(instance, inst_config, node_image,
2256
                           instdisk[instance])
2257
      inst_nodes_offline = []
2258

    
2259
      pnode = inst_config.primary_node
2260
      pnode_img = node_image[pnode]
2261
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2262
               self.ENODERPC, pnode, "instance %s, connection to"
2263
               " primary node failed", instance)
2264

    
2265
      if pnode_img.offline:
2266
        inst_nodes_offline.append(pnode)
2267

    
2268
      # If the instance is non-redundant we cannot survive losing its primary
2269
      # node, so we are not N+1 compliant. On the other hand we have no disk
2270
      # templates with more than one secondary so that situation is not well
2271
      # supported either.
2272
      # FIXME: does not support file-backed instances
2273
      if not inst_config.secondary_nodes:
2274
        i_non_redundant.append(instance)
2275
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2276
               instance, "instance has multiple secondary nodes: %s",
2277
               utils.CommaJoin(inst_config.secondary_nodes),
2278
               code=self.ETYPE_WARNING)
2279

    
2280
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2281
        i_non_a_balanced.append(instance)
2282

    
2283
      for snode in inst_config.secondary_nodes:
2284
        s_img = node_image[snode]
2285
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2286
                 "instance %s, connection to secondary node failed", instance)
2287

    
2288
        if s_img.offline:
2289
          inst_nodes_offline.append(snode)
2290

    
2291
      # warn that the instance lives on offline nodes
2292
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2293
               "instance lives on offline node(s) %s",
2294
               utils.CommaJoin(inst_nodes_offline))
2295
      # ... or ghost/non-vm_capable nodes
2296
      for node in inst_config.all_nodes:
2297
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2298
                 "instance lives on ghost node %s", node)
2299
        _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2300
                 instance, "instance lives on non-vm_capable node %s", node)
2301

    
2302
    feedback_fn("* Verifying orphan volumes")
2303
    reserved = utils.FieldSet(*cluster.reserved_lvs)
2304
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2305

    
2306
    feedback_fn("* Verifying orphan instances")
2307
    self._VerifyOrphanInstances(instancelist, node_image)
2308

    
2309
    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2310
      feedback_fn("* Verifying N+1 Memory redundancy")
2311
      self._VerifyNPlusOneMemory(node_image, instanceinfo)
2312

    
2313
    feedback_fn("* Other Notes")
2314
    if i_non_redundant:
2315
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2316
                  % len(i_non_redundant))
2317

    
2318
    if i_non_a_balanced:
2319
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2320
                  % len(i_non_a_balanced))
2321

    
2322
    if n_offline:
2323
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2324

    
2325
    if n_drained:
2326
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2327

    
2328
    return not self.bad
2329

    
2330
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2331
    """Analyze the post-hooks' result
2332

2333
    This method analyses the hook result, handles it, and sends some
2334
    nicely-formatted feedback back to the user.
2335

2336
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
2337
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2338
    @param hooks_results: the results of the multi-node hooks rpc call
2339
    @param feedback_fn: function used to send feedback back to the caller
2340
    @param lu_result: previous Exec result
2341
    @return: the new Exec result, based on the previous result
2342
        and hook results
2343

2344
    """
2345
    # We only really run POST phase hooks, and are only interested in
2346
    # their results
2347
    if phase == constants.HOOKS_PHASE_POST:
2348
      # Used to change hooks' output to proper indentation
2349
      feedback_fn("* Hooks Results")
2350
      assert hooks_results, "invalid result from hooks"
2351

    
2352
      for node_name in hooks_results:
2353
        res = hooks_results[node_name]
2354
        msg = res.fail_msg
2355
        test = msg and not res.offline
2356
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
2357
                      "Communication failure in hooks execution: %s", msg)
2358
        if res.offline or msg:
2359
          # No need to investigate payload if node is offline or gave an error.
2360
          # override manually lu_result here as _ErrorIf only
2361
          # overrides self.bad
2362
          lu_result = 1
2363
          continue
2364
        for script, hkr, output in res.payload:
2365
          test = hkr == constants.HKR_FAIL
2366
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
2367
                        "Script %s failed, output:", script)
2368
          if test:
2369
            output = self._HOOKS_INDENT_RE.sub('      ', output)
2370
            feedback_fn("%s" % output)
2371
            lu_result = 0
2372

    
2373
      return lu_result
2374

    
2375

    
2376
class LUVerifyDisks(NoHooksLU):
2377
  """Verifies the cluster disks status.
2378

2379
  """
2380
  REQ_BGL = False
2381

    
2382
  def ExpandNames(self):
2383
    self.needed_locks = {
2384
      locking.LEVEL_NODE: locking.ALL_SET,
2385
      locking.LEVEL_INSTANCE: locking.ALL_SET,
2386
    }
2387
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2388

    
2389
  def Exec(self, feedback_fn):
2390
    """Verify integrity of cluster disks.
2391

2392
    @rtype: tuple of three items
2393
    @return: a tuple of (dict of node-to-node_error, list of instances
2394
        which need activate-disks, dict of instance: (node, volume) for
2395
        missing volumes)
2396

2397
    """
2398
    result = res_nodes, res_instances, res_missing = {}, [], {}
2399

    
2400
    nodes = utils.NiceSort(self.cfg.GetNodeList())
2401
    instances = [self.cfg.GetInstanceInfo(name)
2402
                 for name in self.cfg.GetInstanceList()]
2403

    
2404
    nv_dict = {}
2405
    for inst in instances:
2406
      inst_lvs = {}
2407
      if (not inst.admin_up or
2408
          inst.disk_template not in constants.DTS_NET_MIRROR):
2409
        continue
2410
      inst.MapLVsByNode(inst_lvs)
2411
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2412
      for node, vol_list in inst_lvs.iteritems():
2413
        for vol in vol_list:
2414
          nv_dict[(node, vol)] = inst
2415

    
2416
    if not nv_dict:
2417
      return result
2418

    
2419
    vg_names = self.rpc.call_vg_list(nodes)
2420
    vg_names.Raise("Cannot get list of VGs")
2421

    
2422
    for node in nodes:
2423
      # node_volume
2424
      node_res = self.rpc.call_lv_list([node],
2425
                                       vg_names[node].payload.keys())[node]
2426
      if node_res.offline:
2427
        continue
2428
      msg = node_res.fail_msg
2429
      if msg:
2430
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2431
        res_nodes[node] = msg
2432
        continue
2433

    
2434
      lvs = node_res.payload
2435
      for lv_name, (_, _, lv_online) in lvs.items():
2436
        inst = nv_dict.pop((node, lv_name), None)
2437
        if (not lv_online and inst is not None
2438
            and inst.name not in res_instances):
2439
          res_instances.append(inst.name)
2440

    
2441
    # any leftover items in nv_dict are missing LVs, let's arrange the
2442
    # data better
2443
    for key, inst in nv_dict.iteritems():
2444
      if inst.name not in res_missing:
2445
        res_missing[inst.name] = []
2446
      res_missing[inst.name].append(key)
2447

    
2448
    return result
2449

    
2450

    
2451
class LURepairDiskSizes(NoHooksLU):
2452
  """Verifies the cluster disks sizes.
2453

2454
  """
2455
  _OP_PARAMS = [("instances", ht.EmptyList, ht.TListOf(ht.TNonEmptyString))]
2456
  REQ_BGL = False
2457

    
2458
  def ExpandNames(self):
2459
    if self.op.instances:
2460
      self.wanted_names = []
2461
      for name in self.op.instances:
2462
        full_name = _ExpandInstanceName(self.cfg, name)
2463
        self.wanted_names.append(full_name)
2464
      self.needed_locks = {
2465
        locking.LEVEL_NODE: [],
2466
        locking.LEVEL_INSTANCE: self.wanted_names,
2467
        }
2468
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2469
    else:
2470
      self.wanted_names = None
2471
      self.needed_locks = {
2472
        locking.LEVEL_NODE: locking.ALL_SET,
2473
        locking.LEVEL_INSTANCE: locking.ALL_SET,
2474
        }
2475
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2476

    
2477
  def DeclareLocks(self, level):
2478
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
2479
      self._LockInstancesNodes(primary_only=True)
2480

    
2481
  def CheckPrereq(self):
2482
    """Check prerequisites.
2483

2484
    This only checks the optional instance list against the existing names.
2485

2486
    """
2487
    if self.wanted_names is None:
2488
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2489

    
2490
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2491
                             in self.wanted_names]
2492

    
2493
  def _EnsureChildSizes(self, disk):
2494
    """Ensure children of the disk have the needed disk size.
2495

2496
    This is valid mainly for DRBD8 and fixes an issue where the
2497
    children have smaller disk size.
2498

2499
    @param disk: an L{ganeti.objects.Disk} object
2500

2501
    """
2502
    if disk.dev_type == constants.LD_DRBD8:
2503
      assert disk.children, "Empty children for DRBD8?"
2504
      fchild = disk.children[0]
2505
      mismatch = fchild.size < disk.size
2506
      if mismatch:
2507
        self.LogInfo("Child disk has size %d, parent %d, fixing",
2508
                     fchild.size, disk.size)
2509
        fchild.size = disk.size
2510

    
2511
      # and we recurse on this child only, not on the metadev
2512
      return self._EnsureChildSizes(fchild) or mismatch
2513
    else:
2514
      return False
2515

    
2516
  def Exec(self, feedback_fn):
2517
    """Verify the size of cluster disks.
2518

2519
    """
2520
    # TODO: check child disks too
2521
    # TODO: check differences in size between primary/secondary nodes
2522
    per_node_disks = {}
2523
    for instance in self.wanted_instances:
2524
      pnode = instance.primary_node
2525
      if pnode not in per_node_disks:
2526
        per_node_disks[pnode] = []
2527
      for idx, disk in enumerate(instance.disks):
2528
        per_node_disks[pnode].append((instance, idx, disk))
2529

    
2530
    changed = []
2531
    for node, dskl in per_node_disks.items():
2532
      newl = [v[2].Copy() for v in dskl]
2533
      for dsk in newl:
2534
        self.cfg.SetDiskID(dsk, node)
2535
      result = self.rpc.call_blockdev_getsizes(node, newl)
2536
      if result.fail_msg:
2537
        self.LogWarning("Failure in blockdev_getsizes call to node"
2538
                        " %s, ignoring", node)
2539
        continue
2540
      if len(result.data) != len(dskl):
2541
        self.LogWarning("Invalid result from node %s, ignoring node results",
2542
                        node)
2543
        continue
2544
      for ((instance, idx, disk), size) in zip(dskl, result.data):
2545
        if size is None:
2546
          self.LogWarning("Disk %d of instance %s did not return size"
2547
                          " information, ignoring", idx, instance.name)
2548
          continue
2549
        if not isinstance(size, (int, long)):
2550
          self.LogWarning("Disk %d of instance %s did not return valid"
2551
                          " size information, ignoring", idx, instance.name)
2552
          continue
2553
        size = size >> 20
2554
        if size != disk.size:
2555
          self.LogInfo("Disk %d of instance %s has mismatched size,"
2556
                       " correcting: recorded %d, actual %d", idx,
2557
                       instance.name, disk.size, size)
2558
          disk.size = size
2559
          self.cfg.Update(instance, feedback_fn)
2560
          changed.append((instance.name, idx, size))
2561
        if self._EnsureChildSizes(disk):
2562
          self.cfg.Update(instance, feedback_fn)
2563
          changed.append((instance.name, idx, disk.size))
2564
    return changed
2565

    
2566

    
2567
class LURenameCluster(LogicalUnit):
2568
  """Rename the cluster.
2569

2570
  """
2571
  HPATH = "cluster-rename"
2572
  HTYPE = constants.HTYPE_CLUSTER
2573
  _OP_PARAMS = [("name", ht.NoDefault, ht.TNonEmptyString)]
2574

    
2575
  def BuildHooksEnv(self):
2576
    """Build hooks env.
2577

2578
    """
2579
    env = {
2580
      "OP_TARGET": self.cfg.GetClusterName(),
2581
      "NEW_NAME": self.op.name,
2582
      }
2583
    mn = self.cfg.GetMasterNode()
2584
    all_nodes = self.cfg.GetNodeList()
2585
    return env, [mn], all_nodes
2586

    
2587
  def CheckPrereq(self):
2588
    """Verify that the passed name is a valid one.
2589

2590
    """
2591
    hostname = netutils.GetHostname(name=self.op.name,
2592
                                    family=self.cfg.GetPrimaryIPFamily())
2593

    
2594
    new_name = hostname.name
2595
    self.ip = new_ip = hostname.ip
2596
    old_name = self.cfg.GetClusterName()
2597
    old_ip = self.cfg.GetMasterIP()
2598
    if new_name == old_name and new_ip == old_ip:
2599
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
2600
                                 " cluster has changed",
2601
                                 errors.ECODE_INVAL)
2602
    if new_ip != old_ip:
2603
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2604
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
2605
                                   " reachable on the network" %
2606
                                   new_ip, errors.ECODE_NOTUNIQUE)
2607

    
2608
    self.op.name = new_name
2609

    
2610
  def Exec(self, feedback_fn):
2611
    """Rename the cluster.
2612

2613
    """
2614
    clustername = self.op.name
2615
    ip = self.ip
2616

    
2617
    # shutdown the master IP
2618
    master = self.cfg.GetMasterNode()
2619
    result = self.rpc.call_node_stop_master(master, False)
2620
    result.Raise("Could not disable the master role")
2621

    
2622
    try:
2623
      cluster = self.cfg.GetClusterInfo()
2624
      cluster.cluster_name = clustername
2625
      cluster.master_ip = ip
2626
      self.cfg.Update(cluster, feedback_fn)
2627

    
2628
      # update the known hosts file
2629
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2630
      node_list = self.cfg.GetOnlineNodeList()
2631
      try:
2632
        node_list.remove(master)
2633
      except ValueError:
2634
        pass
2635
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
2636
    finally:
2637
      result = self.rpc.call_node_start_master(master, False, False)
2638
      msg = result.fail_msg
2639
      if msg:
2640
        self.LogWarning("Could not re-enable the master role on"
2641
                        " the master, please restart manually: %s", msg)
2642

    
2643
    return clustername
2644

    
2645

    
2646
class LUSetClusterParams(LogicalUnit):
2647
  """Change the parameters of the cluster.
2648

2649
  """
2650
  HPATH = "cluster-modify"
2651
  HTYPE = constants.HTYPE_CLUSTER
2652
  _OP_PARAMS = [
2653
    ("vg_name", None, ht.TMaybeString),
2654
    ("enabled_hypervisors", None,
2655
     ht.TOr(ht.TAnd(ht.TListOf(ht.TElemOf(constants.HYPER_TYPES)), ht.TTrue),
2656
            ht.TNone)),
2657
    ("hvparams", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict),
2658
                              ht.TNone)),
2659
    ("beparams", None, ht.TOr(ht.TDict, ht.TNone)),
2660
    ("os_hvp", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict),
2661
                            ht.TNone)),
2662
    ("osparams", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict),
2663
                              ht.TNone)),
2664
    ("candidate_pool_size", None, ht.TOr(ht.TStrictPositiveInt, ht.TNone)),
2665
    ("uid_pool", None, ht.NoType),
2666
    ("add_uids", None, ht.NoType),
2667
    ("remove_uids", None, ht.NoType),
2668
    ("maintain_node_health", None, ht.TMaybeBool),
2669
    ("prealloc_wipe_disks", None, ht.TMaybeBool),
2670
    ("nicparams", None, ht.TOr(ht.TDict, ht.TNone)),
2671
    ("ndparams", None, ht.TOr(ht.TDict, ht.TNone)),
2672
    ("drbd_helper", None, ht.TOr(ht.TString, ht.TNone)),
2673
    ("default_iallocator", None, ht.TOr(ht.TString, ht.TNone)),
2674
    ("reserved_lvs", None, ht.TOr(ht.TListOf(ht.TNonEmptyString), ht.TNone)),
2675
    ("hidden_os", None, ht.TOr(ht.TListOf(\
2676
          ht.TAnd(ht.TList,
2677
                ht.TIsLength(2),
2678
                ht.TMap(lambda v: v[0], ht.TElemOf(constants.DDMS_VALUES)))),
2679
          ht.TNone)),
2680
    ("blacklisted_os", None, ht.TOr(ht.TListOf(\
2681
          ht.TAnd(ht.TList,
2682
                ht.TIsLength(2),
2683
                ht.TMap(lambda v: v[0], ht.TElemOf(constants.DDMS_VALUES)))),
2684
          ht.TNone)),
2685
    ]
2686
  REQ_BGL = False
2687

    
2688
  def CheckArguments(self):
2689
    """Check parameters
2690

2691
    """
2692
    if self.op.uid_pool:
2693
      uidpool.CheckUidPool(self.op.uid_pool)
2694

    
2695
    if self.op.add_uids:
2696
      uidpool.CheckUidPool(self.op.add_uids)
2697

    
2698
    if self.op.remove_uids:
2699
      uidpool.CheckUidPool(self.op.remove_uids)
2700

    
2701
  def ExpandNames(self):
2702
    # FIXME: in the future maybe other cluster params won't require checking on
2703
    # all nodes to be modified.
2704
    self.needed_locks = {
2705
      locking.LEVEL_NODE: locking.ALL_SET,
2706
    }
2707
    self.share_locks[locking.LEVEL_NODE] = 1
2708

    
2709
  def BuildHooksEnv(self):
2710
    """Build hooks env.
2711

2712
    """
2713
    env = {
2714
      "OP_TARGET": self.cfg.GetClusterName(),
2715
      "NEW_VG_NAME": self.op.vg_name,
2716
      }
2717
    mn = self.cfg.GetMasterNode()
2718
    return env, [mn], [mn]
2719

    
2720
  def CheckPrereq(self):
2721
    """Check prerequisites.
2722

2723
    This checks whether the given params don't conflict and
2724
    if the given volume group is valid.
2725

2726
    """
2727
    if self.op.vg_name is not None and not self.op.vg_name:
2728
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2729
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2730
                                   " instances exist", errors.ECODE_INVAL)
2731

    
2732
    if self.op.drbd_helper is not None and not self.op.drbd_helper:
2733
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2734
        raise errors.OpPrereqError("Cannot disable drbd helper while"
2735
                                   " drbd-based instances exist",
2736
                                   errors.ECODE_INVAL)
2737

    
2738
    node_list = self.acquired_locks[locking.LEVEL_NODE]
2739

    
2740
    # if vg_name not None, checks given volume group on all nodes
2741
    if self.op.vg_name:
2742
      vglist = self.rpc.call_vg_list(node_list)
2743
      for node in node_list:
2744
        msg = vglist[node].fail_msg
2745
        if msg:
2746
          # ignoring down node
2747
          self.LogWarning("Error while gathering data on node %s"
2748
                          " (ignoring node): %s", node, msg)
2749
          continue
2750
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2751
                                              self.op.vg_name,
2752
                                              constants.MIN_VG_SIZE)
2753
        if vgstatus:
2754
          raise errors.OpPrereqError("Error on node '%s': %s" %
2755
                                     (node, vgstatus), errors.ECODE_ENVIRON)
2756

    
2757
    if self.op.drbd_helper:
2758
      # checks given drbd helper on all nodes
2759
      helpers = self.rpc.call_drbd_helper(node_list)
2760
      for node in node_list:
2761
        ninfo = self.cfg.GetNodeInfo(node)
2762
        if ninfo.offline:
2763
          self.LogInfo("Not checking drbd helper on offline node %s", node)
2764
          continue
2765
        msg = helpers[node].fail_msg
2766
        if msg:
2767
          raise errors.OpPrereqError("Error checking drbd helper on node"
2768
                                     " '%s': %s" % (node, msg),
2769
                                     errors.ECODE_ENVIRON)
2770
        node_helper = helpers[node].payload
2771
        if node_helper != self.op.drbd_helper:
2772
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2773
                                     (node, node_helper), errors.ECODE_ENVIRON)
2774

    
2775
    self.cluster = cluster = self.cfg.GetClusterInfo()
2776
    # validate params changes
2777
    if self.op.beparams:
2778
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2779
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2780

    
2781
    if self.op.ndparams:
2782
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
2783
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
2784

    
2785
    if self.op.nicparams:
2786
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2787
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2788
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
2789
      nic_errors = []
2790

    
2791
      # check all instances for consistency
2792
      for instance in self.cfg.GetAllInstancesInfo().values():
2793
        for nic_idx, nic in enumerate(instance.nics):
2794
          params_copy = copy.deepcopy(nic.nicparams)
2795
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
2796

    
2797
          # check parameter syntax
2798
          try:
2799
            objects.NIC.CheckParameterSyntax(params_filled)
2800
          except errors.ConfigurationError, err:
2801
            nic_errors.append("Instance %s, nic/%d: %s" %
2802
                              (instance.name, nic_idx, err))
2803

    
2804
          # if we're moving instances to routed, check that they have an ip
2805
          target_mode = params_filled[constants.NIC_MODE]
2806
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2807
            nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2808
                              (instance.name, nic_idx))
2809
      if nic_errors:
2810
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2811
                                   "\n".join(nic_errors))
2812

    
2813
    # hypervisor list/parameters
2814
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2815
    if self.op.hvparams:
2816
      for hv_name, hv_dict in self.op.hvparams.items():
2817
        if hv_name not in self.new_hvparams:
2818
          self.new_hvparams[hv_name] = hv_dict
2819
        else:
2820
          self.new_hvparams[hv_name].update(hv_dict)
2821

    
2822
    # os hypervisor parameters
2823
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
    if self.op.os_hvp:
      for os_name, hvs in self.op.os_hvp.items():
        if os_name not in self.new_os_hvp:
          self.new_os_hvp[os_name] = hvs
        else:
          for hv_name, hv_dict in hvs.items():
            if hv_name not in self.new_os_hvp[os_name]:
              self.new_os_hvp[os_name][hv_name] = hv_dict
            else:
              self.new_os_hvp[os_name][hv_name].update(hv_dict)

    # os parameters
    self.new_osp = objects.FillDict(cluster.osparams, {})
    if self.op.osparams:
      for os_name, osp in self.op.osparams.items():
        if os_name not in self.new_osp:
          self.new_osp[os_name] = {}

        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
                                                  use_none=True)

        if not self.new_osp[os_name]:
          # we removed all parameters
          del self.new_osp[os_name]
        else:
          # check the parameter validity (remote check)
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
                         os_name, self.new_osp[os_name])

    # changes to the hypervisor list
    if self.op.enabled_hypervisors is not None:
      self.hv_list = self.op.enabled_hypervisors
      for hv in self.hv_list:
        # if the hypervisor doesn't already exist in the cluster
        # hvparams, we initialize it to empty, and then (in both
        # cases) we make sure to fill the defaults, as we might not
        # have a complete defaults list if the hypervisor wasn't
        # enabled before
        if hv not in new_hvp:
          new_hvp[hv] = {}
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
    else:
      self.hv_list = cluster.enabled_hypervisors

    if self.op.hvparams or self.op.enabled_hypervisors is not None:
      # either the enabled list has changed, or the parameters have, validate
      for hv_name, hv_params in self.new_hvparams.items():
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
            (self.op.enabled_hypervisors and
             hv_name in self.op.enabled_hypervisors)):
          # either this is a new hypervisor, or its parameters have changed
          hv_class = hypervisor.GetHypervisor(hv_name)
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          hv_class.CheckParameterSyntax(hv_params)
          _CheckHVParams(self, node_list, hv_name, hv_params)

    if self.op.os_hvp:
      # no need to check any newly-enabled hypervisors, since the
      # defaults have already been checked in the above code-block
      for os_name, os_hvp in self.new_os_hvp.items():
        for hv_name, hv_params in os_hvp.items():
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          # we need to fill in the new os_hvp on top of the actual hv_p
          cluster_defaults = self.new_hvparams.get(hv_name, {})
          new_osp = objects.FillDict(cluster_defaults, hv_params)
          hv_class = hypervisor.GetHypervisor(hv_name)
          hv_class.CheckParameterSyntax(new_osp)
          _CheckHVParams(self, node_list, hv_name, new_osp)

    if self.op.default_iallocator:
      alloc_script = utils.FindFile(self.op.default_iallocator,
                                    constants.IALLOCATOR_SEARCH_PATH,
                                    os.path.isfile)
      if alloc_script is None:
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
                                   " specified" % self.op.default_iallocator,
                                   errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    """
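    # an empty string for vg_name/drbd_helper is normalized to None below,
    # which effectively unsets the volume group / DRBD usermode helper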
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
    if self.op.drbd_helper is not None:
      new_helper = self.op.drbd_helper
      if not new_helper:
        new_helper = None
      if new_helper != self.cfg.GetDRBDHelper():
        self.cfg.SetDRBDHelper(new_helper)
      else:
        feedback_fn("Cluster DRBD helper already in desired state,"
                    " not changing")
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
    if self.op.enabled_hypervisors is not None:
      self.cluster.hvparams = self.new_hvparams
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
    if self.op.beparams:
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
    if self.op.nicparams:
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
    if self.op.osparams:
      self.cluster.osparams = self.new_osp
    if self.op.ndparams:
      self.cluster.ndparams = self.new_ndparams

    if self.op.candidate_pool_size is not None:
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
      # we need to update the pool size here, otherwise the save will fail
      _AdjustCandidatePool(self, [])

    if self.op.maintain_node_health is not None:
      self.cluster.maintain_node_health = self.op.maintain_node_health

    if self.op.prealloc_wipe_disks is not None:
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks

    if self.op.add_uids is not None:
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)

    if self.op.remove_uids is not None:
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)

    if self.op.uid_pool is not None:
      self.cluster.uid_pool = self.op.uid_pool

    if self.op.default_iallocator is not None:
      self.cluster.default_iallocator = self.op.default_iallocator

    if self.op.reserved_lvs is not None:
      self.cluster.reserved_lvs = self.op.reserved_lvs

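    # helper applying DDM_ADD/DDM_REMOVE modifications to the hidden or
    # blacklisted OS lists; changes that are already in effect are only
    # reported via feedback_fn instead of being treated as errors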
    def helper_os(aname, mods, desc):
      desc += " OS list"
      lst = getattr(self.cluster, aname)
      for key, val in mods:
        if key == constants.DDM_ADD:
          if val in lst:
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
          else:
            lst.append(val)
        elif key == constants.DDM_REMOVE:
          if val in lst:
            lst.remove(val)
          else:
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
        else:
          raise errors.ProgrammerError("Invalid modification '%s'" % key)

    if self.op.hidden_os:
      helper_os("hidden_os", self.op.hidden_os, "hidden")

    if self.op.blacklisted_os:
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")

    self.cfg.Update(self.cluster, feedback_fn)


def _UploadHelper(lu, nodes, fname):
  """Helper for uploading a file and showing warnings.

  """
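  # files missing on the master are silently skipped; per-node upload
  # failures are only logged as warnings and do not abort the operation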
  if os.path.exists(fname):
    result = lu.rpc.call_upload_file(nodes, fname)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        msg = ("Copy of file %s to node %s failed: %s" %
               (fname, to_node, msg))
        lu.proc.LogWarning(msg)


def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  # 1. Gather target nodes
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
  dist_nodes = lu.cfg.GetOnlineNodeList()
  nvm_nodes = lu.cfg.GetNonVmCapableNodeList()
  vm_nodes = [name for name in dist_nodes if name not in nvm_nodes]
  if additional_nodes is not None:
    dist_nodes.extend(additional_nodes)
    if additional_vm:
      vm_nodes.extend(additional_nodes)
  if myself.name in dist_nodes:
    dist_nodes.remove(myself.name)
  if myself.name in vm_nodes:
    vm_nodes.remove(myself.name)

  # 2. Gather files to distribute
  dist_files = set([constants.ETC_HOSTS,
                    constants.SSH_KNOWN_HOSTS_FILE,
                    constants.RAPI_CERT_FILE,
                    constants.RAPI_USERS_FILE,
                    constants.CONFD_HMAC_KEY,
                    constants.CLUSTER_DOMAIN_SECRET_FILE,
                   ])

  vm_files = set()
  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
  for hv_name in enabled_hypervisors:
    hv_class = hypervisor.GetHypervisor(hv_name)
    vm_files.update(hv_class.GetAncillaryFiles())

  # 3. Perform the files upload
  for fname in dist_files:
    _UploadHelper(lu, dist_nodes, fname)
  for fname in vm_files:
    _UploadHelper(lu, vm_nodes, fname)


class LURedistributeConfig(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)


def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  """
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

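  # up to 10 consecutive RPC failures are tolerated (with a 6 second pause
  # between attempts) before giving up on the node entirely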
  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                           node, disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded


def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

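  # a composite device is only consistent if all of its children (for
  # example the local volumes backing a mirrored device) are consistent too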
  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result


class LUOutOfBand(NoHooksLU):
  """Logical unit for OOB handling.

  """
  _OP_PARAMS = [
    _PNodeName,
    ("command", None, ht.TElemOf(constants.OOB_COMMANDS)),
    ("timeout", constants.OOB_TIMEOUT, ht.TInt),
    ]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - OOB is supported

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)

    if node is None:
      raise errors.OpPrereqError("Node %s not found" % self.op.node_name)

    self.oob_program = self.cfg.GetOobProgram(node)

    if not self.oob_program:
      raise errors.OpPrereqError("OOB is not supported for node %s" %
                                 self.op.node_name)

    self.op.node_name = node.name
    self.node = node

  def ExpandNames(self):
    """Gather locks we need.

    """
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def Exec(self, feedback_fn):
    """Execute OOB and return result if we expect any.

    """
    master_node = self.cfg.GetMasterNode()

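    # the OOB helper itself is run on the master node, which then reaches
    # the target node through the out-of-band channel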
    logging.info("Executing out-of-band command '%s' using '%s' on %s",
3238
                 self.op.command, self.oob_program, self.op.node_name)
3239
    result = self.rpc.call_run_oob(master_node, self.oob_program,
3240
                                   self.op.command, self.op.node_name,
3241
                                   self.op.timeout)
3242

    
3243
    result.Raise("An error occurred on execution of OOB helper")
3244

    
3245
    if self.op.command == constants.OOB_HEALTH:
3246
      # For health we should log important events
3247
      for item, status in result.payload:
3248
        if status in [constants.OOB_STATUS_WARNING,
3249
                      constants.OOB_STATUS_CRITICAL]:
3250
          logging.warning("On node '%s' item '%s' has status '%s'",
3251
                          self.op.node_name, item, status)
3252

    
3253
    return result.payload
3254

    
3255

    
3256
class LUDiagnoseOS(NoHooksLU):
3257
  """Logical unit for OS diagnose/query.
3258

3259
  """
3260
  _OP_PARAMS = [
3261
    _POutputFields,
3262
    ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
3263
    ]
3264
  REQ_BGL = False
3265
  _HID = "hidden"
3266
  _BLK = "blacklisted"
3267
  _VLD = "valid"
3268
  _FIELDS_STATIC = utils.FieldSet()
3269
  _FIELDS_DYNAMIC = utils.FieldSet("name", _VLD, "node_status", "variants",
3270
                                   "parameters", "api_versions", _HID, _BLK)
3271

    
3272
  def CheckArguments(self):
3273
    if self.op.names:
3274
      raise errors.OpPrereqError("Selective OS query not supported",
3275
                                 errors.ECODE_INVAL)
3276

    
3277
    _CheckOutputFields(static=self._FIELDS_STATIC,
3278
                       dynamic=self._FIELDS_DYNAMIC,
3279
                       selected=self.op.output_fields)
3280

    
3281
  def ExpandNames(self):
3282
    # Lock all nodes, in shared mode
3283
    # Temporary removal of locks, should be reverted later
3284
    # TODO: reintroduce locks when they are lighter-weight
3285
    self.needed_locks = {}
3286
    #self.share_locks[locking.LEVEL_NODE] = 1
3287
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3288

    
3289
  @staticmethod
3290
  def _DiagnoseByOS(rlist):
3291
    """Remaps a per-node return list into an a per-os per-node dictionary
3292

3293
    @param rlist: a map with node names as keys and OS objects as values
3294

3295
    @rtype: dict
3296
    @return: a dictionary with osnames as keys and as value another
3297
        map, with nodes as keys and tuples of (path, status, diagnose,
3298
        variants, parameters, api_versions) as values, eg::
3299

3300
          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3301
                                     (/srv/..., False, "invalid api")],
3302
                           "node2": [(/srv/..., True, "", [], [])]}
3303
          }
3304

3305
    """
3306
    all_os = {}
3307
    # we build here the list of nodes that didn't fail the RPC (at RPC
3308
    # level), so that nodes with a non-responding node daemon don't
3309
    # make all OSes invalid
3310
    good_nodes = [node_name for node_name in rlist
3311
                  if not rlist[node_name].fail_msg]
3312
    for node_name, nr in rlist.items():
3313
      if nr.fail_msg or not nr.payload:
3314
        continue
3315
      for (name, path, status, diagnose, variants,
3316
           params, api_versions) in nr.payload:
3317
        if name not in all_os:
3318
          # build a list of nodes for this os containing empty lists
3319
          # for each node in node_list
3320
          all_os[name] = {}
3321
          for nname in good_nodes:
3322
            all_os[name][nname] = []
3323
        # convert params from [name, help] to (name, help)
3324
        params = [tuple(v) for v in params]
3325
        all_os[name][node_name].append((path, status, diagnose,
3326
                                        variants, params, api_versions))
3327
    return all_os
3328

    
3329
  def Exec(self, feedback_fn):
3330
    """Compute the list of OSes.
3331

3332
    """
3333
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
3334
    node_data = self.rpc.call_os_diagnose(valid_nodes)
3335
    pol = self._DiagnoseByOS(node_data)
3336
    output = []
3337
    cluster = self.cfg.GetClusterInfo()
3338

    
3339
    for os_name in utils.NiceSort(pol.keys()):
3340
      os_data = pol[os_name]
3341
      row = []
3342
      valid = True
3343
      (variants, params, api_versions) = null_state = (set(), set(), set())
3344
      for idx, osl in enumerate(os_data.values()):
3345
        valid = bool(valid and osl and osl[0][1])
3346
        if not valid:
3347
          (variants, params, api_versions) = null_state
3348
          break
3349
        node_variants, node_params, node_api = osl[0][3:6]
3350
        if idx == 0: # first entry
3351
          variants = set(node_variants)
3352
          params = set(node_params)
3353
          api_versions = set(node_api)
3354
        else: # keep consistency
3355
          variants.intersection_update(node_variants)
3356
          params.intersection_update(node_params)
3357
          api_versions.intersection_update(node_api)
3358

    
3359
      is_hid = os_name in cluster.hidden_os
3360
      is_blk = os_name in cluster.blacklisted_os
3361
      if ((self._HID not in self.op.output_fields and is_hid) or
3362
          (self._BLK not in self.op.output_fields and is_blk) or
3363
          (self._VLD not in self.op.output_fields and not valid)):
3364
        continue
3365

    
3366
      for field in self.op.output_fields:
3367
        if field == "name":
3368
          val = os_name
3369
        elif field == self._VLD:
3370
          val = valid
3371
        elif field == "node_status":
3372
          # this is just a copy of the dict
3373
          val = {}
3374
          for node_name, nos_list in os_data.items():
3375
            val[node_name] = nos_list
3376
        elif field == "variants":
3377
          val = utils.NiceSort(list(variants))
3378
        elif field == "parameters":
3379
          val = list(params)
3380
        elif field == "api_versions":
3381
          val = list(api_versions)
3382
        elif field == self._HID:
3383
          val = is_hid
3384
        elif field == self._BLK:
3385
          val = is_blk
3386
        else:
3387
          raise errors.ParameterError(field)
3388
        row.append(val)
3389
      output.append(row)
3390

    
3391
    return output
3392

    
3393

    
3394
class LURemoveNode(LogicalUnit):
3395
  """Logical unit for removing a node.
3396

3397
  """
3398
  HPATH = "node-remove"
3399
  HTYPE = constants.HTYPE_NODE
3400
  _OP_PARAMS = [
3401
    _PNodeName,
3402
    ]
3403

    
3404
  def BuildHooksEnv(self):
3405
    """Build hooks env.
3406

3407
    This doesn't run on the target node in the pre phase as a failed
3408
    node would then be impossible to remove.
3409

3410
    """
3411
    env = {
3412
      "OP_TARGET": self.op.node_name,
3413
      "NODE_NAME": self.op.node_name,
3414
      }
3415
    all_nodes = self.cfg.GetNodeList()
3416
    try:
3417
      all_nodes.remove(self.op.node_name)
3418
    except ValueError:
3419
      logging.warning("Node %s which is about to be removed not found"
3420
                      " in the all nodes list", self.op.node_name)
3421
    return env, all_nodes, all_nodes
3422

    
3423
  def CheckPrereq(self):
3424
    """Check prerequisites.
3425

3426
    This checks:
3427
     - the node exists in the configuration
3428
     - it does not have primary or secondary instances
3429
     - it's not the master
3430

3431
    Any errors are signaled by raising errors.OpPrereqError.
3432

3433
    """
3434
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3435
    node = self.cfg.GetNodeInfo(self.op.node_name)
3436
    assert node is not None
3437

    
3438
    instance_list = self.cfg.GetInstanceList()
3439

    
3440
    masternode = self.cfg.GetMasterNode()
3441
    if node.name == masternode:
3442
      raise errors.OpPrereqError("Node is the master node,"
3443
                                 " you need to failover first.",
3444
                                 errors.ECODE_INVAL)
3445

    
3446
    for instance_name in instance_list:
3447
      instance = self.cfg.GetInstanceInfo(instance_name)
3448
      if node.name in instance.all_nodes:
3449
        raise errors.OpPrereqError("Instance %s is still running on the node,"
3450
                                   " please remove first." % instance_name,
3451
                                   errors.ECODE_INVAL)
3452
    self.op.node_name = node.name
3453
    self.node = node
3454

    
3455
  def Exec(self, feedback_fn):
3456
    """Removes the node from the cluster.
3457

3458
    """
3459
    node = self.node
3460
    logging.info("Stopping the node daemon and removing configs from node %s",
3461
                 node.name)
3462

    
3463
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3464

    
3465
    # Promote nodes to master candidate as needed
3466
    _AdjustCandidatePool(self, exceptions=[node.name])
3467
    self.context.RemoveNode(node.name)
3468

    
3469
    # Run post hooks on the node before it's removed
3470
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
3471
    try:
3472
      hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
3473
    except:
3474
      # pylint: disable-msg=W0702
3475
      self.LogWarning("Errors occurred running hooks on %s" % node.name)
3476

    
3477
    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3478
    msg = result.fail_msg
3479
    if msg:
3480
      self.LogWarning("Errors encountered on the remote node while leaving"
3481
                      " the cluster: %s", msg)
3482

    
3483
    # Remove node from our /etc/hosts
3484
    if self.cfg.GetClusterInfo().modify_etc_hosts:
3485
      master_node = self.cfg.GetMasterNode()
3486
      result = self.rpc.call_etc_hosts_modify(master_node,
3487
                                              constants.ETC_HOSTS_REMOVE,
3488
                                              node.name, None)
3489
      result.Raise("Can't update hosts file with new host data")
3490
      _RedistributeAncillaryFiles(self)
3491

    
3492

    
3493
class _NodeQuery(_QueryBase):
3494
  FIELDS = query.NODE_FIELDS
3495

    
3496
  def ExpandNames(self, lu):
3497
    lu.needed_locks = {}
3498
    lu.share_locks[locking.LEVEL_NODE] = 1
3499

    
3500
    if self.names:
3501
      self.wanted = _GetWantedNodes(lu, self.names)
3502
    else:
3503
      self.wanted = locking.ALL_SET
3504

    
3505
    self.do_locking = (self.use_locking and
3506
                       query.NQ_LIVE in self.requested_data)
3507

    
3508
    if self.do_locking:
3509
      # if we don't request only static fields, we need to lock the nodes
3510
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted
3511

    
3512
  def DeclareLocks(self, _):
3513
    pass
3514

    
3515
  def _GetQueryData(self, lu):
3516
    """Computes the list of nodes and their attributes.
3517

3518
    """
3519
    all_info = lu.cfg.GetAllNodesInfo()
3520

    
3521
    if self.do_locking:
3522
      nodenames = lu.acquired_locks[locking.LEVEL_NODE]
3523
    elif self.wanted != locking.ALL_SET:
3524
      nodenames = self.wanted
3525
      missing = set(nodenames).difference(all_info.keys())
3526
      if missing:
3527
        raise errors.OpExecError("Some nodes were removed before retrieving"
3528
                                 " their data: %s" % missing)
3529
    else:
3530
      nodenames = all_info.keys()
3531

    
3532
    nodenames = utils.NiceSort(nodenames)
3533

    
3534
    # Gather data as requested
3535
    if query.NQ_LIVE in self.requested_data:
3536
      node_data = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
3537
                                        lu.cfg.GetHypervisorType())
3538
      live_data = dict((name, nresult.payload)
3539
                       for (name, nresult) in node_data.items()
3540
                       if not nresult.fail_msg and nresult.payload)
3541
    else:
3542
      live_data = None
3543

    
3544
    if query.NQ_INST in self.requested_data:
3545
      node_to_primary = dict([(name, set()) for name in nodenames])
3546
      node_to_secondary = dict([(name, set()) for name in nodenames])
3547

    
3548
      inst_data = lu.cfg.GetAllInstancesInfo()
3549

    
3550
      for inst in inst_data.values():
3551
        if inst.primary_node in node_to_primary:
3552
          node_to_primary[inst.primary_node].add(inst.name)
3553
        for secnode in inst.secondary_nodes:
3554
          if secnode in node_to_secondary:
3555
            node_to_secondary[secnode].add(inst.name)
3556
    else:
3557
      node_to_primary = None
3558
      node_to_secondary = None
3559

    
3560
    if query.NQ_GROUP in self.requested_data:
3561
      groups = lu.cfg.GetAllNodeGroupsInfo()
3562
    else:
3563
      groups = {}
3564

    
3565
    return query.NodeQueryData([all_info[name] for name in nodenames],
3566
                               live_data, lu.cfg.GetMasterNode(),
3567
                               node_to_primary, node_to_secondary, groups)
3568

    
3569

    
3570
class LUQueryNodes(NoHooksLU):
3571
  """Logical unit for querying nodes.
3572

3573
  """
3574
  # pylint: disable-msg=W0142
3575
  _OP_PARAMS = [
3576
    _POutputFields,
3577
    ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
3578
    ("use_locking", False, ht.TBool),
3579
    ]
3580
  REQ_BGL = False
3581

    
3582
  def CheckArguments(self):
3583
    self.nq = _NodeQuery(self.op.names, self.op.output_fields,
3584
                         self.op.use_locking)
3585

    
3586
  def ExpandNames(self):
3587
    self.nq.ExpandNames(self)
3588

    
3589
  def Exec(self, feedback_fn):
3590
    return self.nq.OldStyleQuery(self)
3591

    
3592

    
3593
class LUQueryNodeVolumes(NoHooksLU):
3594
  """Logical unit for getting volumes on node(s).
3595

3596
  """
3597
  _OP_PARAMS = [
3598
    _POutputFields,
3599
    ("nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
3600
    ]
3601
  REQ_BGL = False
3602
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3603
  _FIELDS_STATIC = utils.FieldSet("node")
3604

    
3605
  def CheckArguments(self):
3606
    _CheckOutputFields(static=self._FIELDS_STATIC,
3607
                       dynamic=self._FIELDS_DYNAMIC,
3608
                       selected=self.op.output_fields)
3609

    
3610
  def ExpandNames(self):
3611
    self.needed_locks = {}
3612
    self.share_locks[locking.LEVEL_NODE] = 1
3613
    if not self.op.nodes:
3614
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3615
    else:
3616
      self.needed_locks[locking.LEVEL_NODE] = \
3617
        _GetWantedNodes(self, self.op.nodes)
3618

    
3619
  def Exec(self, feedback_fn):
3620
    """Computes the list of nodes and their attributes.
3621

3622
    """
3623
    nodenames = self.acquired_locks[locking.LEVEL_NODE]
3624
    volumes = self.rpc.call_node_volumes(nodenames)
3625

    
3626
    ilist = [self.cfg.GetInstanceInfo(iname) for iname
3627
             in self.cfg.GetInstanceList()]
3628

    
3629
    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3630

    
3631
    output = []
3632
    for node in nodenames:
3633
      nresult = volumes[node]
3634
      if nresult.offline:
3635
        continue
3636
      msg = nresult.fail_msg
3637
      if msg:
3638
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3639
        continue
3640

    
3641
      node_vols = nresult.payload[:]
3642
      node_vols.sort(key=lambda vol: vol['dev'])
3643

    
3644
      for vol in node_vols:
3645
        node_output = []
3646
        for field in self.op.output_fields:
3647
          if field == "node":
3648
            val = node
3649
          elif field == "phys":
3650
            val = vol['dev']
3651
          elif field == "vg":
3652
            val = vol['vg']
3653
          elif field == "name":
3654
            val = vol['name']
3655
          elif field == "size":
3656
            val = int(float(vol['size']))
3657
          elif field == "instance":
3658
            for inst in ilist:
3659
              if node not in lv_by_node[inst]:
3660
                continue
3661
              if vol['name'] in lv_by_node[inst][node]:
3662
                val = inst.name
3663
                break
3664
            else:
3665
              val = '-'
3666
          else:
3667
            raise errors.ParameterError(field)
3668
          node_output.append(str(val))
3669

    
3670
        output.append(node_output)
3671

    
3672
    return output
3673

    
3674

    
3675
class LUQueryNodeStorage(NoHooksLU):
3676
  """Logical unit for getting information on storage units on node(s).
3677

3678
  """
3679
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3680
  _OP_PARAMS = [
3681
    _POutputFields,
3682
    ("nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
3683
    ("storage_type", ht.NoDefault, _CheckStorageType),
3684
    ("name", None, ht.TMaybeString),
3685
    ]
3686
  REQ_BGL = False
3687

    
3688
  def CheckArguments(self):
3689
    _CheckOutputFields(static=self._FIELDS_STATIC,
3690
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3691
                       selected=self.op.output_fields)
3692

    
3693
  def ExpandNames(self):
3694
    self.needed_locks = {}
3695
    self.share_locks[locking.LEVEL_NODE] = 1
3696

    
3697
    if self.op.nodes:
3698
      self.needed_locks[locking.LEVEL_NODE] = \
3699
        _GetWantedNodes(self, self.op.nodes)
3700
    else:
3701
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3702

    
3703
  def Exec(self, feedback_fn):
3704
    """Computes the list of nodes and their attributes.
3705

3706
    """
3707
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3708

    
3709
    # Always get name to sort by
3710
    if constants.SF_NAME in self.op.output_fields:
3711
      fields = self.op.output_fields[:]
3712
    else:
3713
      fields = [constants.SF_NAME] + self.op.output_fields
3714

    
3715
    # Never ask for node or type as it's only known to the LU
3716
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
3717
      while extra in fields:
3718
        fields.remove(extra)
3719

    
3720
    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3721
    name_idx = field_idx[constants.SF_NAME]
3722

    
3723
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3724
    data = self.rpc.call_storage_list(self.nodes,
3725
                                      self.op.storage_type, st_args,
3726
                                      self.op.name, fields)
3727

    
3728
    result = []
3729

    
3730
    for node in utils.NiceSort(self.nodes):
3731
      nresult = data[node]
3732
      if nresult.offline:
3733
        continue
3734

    
3735
      msg = nresult.fail_msg
3736
      if msg:
3737
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3738
        continue
3739

    
3740
      rows = dict([(row[name_idx], row) for row in nresult.payload])
3741

    
3742
      for name in utils.NiceSort(rows.keys()):
3743
        row = rows[name]
3744

    
3745
        out = []
3746

    
3747
        for field in self.op.output_fields:
3748
          if field == constants.SF_NODE:
3749
            val = node
3750
          elif field == constants.SF_TYPE:
3751
            val = self.op.storage_type
3752
          elif field in field_idx:
3753
            val = row[field_idx[field]]
3754
          else:
3755
            raise errors.ParameterError(field)
3756

    
3757
          out.append(val)
3758

    
3759
        result.append(out)
3760

    
3761
    return result
3762

    
3763

    
3764
def _InstanceQuery(*args): # pylint: disable-msg=W0613
3765
  """Dummy until instance queries have been converted to query2.
3766

3767
  """
3768
  raise NotImplementedError
3769

    
3770

    
3771
#: Query type implementations
3772
_QUERY_IMPL = {
3773
  constants.QR_INSTANCE: _InstanceQuery,
3774
  constants.QR_NODE: _NodeQuery,
3775
  }
3776

    
3777

    
3778
def _GetQueryImplementation(name):
3779
  """Returns the implemtnation for a query type.
3780

3781
  @param name: Query type, must be one of L{constants.QR_OP_QUERY}
3782

3783
  """
3784
  try:
3785
    return _QUERY_IMPL[name]
3786
  except KeyError:
3787
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
3788
                               errors.ECODE_INVAL)
3789

    
3790

    
3791
class LUQuery(NoHooksLU):
3792
  """Query for resources/items of a certain kind.
3793

3794
  """
3795
  # pylint: disable-msg=W0142
3796
  _OP_PARAMS = [
3797
    ("what", ht.NoDefault, ht.TElemOf(constants.QR_OP_QUERY)),
3798
    ("fields", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
3799
    ("filter", None, ht.TOr(ht.TNone,
3800
                            ht.TListOf(ht.TOr(ht.TNonEmptyString, ht.TList)))),
3801
    ]
3802
  REQ_BGL = False
3803

    
3804
  def CheckArguments(self):
3805
    qcls = _GetQueryImplementation(self.op.what)
3806
    names = qlang.ReadSimpleFilter("name", self.op.filter)
3807

    
3808
    self.impl = qcls(names, self.op.fields, False)
3809

    
3810
  def ExpandNames(self):
3811
    self.impl.ExpandNames(self)
3812

    
3813
  def DeclareLocks(self, level):
3814
    self.impl.DeclareLocks(self, level)
3815

    
3816
  def Exec(self, feedback_fn):
3817
    return self.impl.NewStyleQuery(self)
3818

    
3819

    
3820
class LUQueryFields(NoHooksLU):
3821
  """Query for resources/items of a certain kind.
3822

3823
  """
3824
  # pylint: disable-msg=W0142
3825
  _OP_PARAMS = [
3826
    ("what", ht.NoDefault, ht.TElemOf(constants.QR_OP_QUERY)),
3827
    ("fields", None, ht.TOr(ht.TNone, ht.TListOf(ht.TNonEmptyString))),
3828
    ]
3829
  REQ_BGL = False
3830

    
3831
  def CheckArguments(self):
3832
    self.qcls = _GetQueryImplementation(self.op.what)
3833

    
3834
  def ExpandNames(self):
3835
    self.needed_locks = {}
3836

    
3837
  def Exec(self, feedback_fn):
3838
    return self.qcls.FieldsQuery(self.op.fields)
3839

    
3840

    
3841
class LUModifyNodeStorage(NoHooksLU):
3842
  """Logical unit for modifying a storage volume on a node.
3843

3844
  """
3845
  _OP_PARAMS = [
3846
    _PNodeName,
3847
    ("storage_type", ht.NoDefault, _CheckStorageType),
3848
    ("name", ht.NoDefault, ht.TNonEmptyString),
3849
    ("changes", ht.NoDefault, ht.TDict),
3850
    ]
3851
  REQ_BGL = False
3852

    
3853
  def CheckArguments(self):
3854
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3855

    
3856
    storage_type = self.op.storage_type
3857

    
3858
    try:
3859
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
3860
    except KeyError:
3861
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
3862
                                 " modified" % storage_type,
3863
                                 errors.ECODE_INVAL)
3864

    
3865
    diff = set(self.op.changes.keys()) - modifiable
3866
    if diff:
3867
      raise errors.OpPrereqError("The following fields can not be modified for"
3868
                                 " storage units of type '%s': %r" %
3869
                                 (storage_type, list(diff)),
3870
                                 errors.ECODE_INVAL)
3871

    
3872
  def ExpandNames(self):
3873
    self.needed_locks = {
3874
      locking.LEVEL_NODE: self.op.node_name,
3875
      }
3876

    
3877
  def Exec(self, feedback_fn):
3878
    """Computes the list of nodes and their attributes.
3879

3880
    """
3881
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3882
    result = self.rpc.call_storage_modify(self.op.node_name,
3883
                                          self.op.storage_type, st_args,
3884
                                          self.op.name, self.op.changes)
3885
    result.Raise("Failed to modify storage unit '%s' on %s" %
3886
                 (self.op.name, self.op.node_name))
3887

    
3888

    
3889
class LUAddNode(LogicalUnit):
3890
  """Logical unit for adding node to the cluster.
3891

3892
  """
3893
  HPATH = "node-add"
3894
  HTYPE = constants.HTYPE_NODE
3895
  _OP_PARAMS = [
3896
    _PNodeName,
3897
    ("primary_ip", None, ht.NoType),
3898
    ("secondary_ip", None, ht.TMaybeString),
3899
    ("readd", False, ht.TBool),
3900
    ("group", None, ht.TMaybeString),
3901
    ("master_capable", None, ht.TMaybeBool),
3902
    ("vm_capable", None, ht.TMaybeBool),
3903
    ("ndparams", None, ht.TOr(ht.TDict, ht.TNone)),
3904
    ]
3905
  _NFLAGS = ["master_capable", "vm_capable"]
3906

    
3907
  def CheckArguments(self):
3908
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
3909
    # validate/normalize the node name
3910
    self.hostname = netutils.GetHostname(name=self.op.node_name,
3911
                                         family=self.primary_ip_family)
3912
    self.op.node_name = self.hostname.name
3913
    if self.op.readd and self.op.group:
3914
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
3915
                                 " being readded", errors.ECODE_INVAL)
3916

    
3917
  def BuildHooksEnv(self):
3918
    """Build hooks env.
3919

3920
    This will run on all nodes before, and on all nodes + the new node after.
3921

3922
    """
3923
    env = {
3924
      "OP_TARGET": self.op.node_name,
3925
      "NODE_NAME": self.op.node_name,
3926
      "NODE_PIP": self.op.primary_ip,
3927
      "NODE_SIP": self.op.secondary_ip,
3928
      "MASTER_CAPABLE": str(self.op.master_capable),
3929
      "VM_CAPABLE": str(self.op.vm_capable),
3930
      }
3931
    nodes_0 = self.cfg.GetNodeList()
3932
    nodes_1 = nodes_0 + [self.op.node_name, ]
3933
    return env, nodes_0, nodes_1
3934

    
3935
  def CheckPrereq(self):
3936
    """Check prerequisites.
3937

3938
    This checks:
3939
     - the new node is not already in the config
3940
     - it is resolvable
3941
     - its parameters (single/dual homed) matches the cluster
3942

3943
    Any errors are signaled by raising errors.OpPrereqError.
3944

3945
    """
3946
    cfg = self.cfg
3947
    hostname = self.hostname
3948
    node = hostname.name
3949
    primary_ip = self.op.primary_ip = hostname.ip
3950
    if self.op.secondary_ip is None:
3951
      if self.primary_ip_family == netutils.IP6Address.family:
3952
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
3953
                                   " IPv4 address must be given as secondary",
3954
                                   errors.ECODE_INVAL)
3955
      self.op.secondary_ip = primary_ip
3956

    
3957
    secondary_ip = self.op.secondary_ip
3958
    if not netutils.IP4Address.IsValid(secondary_ip):
3959
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
3960
                                 " address" % secondary_ip, errors.ECODE_INVAL)
3961

    
3962
    node_list = cfg.GetNodeList()
3963
    if not self.op.readd and node in node_list:
3964
      raise errors.OpPrereqError("Node %s is already in the configuration" %
3965
                                 node, errors.ECODE_EXISTS)
3966
    elif self.op.readd and node not in node_list:
3967
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3968
                                 errors.ECODE_NOENT)
3969

    
3970
    self.changed_primary_ip = False
3971

    
3972
    for existing_node_name in node_list:
3973
      existing_node = cfg.GetNodeInfo(existing_node_name)
3974

    
3975
      if self.op.readd and node == existing_node_name:
3976
        if existing_node.secondary_ip != secondary_ip:
3977
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
3978
                                     " address configuration as before",
3979
                                     errors.ECODE_INVAL)
3980
        if existing_node.primary_ip != primary_ip:
3981
          self.changed_primary_ip = True
3982

    
3983
        continue
3984

    
3985
      if (existing_node.primary_ip == primary_ip or
3986
          existing_node.secondary_ip == primary_ip or
3987
          existing_node.primary_ip == secondary_ip or
3988
          existing_node.secondary_ip == secondary_ip):
3989
        raise errors.OpPrereqError("New node ip address(es) conflict with"
3990
                                   " existing node %s" % existing_node.name,
3991
                                   errors.ECODE_NOTUNIQUE)
3992

    
3993
    # After this 'if' block, None is no longer a valid value for the
3994
    # _capable op attributes
3995
    if self.op.readd:
3996
      old_node = self.cfg.GetNodeInfo(node)
3997
      assert old_node is not None, "Can't retrieve locked node %s" % node
3998
      for attr in self._NFLAGS:
3999
        if getattr(self.op, attr) is None:
4000
          setattr(self.op, attr, getattr(old_node, attr))
4001
    else:
4002
      for attr in self._NFLAGS:
4003
        if getattr(self.op, attr) is None:
4004
          setattr(self.op, attr, True)
4005

    
4006
    if self.op.readd and not self.op.vm_capable:
4007
      pri, sec = cfg.GetNodeInstances(node)
4008
      if pri or sec:
4009
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4010
                                   " flag set to false, but it already holds"
4011
                                   " instances" % node,
4012
                                   errors.ECODE_STATE)
4013

    
4014
    # check that the type of the node (single versus dual homed) is the
4015
    # same as for the master
4016
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4017
    master_singlehomed = myself.secondary_ip == myself.primary_ip
4018
    newbie_singlehomed = secondary_ip == primary_ip
4019
    if master_singlehomed != newbie_singlehomed:
4020
      if master_singlehomed:
4021
        raise errors.OpPrereqError("The master has no secondary ip but the"
4022
                                   " new node has one",
4023
                                   errors.ECODE_INVAL)
4024
      else:
4025
        raise errors.OpPrereqError("The master has a secondary ip but the"
4026
                                   " new node doesn't have one",
4027
                                   errors.ECODE_INVAL)
4028

    
4029
    # checks reachability
4030
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4031
      raise errors.OpPrereqError("Node not reachable by ping",
4032
                                 errors.ECODE_ENVIRON)
4033

    
4034
    if not newbie_singlehomed:
4035
      # check reachability from my secondary ip to newbie's secondary ip
4036
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4037
                           source=myself.secondary_ip):
4038
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4039
                                   " based ping to node daemon port",
4040
                                   errors.ECODE_ENVIRON)
4041

    
4042
    if self.op.readd:
4043
      exceptions = [node]
4044
    else:
4045
      exceptions = []
4046

    
4047
    if self.op.master_capable:
4048
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
4049
    else:
4050
      self.master_candidate = False
4051

    
4052
    if self.op.readd:
4053
      self.new_node = old_node
4054
    else:
4055
      node_group = cfg.LookupNodeGroup(self.op.group)
4056
      self.new_node = objects.Node(name=node,
4057
                                   primary_ip=primary_ip,
4058
                                   secondary_ip=secondary_ip,
4059
                                   master_candidate=self.master_candidate,
4060
                                   offline=False, drained=False,
4061
                                   group=node_group)
4062

    
4063
    if self.op.ndparams:
4064
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4065

    
4066
  def Exec(self, feedback_fn):
4067
    """Adds the new node to the cluster.
4068

4069
    """
4070
    new_node = self.new_node
4071
    node = new_node.name
4072

    
4073
    # for re-adds, reset the offline/drained/master-candidate flags;
4074
    # we need to reset here, otherwise offline would prevent RPC calls
4075
    # later in the procedure; this also means that if the re-add
4076
    # fails, we are left with a non-offlined, broken node
4077
    if self.op.readd:
4078
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
4079
      self.LogInfo("Readding a node, the offline/drained flags were reset")
4080
      # if we demote the node, we do cleanup later in the procedure
4081
      new_node.master_candidate = self.master_candidate
4082
      if self.changed_primary_ip:
4083
        new_node.primary_ip = self.op.primary_ip
4084

    
4085
    # copy the master/vm_capable flags
4086
    for attr in self._NFLAGS:
4087
      setattr(new_node, attr, getattr(self.op, attr))
4088

    
4089
    # notify the user about any possible mc promotion
4090
    if new_node.master_candidate:
4091
      self.LogInfo("Node will be a master candidate")
4092

    
4093
    if self.op.ndparams:
4094
      new_node.ndparams = self.op.ndparams
4095

    
4096
    # check connectivity
4097
    result = self.rpc.call_version([node])[node]
4098
    result.Raise("Can't get version information from node %s" % node)
4099
    if constants.PROTOCOL_VERSION == result.payload:
4100
      logging.info("Communication to node %s fine, sw version %s match",
4101
                   node, result.payload)
4102
    else:
4103
      raise errors.OpExecError("Version mismatch master version %s,"
4104
                               " node version %s" %
4105
                               (constants.PROTOCOL_VERSION, result.payload))
4106

    
4107
    # Add node to our /etc/hosts, and add key to known_hosts
4108
    if self.cfg.GetClusterInfo().modify_etc_hosts:
4109
      master_node = self.cfg.GetMasterNode()
4110
      result = self.rpc.call_etc_hosts_modify(master_node,
4111
                                              constants.ETC_HOSTS_ADD,
4112
                                              self.hostname.name,
4113
                                              self.hostname.ip)
4114
      result.Raise("Can't update hosts file with new host data")
4115

    
4116
    if new_node.secondary_ip != new_node.primary_ip:
4117
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
4118
                               False)
4119

    
4120
    node_verify_list = [self.cfg.GetMasterNode()]
4121
    node_verify_param = {
4122
      constants.NV_NODELIST: [node],
4123
      # TODO: do a node-net-test as well?
4124
    }
4125

    
4126
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
4127
                                       self.cfg.GetClusterName())
4128
    for verifier in node_verify_list:
4129
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
4130
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
4131
      if nl_payload:
4132
        for failed in nl_payload:
4133
          feedback_fn("ssh/hostname verification failed"
4134
                      " (checking from %s): %s" %
4135
                      (verifier, nl_payload[failed]))
4136
        raise errors.OpExecError("ssh/hostname verification failed.")
4137

    
4138
    if self.op.readd:
4139
      _RedistributeAncillaryFiles(self)
4140
      self.context.ReaddNode(new_node)
4141
      # make sure we redistribute the config
4142
      self.cfg.Update(new_node, feedback_fn)
4143
      # and make sure the new node will not have old files around
4144
      if not new_node.master_candidate:
4145
        result = self.rpc.call_node_demote_from_mc(new_node.name)
4146
        msg = result.fail_msg
4147
        if msg:
4148
          self.LogWarning("Node failed to demote itself from master"
4149
                          " candidate status: %s" % msg)
4150
    else:
4151
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
4152
                                  additional_vm=self.op.vm_capable)
4153
      self.context.AddNode(new_node, self.proc.GetECId())
4154

    
4155

    
4156
class LUSetNodeParams(LogicalUnit):
4157
  """Modifies the parameters of a node.
4158

4159
  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
4160
      to the node role (as _ROLE_*)
4161
  @cvar _R2F: a dictionary from node role to tuples of flags
4162
  @cvar _FLAGS: a list of attribute names corresponding to the flags
4163

4164
  """
4165
  HPATH = "node-modify"
4166
  HTYPE = constants.HTYPE_NODE
4167
  _OP_PARAMS = [
4168
    _PNodeName,
4169
    ("master_candidate", None, ht.TMaybeBool),
4170
    ("offline", None, ht.TMaybeBool),
4171
    ("drained", None, ht.TMaybeBool),
4172
    ("auto_promote", False, ht.TBool),
4173
    ("master_capable", None, ht.TMaybeBool),
4174
    ("vm_capable", None, ht.TMaybeBool),
4175
    ("secondary_ip", None, ht.TMaybeString),
4176
    ("ndparams", None, ht.TOr(ht.TDict, ht.TNone)),
4177
    _PForce,
4178
    ]
4179
  REQ_BGL = False
4180
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
4181
  _F2R = {
4182
    (True, False, False): _ROLE_CANDIDATE,
4183
    (False, True, False): _ROLE_DRAINED,
4184
    (False, False, True): _ROLE_OFFLINE,
4185
    (False, False, False): _ROLE_REGULAR,
4186
    }
4187
  _R2F = dict((v, k) for k, v in _F2R.items())
4188
  _FLAGS = ["master_candidate", "drained", "offline"]

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
                self.op.master_capable, self.op.vm_capable,
                self.op.secondary_ip, self.op.ndparams]
    if all_mods.count(None) == len(all_mods):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we might be demoting from MC
    self.might_demote = (self.op.master_candidate == False or
                         self.op.offline == True or
                         self.op.drained == True or
                         self.op.master_capable == False)

    if self.op.secondary_ip:
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                   " address" % self.op.secondary_ip,
                                   errors.ECODE_INVAL)

    self.lock_all = self.op.auto_promote and self.might_demote
    self.lock_instances = self.op.secondary_ip is not None

  def ExpandNames(self):
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

    if self.lock_instances:
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET

  def DeclareLocks(self, level):
    # If we have locked all instances, before waiting to lock nodes, release
    # all the ones living on nodes unrelated to the current operation.
    if level == locking.LEVEL_NODE and self.lock_instances:
      instances_release = []
      instances_keep = []
      self.affected_instances = []
      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
        for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
          instance = self.context.cfg.GetInstanceInfo(instance_name)
          i_mirrored = instance.disk_template in constants.DTS_NET_MIRROR
          if i_mirrored and self.op.node_name in instance.all_nodes:
            instances_keep.append(instance_name)
            self.affected_instances.append(instance)
          else:
            instances_release.append(instance_name)
        if instances_release:
          self.context.glm.release(locking.LEVEL_INSTANCE, instances_release)
          self.acquired_locks[locking.LEVEL_INSTANCE] = instances_keep

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }
    nl = [self.cfg.GetMasterNode(),
          self.op.node_name]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the node's current state against the requested changes.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via master-failover",
                                   errors.ECODE_INVAL)

    if self.op.master_candidate and not node.master_capable:
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
                                 " it a master candidate" % node.name,
                                 errors.ECODE_STATE)

    if self.op.vm_capable == False:
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
      if ipri or isec:
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
                                   " the vm_capable flag" % node.name,
                                   errors.ECODE_STATE)

    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto-promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto_promote to allow promotion",
                                   errors.ECODE_STATE)

    self.old_flags = old_flags = (node.master_candidate,
                                  node.drained, node.offline)
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
    self.old_role = old_role = self._F2R[old_flags]

    # Check for ineffective changes
    for attr in self._FLAGS:
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
        setattr(self.op, attr, None)

    # Past this point, any flag change to False means a transition
    # away from the respective state, as only real changes are kept

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.op.drained == False or self.op.offline == False or
        (self.op.master_capable and not node.master_capable)):
      if _DecideSelfPromotion(self):
        self.op.master_candidate = True
        self.LogInfo("Auto-promoting node to master candidate")

    # If we're no longer master capable, we'll demote ourselves from MC
    if self.op.master_capable == False and node.master_candidate:
      self.LogInfo("Demoting from master candidate")
      self.op.master_candidate = False

    # Compute new role
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
    if self.op.master_candidate:
      new_role = self._ROLE_CANDIDATE
    elif self.op.drained:
      new_role = self._ROLE_DRAINED
    elif self.op.offline:
      new_role = self._ROLE_OFFLINE
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # only) True flag
      new_role = self._ROLE_REGULAR
    else: # no new flags, nothing, keep old role
      new_role = old_role

    self.new_role = new_role

    if old_role == self._ROLE_OFFLINE and new_role != old_role:
      # Trying to transition out of offline status
      result = self.rpc.call_version([node.name])[node.name]
      if result.fail_msg:
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                   " to report its version: %s" %
                                   (node.name, result.fail_msg),
                                   errors.ECODE_STATE)
      else:
        self.LogWarning("Transitioning node from offline to online state"
                        " without using re-add. Please make sure the node"
                        " is healthy!")

    if self.op.secondary_ip:
      # Ok even without locking, because this can't be changed by any LU
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
      master_singlehomed = master.secondary_ip == master.primary_ip
      if master_singlehomed and self.op.secondary_ip:
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
                                   " homed cluster", errors.ECODE_INVAL)

      if node.offline:
        if self.affected_instances:
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
                                     " node has instances (%s) configured"
                                     " to use it" % self.affected_instances)
      else:
        # On online nodes, check that no instances are running, and that
        # the node has the new ip and we can reach it.
        for instance in self.affected_instances:
          _CheckInstanceDown(self, instance, "cannot change secondary ip")

        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
        if master.name != node.name:
          # check reachability from master secondary ip to new secondary ip
          if not netutils.TcpPing(self.op.secondary_ip,
                                  constants.DEFAULT_NODED_PORT,
                                  source=master.secondary_ip):
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                       " based ping to node daemon port",
                                       errors.ECODE_ENVIRON)

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node
    old_role = self.old_role
    new_role = self.new_role

    result = []

    if self.op.ndparams:
      node.ndparams = self.new_ndparams

    for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))

    if new_role != old_role:
      # Tell the node to demote itself, if no longer MC and not offline
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s", msg)

      new_flags = self._R2F[new_role]
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
        if of != nf:
          result.append((desc, str(nf)))
      (node.master_candidate, node.drained, node.offline) = new_flags

      # we locked all nodes, we adjust the CP before updating this node
      if self.lock_all:
        _AdjustCandidatePool(self, [node.name])

    if self.op.secondary_ip:
      node.secondary_ip = self.op.secondary_ip
      result.append(("secondary_ip", self.op.secondary_ip))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup if the mc
    # flag changed
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)

    return result


class LUPowercycleNode(NoHooksLU):
  """Powercycles a node.

  """
  _OP_PARAMS = [
    _PNodeName,
    _PForce,
    ]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload


class LUQueryClusterInfo(NoHooksLU):
  """Query cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}

    # Filter just for enabled hypervisors
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = {}
      for hv_name, hv_params in hv_dict.items():
        if hv_name in cluster.enabled_hypervisors:
          os_hvp[os_name][hv_name] = hv_params

    # Convert ip_family to ip_version
    primary_ip_version = constants.IP4_VERSION
    if cluster.primary_ip_family == netutils.IP6Address.family:
      primary_ip_version = constants.IP6_VERSION

    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.enabled_hypervisors[0],
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "nicparams": cluster.nicparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      "primary_ip_version": primary_ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
      }

    return result


class LUQueryConfigValues(NoHooksLU):
  """Return configuration values.

  """
  _OP_PARAMS = [_POutputFields]
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause", "volume_group_name")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Dump a representation of the cluster config to the standard output.

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      elif field == "volume_group_name":
        entry = self.cfg.GetVGName()
      else:
        raise errors.ParameterError(field)
      values.append(entry)
    return values


class LUActivateInstanceDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  _OP_PARAMS = [
    _PInstanceName,
    ("ignore_size", False, ht.TBool),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
              _AssembleInstanceDisks(self, self.instance,
                                     ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info


def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: False if the operation failed, otherwise a list of
      (host, instance_visible_name, node_visible_name)
      with the mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for inst_disk in disks:
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for inst_disk in disks:
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info


def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                       ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")


class LUDeactivateInstanceDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  _OP_PARAMS = [
    _PInstanceName,
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks.

    """
    instance = self.instance
    _SafeShutdownInstanceDisks(self, instance)


def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks if an instance is running, before calling
  _ShutdownInstanceDisks.

  """
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)


def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on

  """
  if disks is None:
    return instance.disks
  else:
    if not set(disks).issubset(instance.disks):
      raise errors.ProgrammerError("Can only act on disks belonging to the"
                                   " target instance")
    return disks


def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If ignore_primary is true, errors on the primary node are
  ignored.

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if not ignore_primary or node != instance.primary_node:
          all_result = False
  return all_result


def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  free_mem = nodeinfo[node].payload.get('memory_free', None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)


def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Checks if nodes have enough free disk space in all the VGs.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type req_sizes: C{dict}
  @param req_sizes: the hash of vg and corresponding amount of disk in
      MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  if req_sizes is not None:
    for vg, req_size in req_sizes.iteritems():
      _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)


def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
  """Checks if nodes have enough free disk space in the specified VG.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    vg_free = info.payload.get("vg_free", None)
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node"
                                 " %s for vg %s, result was '%s'" %
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s"
                                 " vg %s: required %d MiB, available %d MiB" %
                                 (node, vg, requested, vg_free),
                                 errors.ECODE_NORES)


class LUStartupInstance(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    _PForce,
    _PIgnoreOfflineNodes,
    ("hvparams", ht.EmptyDict, ht.TDict),
    ("beparams", ht.EmptyDict, ht.TDict),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    # extra beparams
    if self.op.beparams:
      # fill the beparams dict
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra hvparams
    if self.op.hvparams:
      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = cluster.FillHV(instance)
      filled_hvp.update(self.op.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)

    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")

      if self.op.hvparams or self.op.beparams:
        self.proc.LogWarning("Overridden parameters are ignored")
    else:
      _CheckNodeOnline(self, instance.primary_node)

      bep = self.cfg.GetClusterInfo().FillBE(instance)

      # check bridges existence
      _CheckInstanceBridgesExist(self, instance)

      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
      remote_info.Raise("Error checking node %s" % instance.primary_node,
                        prereq=True, ecode=errors.ECODE_ENVIRON)
      if not remote_info.payload: # not running already
        _CheckNodeFreeMemory(self, instance.primary_node,
                             "starting instance %s" % instance.name,
                             bep[constants.BE_MEMORY], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    """
    instance = self.instance
    force = self.op.force

    self.cfg.MarkInstanceUp(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as started")
    else:
      node_current = instance.primary_node

      _StartInstanceDisks(self, instance, force)

      result = self.rpc.call_instance_start(node_current, instance,
                                            self.op.hvparams, self.op.beparams)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance: %s" % msg)


class LURebootInstance(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("ignore_secondaries", False, ht.TBool),
    ("reboot_type", ht.NoDefault, ht.TElemOf(constants.REBOOT_TYPES)),
    _PShutdownTimeout,
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    node_current = instance.primary_node

    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                       constants.INSTANCE_REBOOT_HARD]:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, node_current)
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             reboot_type,
                                             self.op.shutdown_timeout)
      result.Raise("Could not reboot instance")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance,
                                               self.op.shutdown_timeout)
      result.Raise("Could not shutdown instance for full reboot")
      _ShutdownInstanceDisks(self, instance)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(node_current, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)


class LUShutdownInstance(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    _PIgnoreOfflineNodes,
    ("timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT, ht.TPositiveInt),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.op.timeout
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    self.primary_offline = \
      self.cfg.GetNodeInfo(self.instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")
    else:
      _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    node_current = instance.primary_node
    timeout = self.op.timeout

    self.cfg.MarkInstanceDown(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
      msg = result.fail_msg
      if msg:
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)

      _ShutdownInstanceDisks(self, instance)


class LUReinstallInstance(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("os_type", None, ht.TMaybeString),
    ("force_variant", False, ht.TBool),
    ("osparams", None, ht.TOr(ht.TDict, ht.TNone)),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
                     " offline, cannot reinstall")
    for node in instance.secondary_nodes:
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
                       " cannot reinstall")

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot reinstall")

    if self.op.os_type is not None:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
      instance_os = self.op.os_type
    else:
      instance_os = instance.os

    nodelist = list(instance.all_nodes)

    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = None

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      # Write to configuration
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
                                             self.op.debug_level,
                                             osparams=self.os_inst)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)


class LURecreateInstanceDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("disks", ht.EmptyList, ht.TListOf(ht.TPositiveInt)),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot recreate disks")

    if not self.op.disks:
      self.op.disks = range(len(instance.disks))
    else:
      for idx in self.op.disks:
        if idx >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
                                     errors.ECODE_INVAL)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Recreate the disks.

    """
    to_skip = []
    for idx, _ in enumerate(self.instance.disks):
      if idx not in self.op.disks: # disk idx has not been passed in
        to_skip.append(idx)
        continue

    _CreateDisks(self, self.instance, to_skip=to_skip)


class LURenameInstance(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("new_name", ht.NoDefault, ht.TNonEmptyString),
    ("ip_check", False, ht.TBool),
    ("name_check", True, ht.TBool),
    ]

  def CheckArguments(self):
    """Check arguments.

    """
    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do ip check without a name check",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None
    _CheckNodeOnline(self, instance.primary_node)
    _CheckInstanceDown(self, instance, "cannot rename")
    self.instance = instance

    new_name = self.op.new_name
    if self.op.name_check:
      hostname = netutils.GetHostname(name=new_name)
      new_name = self.op.new_name = hostname.name
      if (self.op.ip_check and
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (hostname.ip, new_name),
                                   errors.ECODE_NOTUNIQUE)

    instance_list = self.cfg.GetInstanceList()
    if new_name in instance_list:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name, errors.ECODE_EXISTS)

  def Exec(self, feedback_fn):
    """Rename the instance.

    """
    inst = self.instance
    old_name = inst.name

    if inst.disk_template == constants.DT_FILE:
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if inst.disk_template == constants.DT_FILE:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (inst.primary_node, old_file_storage_dir,
                    new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    try:
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name, self.op.debug_level)
      msg = result.fail_msg
      if msg:
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.proc.LogWarning(msg)
    finally:
      _ShutdownInstanceDisks(self, inst)

    return inst.name


class LURemoveInstance(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("ignore_failures", False, ht.TBool),
    _PShutdownTimeout,
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
    nl = [self.cfg.GetMasterNode()]
    nl_post = list(self.instance.all_nodes) + nl
    return env, nl, nl_post

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    """
    instance = self.instance
    logging.info("Shutting down instance %s on node %s",
                 instance.name, instance.primary_node)

    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_failures:
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, instance.primary_node, msg))

    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)


def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
  """Utility function to remove an instance.

  """
  logging.info("Removing block devices for instance %s", instance.name)

  if not _RemoveDisks(lu, instance):
    if not ignore_failures:
      raise errors.OpExecError("Can't remove instance's disks")
    feedback_fn("Warning: can't remove instance's disks")

  logging.info("Removing instance %s out of cluster config", instance.name)

  lu.cfg.RemoveInstance(instance.name)

  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
    "Instance lock removal conflict"

  # Remove lock for the instance
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name


class LUQueryInstances(NoHooksLU):
  """Logical unit for querying instances.

  """
  # pylint: disable-msg=W0142
  _OP_PARAMS = [
    _POutputFields,
    ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
    ("use_locking", False, ht.TBool),
    ]
  REQ_BGL = False
  _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
                    "serial_no", "ctime", "mtime", "uuid"]
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
                                    "admin_state",
                                    "disk_template", "ip", "mac", "bridge",
                                    "nic_mode", "nic_link",
                                    "sda_size", "sdb_size", "vcpus", "tags",
                                    "network_port", "beparams",
                                    r"(disk)\.(size)/([0-9]+)",
                                    r"(disk)\.(sizes)", "disk_usage",
                                    r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
                                    r"(nic)\.(bridge)/([0-9]+)",
                                    r"(nic)\.(macs|ips|modes|links|bridges)",
                                    r"(disk|nic)\.(count)",
                                    "hvparams", "custom_hvparams",
                                    "custom_beparams", "custom_nicparams",
                                    ] + _SIMPLE_FIELDS +
                                  ["hv/%s" % name
                                   for name in constants.HVS_PARAMETERS
                                   if name not in constants.HVC_GLOBALS] +
                                  ["be/%s" % name
                                   for name in constants.BES_PARAMETERS])
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state",
                                   "oper_ram",
                                   "oper_vcpus",
                                   "status")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_INSTANCE] = 1
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.names:
      self.wanted = _GetWantedInstances(self, self.op.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.do_locking:
      self._LockInstancesNodes()

  def Exec(self, feedback_fn):
    """Computes the list of instances and their attributes.

    """
    # pylint: disable-msg=R0912
    # way too many branches here
    all_info = self.cfg.GetAllInstancesInfo()
    if self.wanted == locking.ALL_SET:
      # caller didn't specify instance names, so ordering is not important
      if self.do_locking:
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
      else:
        instance_names = all_info.keys()
      instance_names = utils.NiceSort(instance_names)
    else:
      # caller did specify names, so we must keep the ordering
      if self.do_locking:
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
      else:
        tgt_set = all_info.keys()
      missing = set(self.wanted).difference(tgt_set)
      if missing:
        raise errors.OpExecError("Some instances were removed before"
                                 " retrieving their data: %s" % missing)
      instance_names = self.wanted

    instance_list = [all_info[iname] for iname in instance_names]

    # begin data gathering

    nodes = frozenset([inst.primary_node for inst in instance_list])
    hv_list = list(set([inst.hypervisor for inst in instance_list]))

    bad_nodes = []
    off_nodes = []
    if self.do_node_query:
      live_data = {}
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          off_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        else:
          if result.payload:
            live_data.update(result.payload)
          # else no instance is alive
    else:
      live_data = dict([(name, {}) for name in instance_names])

    # end data gathering

    HVPREFIX = "hv/"
    BEPREFIX = "be/"
    output = []
    cluster = self.cfg.GetClusterInfo()
    for instance in instance_list:
      iout = []
      i_hv = cluster.FillHV(instance, skip_globals=True)
      i_be = cluster.FillBE(instance)
      i_nicp = [cluster.SimpleFillNIC(nic.nicparams) for nic in instance.nics]
      for field in self.op.output_fields:
        st_match = self._FIELDS_STATIC.Matches(field)
        if field in self._SIMPLE_FIELDS:
          val = getattr(instance, field)
        elif field == "pnode":
          val = instance.primary_node
        elif field == "snodes":
          val = list(instance.secondary_nodes)
        elif field == "admin_state":
          val = instance.admin_up
        elif field == "oper_state":
          if instance.primary_node in bad_nodes:
            val = None
          else:
            val = bool(live_data.get(instance.name))
        elif field == "status":
          if instance.primary_node in off_nodes:
            val = "ERROR_nodeoffline"
          elif instance.primary_node in bad_nodes:
            val = "ERROR_nodedown"
          else:
            running = bool(live_data.get(instance.name))
            if running:
              if instance.admin_up:
                val = "running"
              else:
                val = "ERROR_up"
            else:
              if instance.admin_up:
                val = "ERROR_down"
              else:
                val = "ADMIN_down"
        elif field == "oper_ram":
          if instance.primary_node in bad_nodes:
            val = None
          elif instance.name in live_data:
            val = live_data[instance.name].get("memory", "?")
          else:
            val = "-"
        elif field == "oper_vcpus":
          if instance.primary_node in bad_nodes:
            val = None
          elif instance.name in live_data:
            val = live_data[instance.name].get("vcpus", "?")
          else:
            val = "-"
        elif field == "vcpus":
          val = i_be[constants.BE_VCPUS]
        elif field == "disk_template":
          val = instance.disk_template
        elif field == "ip":
          if instance.nics:
            val = instance.nics[0].ip
          else:
            val = None
        elif field == "nic_mode":
          if instance.nics:
            val = i_nicp[0][constants.NIC_MODE]
          else:
            val = None
        elif field == "nic_link":
          if instance.nics:
            val = i_nicp[0][constants.NIC_LINK]
          else:
            val = None
        elif field == "bridge":
          if (instance.nics and
              i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
            val = i_nicp[0][constants.NIC_LINK]
          else:
            val = None
        elif field == "mac":
          if instance.nics:
            val = instance.nics[0].mac
          else:
            val = None
        elif field == "custom_nicparams":
          val = [nic.nicparams for nic in instance.nics]
        elif field == "sda_size" or field == "sdb_size":
          idx = ord(field[2]) - ord('a')
          try:
            val = instance.FindDisk(idx).size
          except errors.OpPrereqError:
            val = None
        elif field == "disk_usage": # total disk usage per node
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
        elif field == "tags":
          val = list(instance.GetTags())
        elif field == "custom_hvparams":
          val = instance.hvparams # not filled!
        elif field == "hvparams":
          val = i_hv
        elif (field.startswith(HVPREFIX) and
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
              field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
          val = i_hv.get(field[len(HVPREFIX):], None)
        elif field == "custom_beparams":
          val = instance.beparams
        elif field == "beparams":
          val = i_be
        elif (field.startswith(BEPREFIX) and
5757
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
5758
          val = i_be.get(field[len(BEPREFIX):], None)
5759
        elif st_match and st_match.groups():
5760
          # matches a variable list
5761
          st_groups = st_match.groups()
5762
          if st_groups and st_groups[0] == "disk":
5763
            if st_groups[1] == "count":
5764
              val = len(instance.disks)
5765
            elif st_groups[1] == "sizes":
5766
              val = [disk.size for disk in instance.disks]
5767
            elif st_groups[1] == "size":
5768
              try:
5769
                val = instance.FindDisk(st_groups[2]).size
5770
              except errors.OpPrereqError:
5771
                val = None
5772
            else:
5773
              assert False, "Unhandled disk parameter"
5774
          elif st_groups[0] == "nic":
5775
            if st_groups[1] == "count":
5776
              val = len(instance.nics)
5777
            elif st_groups[1] == "macs":
5778
              val = [nic.mac for nic in instance.nics]
5779
            elif st_groups[1] == "ips":
5780
              val = [nic.ip for nic in instance.nics]
5781
            elif st_groups[1] == "modes":
5782
              val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
5783
            elif st_groups[1] == "links":
5784
              val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
5785
            elif st_groups[1] == "bridges":
5786
              val = []
5787
              for nicp in i_nicp:
5788
                if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
5789
                  val.append(nicp[constants.NIC_LINK])
5790
                else:
5791
                  val.append(None)
5792
            else:
5793
              # index-based item
5794
              nic_idx = int(st_groups[2])
5795
              if nic_idx >= len(instance.nics):
5796
                val = None
5797
              else:
5798
                if st_groups[1] == "mac":
5799
                  val = instance.nics[nic_idx].mac
5800
                elif st_groups[1] == "ip":
5801
                  val = instance.nics[nic_idx].ip
5802
                elif st_groups[1] == "mode":
5803
                  val = i_nicp[nic_idx][constants.NIC_MODE]
5804
                elif st_groups[1] == "link":
5805
                  val = i_nicp[nic_idx][constants.NIC_LINK]
5806
                elif st_groups[1] == "bridge":
5807
                  nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
5808
                  if nic_mode == constants.NIC_MODE_BRIDGED:
5809
                    val = i_nicp[nic_idx][constants.NIC_LINK]
5810
                  else:
5811
                    val = None
5812
                else:
5813
                  assert False, "Unhandled NIC parameter"
5814
          else:
5815
            assert False, ("Declared but unhandled variable parameter '%s'" %
5816
                           field)
5817
        else:
5818
          assert False, "Declared but unhandled parameter '%s'" % field
5819
        iout.append(val)
      output.append(iout)

    return output


class LUFailoverInstance(LogicalUnit):
  """Failover an instance.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("ignore_consistency", False, ht.TBool),
    _PShutdownTimeout,
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
5848
    """Build hooks env.
5849

5850
    This runs on master, primary and secondary nodes of the instance.
5851

5852
    """
5853
    instance = self.instance
5854
    source_node = instance.primary_node
5855
    target_node = instance.secondary_nodes[0]
5856
    env = {
5857
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5858
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5859
      "OLD_PRIMARY": source_node,
5860
      "OLD_SECONDARY": target_node,
5861
      "NEW_PRIMARY": target_node,
5862
      "NEW_SECONDARY": source_node,
5863
      }
5864
    env.update(_BuildInstanceHookEnvByObject(self, instance))
5865
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5866
    nl_post = list(nl)
5867
    nl_post.append(source_node)
5868
    return env, nl, nl_post
5869

    
  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    bep = self.cfg.GetClusterInfo().FillBE(instance)
    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " network mirrored, cannot failover.",
                                 errors.ECODE_STATE)

    secondary_nodes = instance.secondary_nodes
    if not secondary_nodes:
      raise errors.ProgrammerError("no secondary node but using "
                                   "a mirrored disk template")

    target_node = secondary_nodes[0]
    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    if instance.admin_up:
      # check memory requirements on the secondary node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
5907
    """Failover an instance.
5908

5909
    The failover is done by shutting it down on its present node and
5910
    starting it on the secondary.
5911

5912
    """
5913
    instance = self.instance
5914
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)
5915

    
5916
    source_node = instance.primary_node
5917
    target_node = instance.secondary_nodes[0]
5918

    
5919
    if instance.admin_up:
5920
      feedback_fn("* checking disk consistency between source and target")
5921
      for dev in instance.disks:
5922
        # for drbd, these are drbd over lvm
5923
        if not _CheckDiskConsistency(self, dev, target_node, False):
5924
          if not self.op.ignore_consistency:
5925
            raise errors.OpExecError("Disk %s is degraded on target node,"
5926
                                     " aborting failover." % dev.iv_name)
5927
    else:
5928
      feedback_fn("* not checking disk consistency as instance is not running")
5929

    
5930
    feedback_fn("* shutting down instance on source node")
5931
    logging.info("Shutting down instance %s on node %s",
5932
                 instance.name, source_node)
5933

    
5934
    result = self.rpc.call_instance_shutdown(source_node, instance,
5935
                                             self.op.shutdown_timeout)
5936
    msg = result.fail_msg
5937
    if msg:
5938
      if self.op.ignore_consistency or primary_node.offline:
5939
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
5940
                             " Proceeding anyway. Please make sure node"
5941
                             " %s is down. Error details: %s",
5942
                             instance.name, source_node, source_node, msg)
5943
      else:
5944
        raise errors.OpExecError("Could not shutdown instance %s on"
5945
                                 " node %s: %s" %
5946
                                 (instance.name, source_node, msg))
5947

    
5948
    feedback_fn("* deactivating the instance's disks on source node")
5949
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5950
      raise errors.OpExecError("Can't shut down the instance's disks.")
5951

    
5952
    instance.primary_node = target_node
5953
    # distribute new instance config to the other nodes
5954
    self.cfg.Update(instance, feedback_fn)
5955

    
5956
    # Only start the instance if it's marked as up
5957
    if instance.admin_up:
5958
      feedback_fn("* activating the instance's disks on target node")
5959
      logging.info("Starting instance %s on node %s",
5960
                   instance.name, target_node)
5961

    
5962
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
5963
                                           ignore_secondaries=True)
5964
      if not disks_ok:
5965
        _ShutdownInstanceDisks(self, instance)
5966
        raise errors.OpExecError("Can't activate the instance's disks")
5967

    
5968
      feedback_fn("* starting the instance on the target node")
5969
      result = self.rpc.call_instance_start(target_node, instance, None, None)
5970
      msg = result.fail_msg
5971
      if msg:
5972
        _ShutdownInstanceDisks(self, instance)
5973
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5974
                                 (instance.name, target_node, msg))
5975

    
5976

    
class LUMigrateInstance(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    _PMigrationMode,
    _PMigrationLive,
    ("cleanup", False, ht.TBool),
    ]

  REQ_BGL = False

  def ExpandNames(self):
5996
    self._ExpandAndLockInstance()
5997

    
5998
    self.needed_locks[locking.LEVEL_NODE] = []
5999
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6000

    
6001
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6002
                                       self.op.cleanup)
6003
    self.tasklets = [self._migrater]
6004

    
6005
  def DeclareLocks(self, level):
6006
    if level == locking.LEVEL_NODE:
6007
      self._LockInstancesNodes()
6008

    
6009
  def BuildHooksEnv(self):
6010
    """Build hooks env.
6011

6012
    This runs on master, primary and secondary nodes of the instance.
6013

6014
    """
6015
    instance = self._migrater.instance
6016
    source_node = instance.primary_node
6017
    target_node = instance.secondary_nodes[0]
6018
    env = _BuildInstanceHookEnvByObject(self, instance)
6019
    env["MIGRATE_LIVE"] = self._migrater.live
6020
    env["MIGRATE_CLEANUP"] = self.op.cleanup
6021
    env.update({
6022
        "OLD_PRIMARY": source_node,
6023
        "OLD_SECONDARY": target_node,
6024
        "NEW_PRIMARY": target_node,
6025
        "NEW_SECONDARY": source_node,
6026
        })
6027
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6028
    nl_post = list(nl)
6029
    nl_post.append(source_node)
6030
    return env, nl, nl_post
6031

    
6032

    
6033
class LUMoveInstance(LogicalUnit):
6034
  """Move an instance by data-copying.
6035

6036
  """
6037
  HPATH = "instance-move"
6038
  HTYPE = constants.HTYPE_INSTANCE
6039
  _OP_PARAMS = [
6040
    _PInstanceName,
6041
    ("target_node", ht.NoDefault, ht.TNonEmptyString),
6042
    _PShutdownTimeout,
6043
    ]
6044
  REQ_BGL = False
6045

    
6046
  def ExpandNames(self):
6047
    self._ExpandAndLockInstance()
6048
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6049
    self.op.target_node = target_node
6050
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
6051
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6052

    
6053
  def DeclareLocks(self, level):
6054
    if level == locking.LEVEL_NODE:
6055
      self._LockInstancesNodes(primary_only=True)
6056

    
6057
  def BuildHooksEnv(self):
6058
    """Build hooks env.
6059

6060
    This runs on master, primary and secondary nodes of the instance.
6061

6062
    """
6063
    env = {
6064
      "TARGET_NODE": self.op.target_node,
6065
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6066
      }
6067
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6068
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
6069
                                       self.op.target_node]
6070
    return env, nl, nl
6071

    
  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    _CheckNodeVmCapable(self, target_node)

    if instance.admin_up:
      # check memory requirements on the secondary node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
6117
    """Move an instance.
6118

6119
    The move is done by shutting it down on its present node, copying
6120
    the data over (slow) and starting it on the new node.
6121

6122
    """
6123
    instance = self.instance
6124

    
6125
    source_node = instance.primary_node
6126
    target_node = self.target_node
6127

    
6128
    self.LogInfo("Shutting down instance %s on source node %s",
6129
                 instance.name, source_node)
6130

    
6131
    result = self.rpc.call_instance_shutdown(source_node, instance,
6132
                                             self.op.shutdown_timeout)
6133
    msg = result.fail_msg
6134
    if msg:
6135
      if self.op.ignore_consistency:
6136
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
6137
                             " Proceeding anyway. Please make sure node"
6138
                             " %s is down. Error details: %s",
6139
                             instance.name, source_node, source_node, msg)
6140
      else:
6141
        raise errors.OpExecError("Could not shutdown instance %s on"
6142
                                 " node %s: %s" %
6143
                                 (instance.name, source_node, msg))
6144

    
6145
    # create the target disks
6146
    try:
6147
      _CreateDisks(self, instance, target_node=target_node)
6148
    except errors.OpExecError:
6149
      self.LogWarning("Device creation failed, reverting...")
6150
      try:
6151
        _RemoveDisks(self, instance, target_node=target_node)
6152
      finally:
6153
        self.cfg.ReleaseDRBDMinors(instance.name)
6154
        raise
6155

    
6156
    cluster_name = self.cfg.GetClusterInfo().cluster_name
6157

    
6158
    errs = []
6159
    # activate, get path, copy the data over
6160
    for idx, disk in enumerate(instance.disks):
6161
      self.LogInfo("Copying data for disk %d", idx)
6162
      result = self.rpc.call_blockdev_assemble(target_node, disk,
6163
                                               instance.name, True)
6164
      if result.fail_msg:
6165
        self.LogWarning("Can't assemble newly created disk %d: %s",
6166
                        idx, result.fail_msg)
6167
        errs.append(result.fail_msg)
6168
        break
6169
      dev_path = result.payload
6170
      result = self.rpc.call_blockdev_export(source_node, disk,
6171
                                             target_node, dev_path,
6172
                                             cluster_name)
6173
      if result.fail_msg:
6174
        self.LogWarning("Can't copy data over for disk %d: %s",
6175
                        idx, result.fail_msg)
6176
        errs.append(result.fail_msg)
6177
        break
6178

    
6179
    if errs:
6180
      self.LogWarning("Some disks failed to copy, aborting")
6181
      try:
6182
        _RemoveDisks(self, instance, target_node=target_node)
6183
      finally:
6184
        self.cfg.ReleaseDRBDMinors(instance.name)
6185
        raise errors.OpExecError("Errors during disk copy: %s" %
6186
                                 (",".join(errs),))
6187

    
6188
    instance.primary_node = target_node
6189
    self.cfg.Update(instance, feedback_fn)
6190

    
6191
    self.LogInfo("Removing the disks on the original node")
6192
    _RemoveDisks(self, instance, target_node=source_node)
6193

    
6194
    # Only start the instance if it's marked as up
6195
    if instance.admin_up:
6196
      self.LogInfo("Starting instance %s on node %s",
6197
                   instance.name, target_node)
6198

    
6199
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
6200
                                           ignore_secondaries=True)
6201
      if not disks_ok:
6202
        _ShutdownInstanceDisks(self, instance)
6203
        raise errors.OpExecError("Can't activate the instance's disks")
6204

    
6205
      result = self.rpc.call_instance_start(target_node, instance, None, None)
6206
      msg = result.fail_msg
6207
      if msg:
6208
        _ShutdownInstanceDisks(self, instance)
6209
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6210
                                 (instance.name, target_node, msg))
6211

    
6212

    
6213
class LUMigrateNode(LogicalUnit):
6214
  """Migrate all instances from a node.
6215

6216
  """
6217
  HPATH = "node-migrate"
6218
  HTYPE = constants.HTYPE_NODE
6219
  _OP_PARAMS = [
6220
    _PNodeName,
6221
    _PMigrationMode,
6222
    _PMigrationLive,
6223
    ]
6224
  REQ_BGL = False
6225

    
6226
  def ExpandNames(self):
6227
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6228

    
6229
    self.needed_locks = {
6230
      locking.LEVEL_NODE: [self.op.node_name],
6231
      }
6232

    
6233
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6234

    
6235
    # Create tasklets for migrating instances for all instances on this node
6236
    names = []
6237
    tasklets = []
6238

    
6239
    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
6240
      logging.debug("Migrating instance %s", inst.name)
6241
      names.append(inst.name)
6242

    
6243
      tasklets.append(TLMigrateInstance(self, inst.name, False))
6244

    
6245
    self.tasklets = tasklets
6246

    
6247
    # Declare instance locks
6248
    self.needed_locks[locking.LEVEL_INSTANCE] = names
6249

    
6250
  def DeclareLocks(self, level):
6251
    if level == locking.LEVEL_NODE:
6252
      self._LockInstancesNodes()
6253

    
6254
  def BuildHooksEnv(self):
6255
    """Build hooks env.
6256

6257
    This runs on the master, the primary and all the secondaries.
6258

6259
    """
6260
    env = {
6261
      "NODE_NAME": self.op.node_name,
6262
      }
6263

    
6264
    nl = [self.cfg.GetMasterNode()]
6265

    
6266
    return (env, nl, nl)
6267

    
6268

    
class TLMigrateInstance(Tasklet):
  """Tasklet class for instance migration.

  @type live: boolean
  @ivar live: whether the migration will be done live or non-live;
      this variable is initialized only after CheckPrereq has run

  """
  def __init__(self, lu, instance_name, cleanup):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.cleanup = cleanup
    self.live = False # will be overridden later

  def CheckPrereq(self):
6289
    """Check prerequisites.
6290

6291
    This checks that the instance is in the cluster.
6292

6293
    """
6294
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6295
    instance = self.cfg.GetInstanceInfo(instance_name)
6296
    assert instance is not None
6297

    
6298
    if instance.disk_template != constants.DT_DRBD8:
6299
      raise errors.OpPrereqError("Instance's disk layout is not"
6300
                                 " drbd8, cannot migrate.", errors.ECODE_STATE)
6301

    
6302
    secondary_nodes = instance.secondary_nodes
6303
    if not secondary_nodes:
6304
      raise errors.ConfigurationError("No secondary node but using"
6305
                                      " drbd8 disk template")
6306

    
6307
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
6308

    
6309
    target_node = secondary_nodes[0]
6310
    # check memory requirements on the secondary node
6311
    _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
6312
                         instance.name, i_be[constants.BE_MEMORY],
6313
                         instance.hypervisor)
6314

    
6315
    # check bridge existence
6316
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
6317

    
6318
    if not self.cleanup:
6319
      _CheckNodeNotDrained(self.lu, target_node)
6320
      result = self.rpc.call_instance_migratable(instance.primary_node,
6321
                                                 instance)
6322
      result.Raise("Can't migrate, please use failover",
6323
                   prereq=True, ecode=errors.ECODE_STATE)
6324

    
6325
    self.instance = instance
6326

    
6327
    if self.lu.op.live is not None and self.lu.op.mode is not None:
6328
      raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
6329
                                 " parameters are accepted",
6330
                                 errors.ECODE_INVAL)
6331
    if self.lu.op.live is not None:
6332
      if self.lu.op.live:
6333
        self.lu.op.mode = constants.HT_MIGRATION_LIVE
6334
      else:
6335
        self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
6336
      # reset the 'live' parameter to None so that repeated
6337
      # invocations of CheckPrereq do not raise an exception
6338
      self.lu.op.live = None
6339
    elif self.lu.op.mode is None:
6340
      # read the default value from the hypervisor
6341
      i_hv = self.cfg.GetClusterInfo().FillHV(instance, skip_globals=False)
6342
      self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
6343

    
6344
    self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
6345

    
6346
  def _WaitUntilSync(self):
6347
    """Poll with custom rpc for disk sync.
6348

6349
    This uses our own step-based rpc call.
6350

6351
    """
6352
    self.feedback_fn("* wait until resync is done")
6353
    all_done = False
6354
    while not all_done:
6355
      all_done = True
6356
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
6357
                                            self.nodes_ip,
6358
                                            self.instance.disks)
6359
      min_percent = 100
6360
      for node, nres in result.items():
6361
        nres.Raise("Cannot resync disks on node %s" % node)
6362
        node_done, node_percent = nres.payload
6363
        all_done = all_done and node_done
6364
        if node_percent is not None:
6365
          min_percent = min(min_percent, node_percent)
6366
      if not all_done:
6367
        if min_percent < 100:
6368
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
6369
        time.sleep(2)
6370

    
6371
  def _EnsureSecondary(self, node):
6372
    """Demote a node to secondary.
6373

6374
    """
6375
    self.feedback_fn("* switching node %s to secondary mode" % node)
6376

    
6377
    for dev in self.instance.disks:
6378
      self.cfg.SetDiskID(dev, node)
6379

    
6380
    result = self.rpc.call_blockdev_close(node, self.instance.name,
6381
                                          self.instance.disks)
6382
    result.Raise("Cannot change disk to secondary on node %s" % node)
6383

    
6384
  def _GoStandalone(self):
6385
    """Disconnect from the network.
6386

6387
    """
6388
    self.feedback_fn("* changing into standalone mode")
6389
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
6390
                                               self.instance.disks)
6391
    for node, nres in result.items():
6392
      nres.Raise("Cannot disconnect disks node %s" % node)
6393

    
6394
  def _GoReconnect(self, multimaster):
6395
    """Reconnect to the network.
6396

6397
    """
6398
    if multimaster:
6399
      msg = "dual-master"
6400
    else:
6401
      msg = "single-master"
6402
    self.feedback_fn("* changing disks into %s mode" % msg)
6403
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
6404
                                           self.instance.disks,
6405
                                           self.instance.name, multimaster)
6406
    for node, nres in result.items():
6407
      nres.Raise("Cannot change disks config on node %s" % node)
6408

    
6409
  def _ExecCleanup(self):
6410
    """Try to cleanup after a failed migration.
6411

6412
    The cleanup is done by:
6413
      - check that the instance is running only on one node
6414
        (and update the config if needed)
6415
      - change disks on its secondary node to secondary
6416
      - wait until disks are fully synchronized
6417
      - disconnect from the network
6418
      - change disks into single-master mode
6419
      - wait again until disks are fully synchronized
6420

6421
    """
6422
    instance = self.instance
6423
    target_node = self.target_node
6424
    source_node = self.source_node
6425

    
6426
    # check running on only one node
6427
    self.feedback_fn("* checking where the instance actually runs"
6428
                     " (if this hangs, the hypervisor might be in"
6429
                     " a bad state)")
6430
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6431
    for node, result in ins_l.items():
6432
      result.Raise("Can't contact node %s" % node)
6433

    
6434
    runningon_source = instance.name in ins_l[source_node].payload
6435
    runningon_target = instance.name in ins_l[target_node].payload
6436

    
6437
    if runningon_source and runningon_target:
6438
      raise errors.OpExecError("Instance seems to be running on two nodes,"
6439
                               " or the hypervisor is confused. You will have"
6440
                               " to ensure manually that it runs only on one"
6441
                               " and restart this operation.")
6442

    
6443
    if not (runningon_source or runningon_target):
6444
      raise errors.OpExecError("Instance does not seem to be running at all."
6445
                               " In this case, it's safer to repair by"
6446
                               " running 'gnt-instance stop' to ensure disk"
6447
                               " shutdown, and then restarting it.")
6448

    
6449
    if runningon_target:
6450
      # the migration has actually succeeded, we need to update the config
6451
      self.feedback_fn("* instance running on secondary node (%s),"
6452
                       " updating config" % target_node)
6453
      instance.primary_node = target_node
6454
      self.cfg.Update(instance, self.feedback_fn)
6455
      demoted_node = source_node
6456
    else:
6457
      self.feedback_fn("* instance confirmed to be running on its"
6458
                       " primary node (%s)" % source_node)
6459
      demoted_node = target_node
6460

    
6461
    self._EnsureSecondary(demoted_node)
6462
    try:
6463
      self._WaitUntilSync()
6464
    except errors.OpExecError:
6465
      # we ignore here errors, since if the device is standalone, it
6466
      # won't be able to sync
6467
      pass
6468
    self._GoStandalone()
6469
    self._GoReconnect(False)
6470
    self._WaitUntilSync()
6471

    
6472
    self.feedback_fn("* done")
6473

    
6474
  def _RevertDiskStatus(self):
6475
    """Try to revert the disk status after a failed migration.
6476

6477
    """
6478
    target_node = self.target_node
6479
    try:
6480
      self._EnsureSecondary(target_node)
6481
      self._GoStandalone()
6482
      self._GoReconnect(False)
6483
      self._WaitUntilSync()
6484
    except errors.OpExecError, err:
6485
      self.lu.LogWarning("Migration failed and I can't reconnect the"
6486
                         " drives: error '%s'\n"
6487
                         "Please look and recover the instance status" %
6488
                         str(err))
6489

    
  def _AbortMigration(self):
    """Call the hypervisor code to abort a started migration.

    """
    instance = self.instance
    target_node = self.target_node
    migration_info = self.migration_info

    abort_result = self.rpc.call_finalize_migration(target_node,
                                                    instance,
                                                    migration_info,
                                                    False)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on target node %s: %s",
                    target_node, abort_msg)
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.

  def _ExecMigration(self):
6510
    """Migrate an instance.
6511

6512
    The migrate is done by:
6513
      - change the disks into dual-master mode
6514
      - wait until disks are fully synchronized again
6515
      - migrate the instance
6516
      - change disks on the new secondary node (the old primary) to secondary
6517
      - wait until disks are fully synchronized
6518
      - change disks into single-master mode
6519

6520
    """
6521
    instance = self.instance
6522
    target_node = self.target_node
6523
    source_node = self.source_node
6524

    
6525
    self.feedback_fn("* checking disk consistency between source and target")
6526
    for dev in instance.disks:
6527
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6528
        raise errors.OpExecError("Disk %s is degraded or not fully"
6529
                                 " synchronized on target node,"
6530
                                 " aborting migrate." % dev.iv_name)
6531

    
6532
    # First get the migration information from the remote node
6533
    result = self.rpc.call_migration_info(source_node, instance)
6534
    msg = result.fail_msg
6535
    if msg:
6536
      log_err = ("Failed fetching source migration information from %s: %s" %
6537
                 (source_node, msg))
6538
      logging.error(log_err)
6539
      raise errors.OpExecError(log_err)
6540

    
6541
    self.migration_info = migration_info = result.payload
6542

    
6543
    # Then switch the disks to master/master mode
6544
    self._EnsureSecondary(target_node)
6545
    self._GoStandalone()
6546
    self._GoReconnect(True)
6547
    self._WaitUntilSync()
6548

    
6549
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
6550
    result = self.rpc.call_accept_instance(target_node,
6551
                                           instance,
6552
                                           migration_info,
6553
                                           self.nodes_ip[target_node])
6554

    
6555
    msg = result.fail_msg
6556
    if msg:
6557
      logging.error("Instance pre-migration failed, trying to revert"
6558
                    " disk status: %s", msg)
6559
      self.feedback_fn("Pre-migration failed, aborting")
6560
      self._AbortMigration()
6561
      self._RevertDiskStatus()
6562
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6563
                               (instance.name, msg))
6564

    
6565
    self.feedback_fn("* migrating instance to %s" % target_node)
6566
    time.sleep(10)
6567
    result = self.rpc.call_instance_migrate(source_node, instance,
6568
                                            self.nodes_ip[target_node],
6569
                                            self.live)
6570
    msg = result.fail_msg
6571
    if msg:
6572
      logging.error("Instance migration failed, trying to revert"
6573
                    " disk status: %s", msg)
6574
      self.feedback_fn("Migration failed, aborting")
6575
      self._AbortMigration()
6576
      self._RevertDiskStatus()
6577
      raise errors.OpExecError("Could not migrate instance %s: %s" %
6578
                               (instance.name, msg))
6579
    time.sleep(10)
6580

    
6581
    instance.primary_node = target_node
6582
    # distribute new instance config to the other nodes
6583
    self.cfg.Update(instance, self.feedback_fn)
6584

    
6585
    result = self.rpc.call_finalize_migration(target_node,
6586
                                              instance,
6587
                                              migration_info,
6588
                                              True)
6589
    msg = result.fail_msg
6590
    if msg:
6591
      logging.error("Instance migration succeeded, but finalization failed:"
6592
                    " %s", msg)
6593
      raise errors.OpExecError("Could not finalize instance migration: %s" %
6594
                               msg)
6595

    
6596
    self._EnsureSecondary(source_node)
6597
    self._WaitUntilSync()
6598
    self._GoStandalone()
6599
    self._GoReconnect(False)
6600
    self._WaitUntilSync()
6601

    
6602
    self.feedback_fn("* done")
6603

    
6604
  def Exec(self, feedback_fn):
6605
    """Perform the migration.
6606

6607
    """
6608
    feedback_fn("Migrating instance %s" % self.instance.name)
6609

    
6610
    self.feedback_fn = feedback_fn
6611

    
6612
    self.source_node = self.instance.primary_node
6613
    self.target_node = self.instance.secondary_nodes[0]
6614
    self.all_nodes = [self.source_node, self.target_node]
6615
    self.nodes_ip = {
6616
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6617
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6618
      }
6619

    
6620
    if self.cleanup:
6621
      return self._ExecCleanup()
6622
    else:
6623
      return self._ExecMigration()
6624

    
6625

    
def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has
      CreateOnSecondary() attribute
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  if device.CreateOnSecondary():
    force_create = True

  if device.children:
    for child in device.children:
      _CreateBlockDev(lu, node, instance, child, force_create,
                      info, force_open)

  if not force_create:
    return

  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)


def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create a single block device on a given node.

  This will not recurse over children of the device, so they must be
  created in advance.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  lu.cfg.SetDiskID(device, node)
  result = lu.rpc.call_blockdev_create(node, device, device.size,
                                       instance.name, force_open, info)
  result.Raise("Can't create block device %s on"
               " node %s for instance %s" % (device, node, instance.name))
  if device.physical_id is None:
    device.physical_id = result.payload


def _GenerateUniqueNames(lu, exts):
6699
  """Generate a suitable LV name.
6700

6701
  This will generate a logical volume name for the given instance.
6702

6703
  """
6704
  results = []
6705
  for val in exts:
6706
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6707
    results.append("%s%s" % (new_id, val))
6708
  return results
6709

    
6710

    
6711
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgname, names, iv_name,
6712
                         p_minor, s_minor):
6713
  """Generate a drbd8 device complete with its children.
6714

6715
  """
6716
  port = lu.cfg.AllocatePort()
6717
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6718
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
6719
                          logical_id=(vgname, names[0]))
6720
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6721
                          logical_id=(vgname, names[1]))
6722
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6723
                          logical_id=(primary, secondary, port,
6724
                                      p_minor, s_minor,
6725
                                      shared_secret),
6726
                          children=[dev_data, dev_meta],
6727
                          iv_name=iv_name)
6728
  return drbd_dev
6729

    
6730

    
6731
def _GenerateDiskTemplate(lu, template_name,
6732
                          instance_name, primary_node,
6733
                          secondary_nodes, disk_info,
6734
                          file_storage_dir, file_driver,
6735
                          base_index, feedback_fn):
6736
  """Generate the entire disk layout for a given template type.
6737

6738
  """
6739
  #TODO: compute space requirements
6740

    
6741
  vgname = lu.cfg.GetVGName()
6742
  disk_count = len(disk_info)
6743
  disks = []
6744
  if template_name == constants.DT_DISKLESS:
6745
    pass
6746
  elif template_name == constants.DT_PLAIN:
6747
    if len(secondary_nodes) != 0:
6748
      raise errors.ProgrammerError("Wrong template configuration")
6749

    
6750
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6751
                                      for i in range(disk_count)])
6752
    for idx, disk in enumerate(disk_info):
6753
      disk_index = idx + base_index
6754
      vg = disk.get("vg", vgname)
6755
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
6756
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
6757
                              logical_id=(vg, names[idx]),
6758
                              iv_name="disk/%d" % disk_index,
6759
                              mode=disk["mode"])
6760
      disks.append(disk_dev)
6761
  elif template_name == constants.DT_DRBD8:
6762
    if len(secondary_nodes) != 1:
6763
      raise errors.ProgrammerError("Wrong template configuration")
6764
    remote_node = secondary_nodes[0]
6765
    minors = lu.cfg.AllocateDRBDMinor(
6766
      [primary_node, remote_node] * len(disk_info), instance_name)
6767

    
6768
    names = []
6769
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6770
                                               for i in range(disk_count)]):
6771
      names.append(lv_prefix + "_data")
6772
      names.append(lv_prefix + "_meta")
6773
    for idx, disk in enumerate(disk_info):
6774
      disk_index = idx + base_index
6775
      vg = disk.get("vg", vgname)
6776
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6777
                                      disk["size"], vg, names[idx*2:idx*2+2],
6778
                                      "disk/%d" % disk_index,
6779
                                      minors[idx*2], minors[idx*2+1])
6780
      disk_dev.mode = disk["mode"]
6781
      disks.append(disk_dev)
6782
  elif template_name == constants.DT_FILE:
6783
    if len(secondary_nodes) != 0:
6784
      raise errors.ProgrammerError("Wrong template configuration")
6785

    
6786
    _RequireFileStorage()
6787

    
6788
    for idx, disk in enumerate(disk_info):
6789
      disk_index = idx + base_index
6790
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6791
                              iv_name="disk/%d" % disk_index,
6792
                              logical_id=(file_driver,
6793
                                          "%s/disk%d" % (file_storage_dir,
6794
                                                         disk_index)),
6795
                              mode=disk["mode"])
6796
      disks.append(disk_dev)
6797
  else:
6798
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
6799
  return disks
6800

    
6801

    
def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name


def _CalcEta(time_taken, written, total_size):
  """Calculates the ETA based on size written and total size.

  @param time_taken: The time taken so far
  @param written: amount written so far
  @param total_size: The total size of data to be written
  @return: The remaining time in seconds

  """
  avg_time = time_taken / float(written)
  return (total_size - written) * avg_time
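# Worked example (illustrative only, made-up values): if 4096 MiB out of a
# 10240 MiB total have been written in 80 seconds, the average is
# 80 / 4096.0 seconds per MiB, so the remaining 6144 MiB give
# _CalcEta(80, 4096, 10240) == 120.0 seconds.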


def _WipeDisks(lu, instance):
6823
  """Wipes instance disks.
6824

6825
  @type lu: L{LogicalUnit}
6826
  @param lu: the logical unit on whose behalf we execute
6827
  @type instance: L{objects.Instance}
6828
  @param instance: the instance whose disks we should create
6829
  @return: the success of the wipe
6830

6831
  """
6832
  node = instance.primary_node
6833
  for idx, device in enumerate(instance.disks):
6834
    lu.LogInfo("* Wiping disk %d", idx)
6835
    logging.info("Wiping disk %d for instance %s", idx, instance.name)
6836

    
6837
    # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
6838
    # MAX_WIPE_CHUNK at max
6839
    wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
6840
                          constants.MIN_WIPE_CHUNK_PERCENT)
6841

    
6842
    offset = 0
6843
    size = device.size
6844
    last_output = 0
6845
    start_time = time.time()
6846

    
6847
    while offset < size:
6848
      wipe_size = min(wipe_chunk_size, size - offset)
6849
      result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
6850
      result.Raise("Could not wipe disk %d at offset %d for size %d" %
6851
                   (idx, offset, wipe_size))
6852
      now = time.time()
6853
      offset += wipe_size
6854
      if now - last_output >= 60:
6855
        eta = _CalcEta(now - start_time, offset, size)
6856
        lu.LogInfo(" - done: %.1f%% ETA: %s" %
6857
                   (offset / float(size) * 100, utils.FormatSeconds(eta)))
6858
        last_output = now
6859

    
6860

    
6861
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6862
  """Create all disks for an instance.
6863

6864
  This abstracts away some work from AddInstance.
6865

6866
  @type lu: L{LogicalUnit}
6867
  @param lu: the logical unit on whose behalf we execute
6868
  @type instance: L{objects.Instance}
6869
  @param instance: the instance whose disks we should create
6870
  @type to_skip: list
6871
  @param to_skip: list of indices to skip
6872
  @type target_node: string
6873
  @param target_node: if passed, overrides the target node for creation
6874
  @rtype: boolean
6875
  @return: the success of the creation
6876

6877
  """
6878
  info = _GetInstanceInfoText(instance)
6879
  if target_node is None:
6880
    pnode = instance.primary_node
6881
    all_nodes = instance.all_nodes
6882
  else:
6883
    pnode = target_node
6884
    all_nodes = [pnode]
6885

    
6886
  if instance.disk_template == constants.DT_FILE:
6887
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6888
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
6889

    
6890
    result.Raise("Failed to create directory '%s' on"
6891
                 " node %s" % (file_storage_dir, pnode))
6892

    
6893
  # Note: this needs to be kept in sync with adding of disks in
6894
  # LUSetInstanceParams
6895
  for idx, device in enumerate(instance.disks):
6896
    if to_skip and idx in to_skip:
6897
      continue
6898
    logging.info("Creating volume %s for instance %s",
6899
                 device.iv_name, instance.name)
6900
    #HARDCODE
6901
    for node in all_nodes:
6902
      f_create = node == pnode
6903
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
6904

    
6905

    
6906
def _RemoveDisks(lu, instance, target_node=None):
6907
  """Remove all disks for an instance.
6908

6909
  This abstracts away some work from `AddInstance()` and
6910
  `RemoveInstance()`. Note that in case some of the devices couldn't
6911
  be removed, the removal will continue with the other ones (compare
6912
  with `_CreateDisks()`).
6913

6914
  @type lu: L{LogicalUnit}
6915
  @param lu: the logical unit on whose behalf we execute
6916
  @type instance: L{objects.Instance}
6917
  @param instance: the instance whose disks we should remove
6918
  @type target_node: string
6919
  @param target_node: used to override the node on which to remove the disks
6920
  @rtype: boolean
6921
  @return: the success of the removal
6922

6923
  """
6924
  logging.info("Removing block devices for instance %s", instance.name)
6925

    
6926
  all_result = True
6927
  for device in instance.disks:
6928
    if target_node:
6929
      edata = [(target_node, device)]
6930
    else:
6931
      edata = device.ComputeNodeTree(instance.primary_node)
6932
    for node, disk in edata:
6933
      lu.cfg.SetDiskID(disk, node)
6934
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6935
      if msg:
6936
        lu.LogWarning("Could not remove block device %s on node %s,"
6937
                      " continuing anyway: %s", device.iv_name, node, msg)
6938
        all_result = False
6939

    
6940
  if instance.disk_template == constants.DT_FILE:
6941
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6942
    if target_node:
6943
      tgt = target_node
6944
    else:
6945
      tgt = instance.primary_node
6946
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6947
    if result.fail_msg:
6948
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6949
                    file_storage_dir, instance.primary_node, result.fail_msg)
6950
      all_result = False
6951

    
6952
  return all_result
6953

    
6954

    
def _ComputeDiskSizePerVG(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  def _compute(disks, payload):
    """Universal algorithm

    """
    vgs = {}
    for disk in disks:
      vgs[disk["vg"]] = vgs.get(disk["vg"], 0) + disk["size"] + payload

    return vgs

  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: _compute(disks, 0),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: _compute(disks, 128),
    constants.DT_FILE: None,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" %  disk_template)

  return req_size_dict[disk_template]
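# Illustrative example (made-up values): for
#   disks = [{"vg": "xenvg", "size": 1024}, {"vg": "xenvg", "size": 2048}]
# _ComputeDiskSizePerVG(constants.DT_DRBD8, disks) yields
# {"xenvg": (1024 + 128) + (2048 + 128)} == {"xenvg": 3328}, while
# constants.DT_PLAIN yields {"xenvg": 3072} and constants.DT_DISKLESS None.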
def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d["size"] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
    constants.DT_FILE: None,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" %  disk_template)

  return req_size_dict[disk_template]
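# Illustrative example (made-up values): for two 1024 MiB disks,
# _ComputeDiskSize(constants.DT_PLAIN, disks) == 2048 and
# _ComputeDiskSize(constants.DT_DRBD8, disks) == 2304 (one 128 MiB DRBD
# metadata allowance per disk); the diskless and file templates return None.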


def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
                                                  hvname,
                                                  hvparams)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)


def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS we should use
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  result = lu.rpc.call_os_validate(required, nodenames, osname,
                                   [constants.OS_VALIDATE_PARAMETERS],
                                   osparams)
  for node, nres in result.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    nres.Raise("OS Parameters validation failed on node %s" % node)
    if not nres.payload:
      lu.LogInfo("OS %s not found on node %s, validation skipped",
                 osname, node)
7058

    
7059

    
7060
class LUCreateInstance(LogicalUnit):
7061
  """Create an instance.
7062

7063
  """
7064
  HPATH = "instance-add"
7065
  HTYPE = constants.HTYPE_INSTANCE
7066
  _OP_PARAMS = [
7067
    _PInstanceName,
7068
    ("mode", ht.NoDefault, ht.TElemOf(constants.INSTANCE_CREATE_MODES)),
7069
    ("start", True, ht.TBool),
7070
    ("wait_for_sync", True, ht.TBool),
7071
    ("ip_check", True, ht.TBool),
7072
    ("name_check", True, ht.TBool),
7073
    ("disks", ht.NoDefault, ht.TListOf(ht.TDict)),
7074
    ("nics", ht.NoDefault, ht.TListOf(ht.TDict)),
7075
    ("hvparams", ht.EmptyDict, ht.TDict),
7076
    ("beparams", ht.EmptyDict, ht.TDict),
7077
    ("osparams", ht.EmptyDict, ht.TDict),
7078
    ("no_install", None, ht.TMaybeBool),
7079
    ("os_type", None, ht.TMaybeString),
7080
    ("force_variant", False, ht.TBool),
7081
    ("source_handshake", None, ht.TOr(ht.TList, ht.TNone)),
7082
    ("source_x509_ca", None, ht.TMaybeString),
7083
    ("source_instance_name", None, ht.TMaybeString),
7084
    ("source_shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT,
7085
     ht.TPositiveInt),
7086
    ("src_node", None, ht.TMaybeString),
7087
    ("src_path", None, ht.TMaybeString),
7088
    ("pnode", None, ht.TMaybeString),
7089
    ("snode", None, ht.TMaybeString),
7090
    ("iallocator", None, ht.TMaybeString),
7091
    ("hypervisor", None, ht.TMaybeString),
7092
    ("disk_template", ht.NoDefault, _CheckDiskTemplate),
7093
    ("identify_defaults", False, ht.TBool),
7094
    ("file_driver", None, ht.TOr(ht.TNone, ht.TElemOf(constants.FILE_DRIVER))),
7095
    ("file_storage_dir", None, ht.TMaybeString),
7096
    ]
7097
  REQ_BGL = False
7098

    
7099
  def CheckArguments(self):
7100
    """Check arguments.
7101

7102
    """
7103
    # do not require name_check to ease forward/backward compatibility
7104
    # for tools
7105
    if self.op.no_install and self.op.start:
7106
      self.LogInfo("No-installation mode selected, disabling startup")
7107
      self.op.start = False
7108
    # validate/normalize the instance name
7109
    self.op.instance_name = \
7110
      netutils.Hostname.GetNormalizedName(self.op.instance_name)
7111

    
7112
    if self.op.ip_check and not self.op.name_check:
7113
      # TODO: make the ip check more flexible and not depend on the name check
7114
      raise errors.OpPrereqError("Cannot do ip check without a name check",
7115
                                 errors.ECODE_INVAL)
7116

    
7117
    # check nics' parameter names
7118
    for nic in self.op.nics:
7119
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
7120

    
7121
    # check disks. parameter names and consistent adopt/no-adopt strategy
7122
    has_adopt = has_no_adopt = False
7123
    for disk in self.op.disks:
7124
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
7125
      if "adopt" in disk:
7126
        has_adopt = True
7127
      else:
7128
        has_no_adopt = True
7129
    if has_adopt and has_no_adopt:
7130
      raise errors.OpPrereqError("Either all disks are adopted or none is",
7131
                                 errors.ECODE_INVAL)
7132
    if has_adopt:
7133
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
7134
        raise errors.OpPrereqError("Disk adoption is not supported for the"
7135
                                   " '%s' disk template" %
7136
                                   self.op.disk_template,
7137
                                   errors.ECODE_INVAL)
7138
      if self.op.iallocator is not None:
7139
        raise errors.OpPrereqError("Disk adoption not allowed with an"
7140
                                   " iallocator script", errors.ECODE_INVAL)
7141
      if self.op.mode == constants.INSTANCE_IMPORT:
7142
        raise errors.OpPrereqError("Disk adoption not allowed for"
7143
                                   " instance import", errors.ECODE_INVAL)
7144

    
7145
    self.adopt_disks = has_adopt
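    # (illustrative) a disk specification requesting adoption looks like
    # {"size": 10240, "adopt": "some-existing-lv"}, optionally with a "vg" key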
7146

    
7147
    # instance name verification
7148
    if self.op.name_check:
7149
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
7150
      self.op.instance_name = self.hostname1.name
7151
      # used in CheckPrereq for ip ping check
7152
      self.check_ip = self.hostname1.ip
7153
    else:
7154
      self.check_ip = None
7155

    
7156
    # file storage checks
7157
    if (self.op.file_driver and
7158
        self.op.file_driver not in constants.FILE_DRIVER):
7159
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
7160
                                 self.op.file_driver, errors.ECODE_INVAL)
7161

    
7162
    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
7163
      raise errors.OpPrereqError("File storage directory path not absolute",
7164
                                 errors.ECODE_INVAL)
7165

    
7166
    ### Node/iallocator related checks
7167
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")
7168

    
7169
    if self.op.pnode is not None:
7170
      if self.op.disk_template in constants.DTS_NET_MIRROR:
7171
        if self.op.snode is None:
7172
          raise errors.OpPrereqError("The networked disk templates need"
7173
                                     " a mirror node", errors.ECODE_INVAL)
7174
      elif self.op.snode:
7175
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
7176
                        " template")
7177
        self.op.snode = None
7178

    
7179
    self._cds = _GetClusterDomainSecret()
7180

    
7181
    if self.op.mode == constants.INSTANCE_IMPORT:
7182
      # On import force_variant must be True, because if we forced it at
7183
      # initial install, our only chance when importing it back is that it
7184
      # works again!
7185
      self.op.force_variant = True
7186

    
7187
      if self.op.no_install:
7188
        self.LogInfo("No-installation mode has no effect during import")
7189

    
7190
    elif self.op.mode == constants.INSTANCE_CREATE:
7191
      if self.op.os_type is None:
7192
        raise errors.OpPrereqError("No guest OS specified",
7193
                                   errors.ECODE_INVAL)
7194
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
7195
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
7196
                                   " installation" % self.op.os_type,
7197
                                   errors.ECODE_STATE)
7198
      if self.op.disk_template is None:
7199
        raise errors.OpPrereqError("No disk template specified",
7200
                                   errors.ECODE_INVAL)
7201

    
7202
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7203
      # Check handshake to ensure both clusters have the same domain secret
7204
      src_handshake = self.op.source_handshake
7205
      if not src_handshake:
7206
        raise errors.OpPrereqError("Missing source handshake",
7207
                                   errors.ECODE_INVAL)
7208

    
7209
      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
7210
                                                           src_handshake)
7211
      if errmsg:
7212
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
7213
                                   errors.ECODE_INVAL)
7214

    
7215
      # Load and check source CA
7216
      self.source_x509_ca_pem = self.op.source_x509_ca
7217
      if not self.source_x509_ca_pem:
7218
        raise errors.OpPrereqError("Missing source X509 CA",
7219
                                   errors.ECODE_INVAL)
7220

    
7221
      try:
7222
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
7223
                                                    self._cds)
7224
      except OpenSSL.crypto.Error, err:
7225
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
7226
                                   (err, ), errors.ECODE_INVAL)
7227

    
7228
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
7229
      if errcode is not None:
7230
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
7231
                                   errors.ECODE_INVAL)
7232

    
7233
      self.source_x509_ca = cert
7234

    
7235
      src_instance_name = self.op.source_instance_name
7236
      if not src_instance_name:
7237
        raise errors.OpPrereqError("Missing source instance name",
7238
                                   errors.ECODE_INVAL)
7239

    
7240
      self.source_instance_name = \
7241
          netutils.GetHostname(name=src_instance_name).name
7242

    
7243
    else:
7244
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
7245
                                 self.op.mode, errors.ECODE_INVAL)
7246

    
7247
  def ExpandNames(self):
7248
    """ExpandNames for CreateInstance.
7249

7250
    Figure out the right locks for instance creation.
7251

7252
    """
7253
    self.needed_locks = {}
7254

    
7255
    instance_name = self.op.instance_name
7256
    # this is just a preventive check, but someone might still add this
7257
    # instance in the meantime, and creation will fail at lock-add time
7258
    if instance_name in self.cfg.GetInstanceList():
7259
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7260
                                 instance_name, errors.ECODE_EXISTS)
7261

    
7262
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
7263

    
7264
    if self.op.iallocator:
7265
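      # the iallocator will choose the nodes itself, so we have to lock all
      # of them until the allocation is known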
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7266
    else:
7267
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
7268
      nodelist = [self.op.pnode]
7269
      if self.op.snode is not None:
7270
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
7271
        nodelist.append(self.op.snode)
7272
      self.needed_locks[locking.LEVEL_NODE] = nodelist
7273

    
7274
    # in case of import lock the source node too
7275
    if self.op.mode == constants.INSTANCE_IMPORT:
7276
      src_node = self.op.src_node
7277
      src_path = self.op.src_path
7278

    
7279
      if src_path is None:
7280
        self.op.src_path = src_path = self.op.instance_name
7281

    
7282
      if src_node is None:
7283
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7284
        self.op.src_node = None
7285
        if os.path.isabs(src_path):
7286
          raise errors.OpPrereqError("Importing an instance from an absolute"
7287
                                     " path requires a source node option.",
7288
                                     errors.ECODE_INVAL)
7289
      else:
7290
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
7291
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
7292
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
7293
        if not os.path.isabs(src_path):
7294
          self.op.src_path = src_path = \
7295
            utils.PathJoin(constants.EXPORT_DIR, src_path)
7296

    
7297
  def _RunAllocator(self):
7298
    """Run the allocator based on input opcode.
7299

7300
    """
7301
    nics = [n.ToDict() for n in self.nics]
7302
    ial = IAllocator(self.cfg, self.rpc,
7303
                     mode=constants.IALLOCATOR_MODE_ALLOC,
7304
                     name=self.op.instance_name,
7305
                     disk_template=self.op.disk_template,
7306
                     tags=[],
7307
                     os=self.op.os_type,
7308
                     vcpus=self.be_full[constants.BE_VCPUS],
7309
                     mem_size=self.be_full[constants.BE_MEMORY],
7310
                     disks=self.disks,
7311
                     nics=nics,
7312
                     hypervisor=self.op.hypervisor,
7313
                     )
7314

    
7315
    ial.Run(self.op.iallocator)
7316

    
7317
    if not ial.success:
7318
      raise errors.OpPrereqError("Can't compute nodes using"
7319
                                 " iallocator '%s': %s" %
7320
                                 (self.op.iallocator, ial.info),
7321
                                 errors.ECODE_NORES)
7322
    if len(ial.result) != ial.required_nodes:
7323
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7324
                                 " of nodes (%s), required %s" %
7325
                                 (self.op.iallocator, len(ial.result),
7326
                                  ial.required_nodes), errors.ECODE_FAULT)
7327
    self.op.pnode = ial.result[0]
7328
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7329
                 self.op.instance_name, self.op.iallocator,
7330
                 utils.CommaJoin(ial.result))
7331
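    # a second node is returned (and required) only for mirrored disk
    # templates, in which case it becomes the secondary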
    if ial.required_nodes == 2:
7332
      self.op.snode = ial.result[1]
7333

    
7334
  def BuildHooksEnv(self):
7335
    """Build hooks env.
7336

7337
    This runs on master, primary and secondary nodes of the instance.
7338

7339
    """
7340
    env = {
7341
      "ADD_MODE": self.op.mode,
7342
      }
7343
    if self.op.mode == constants.INSTANCE_IMPORT:
7344
      env["SRC_NODE"] = self.op.src_node
7345
      env["SRC_PATH"] = self.op.src_path
7346
      env["SRC_IMAGES"] = self.src_images
7347

    
7348
    env.update(_BuildInstanceHookEnv(
7349
      name=self.op.instance_name,
7350
      primary_node=self.op.pnode,
7351
      secondary_nodes=self.secondaries,
7352
      status=self.op.start,
7353
      os_type=self.op.os_type,
7354
      memory=self.be_full[constants.BE_MEMORY],
7355
      vcpus=self.be_full[constants.BE_VCPUS],
7356
      nics=_NICListToTuple(self, self.nics),
7357
      disk_template=self.op.disk_template,
7358
      disks=[(d["size"], d["mode"]) for d in self.disks],
7359
      bep=self.be_full,
7360
      hvp=self.hv_full,
7361
      hypervisor_name=self.op.hypervisor,
7362
    ))
7363

    
7364
    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
7365
          self.secondaries)
7366
    return env, nl, nl
7367

    
7368
  def _ReadExportInfo(self):
7369
    """Reads the export information from disk.
7370

7371
    It will override the opcode source node and path with the actual
7372
    information, if these two were not specified before.
7373

7374
    @return: the export information
7375

7376
    """
7377
    assert self.op.mode == constants.INSTANCE_IMPORT
7378

    
7379
    src_node = self.op.src_node
7380
    src_path = self.op.src_path
7381

    
7382
    if src_node is None:
7383
      locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
7384
      exp_list = self.rpc.call_export_list(locked_nodes)
7385
      found = False
7386
      for node in exp_list:
7387
        if exp_list[node].fail_msg:
7388
          continue
7389
        if src_path in exp_list[node].payload:
7390
          found = True
7391
          self.op.src_node = src_node = node
7392
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
7393
                                                       src_path)
7394
          break
7395
      if not found:
7396
        raise errors.OpPrereqError("No export found for relative path %s" %
7397
                                    src_path, errors.ECODE_INVAL)
7398

    
7399
    _CheckNodeOnline(self, src_node)
7400
    result = self.rpc.call_export_info(src_node, src_path)
7401
    result.Raise("No export or invalid export found in dir %s" % src_path)
7402

    
7403
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
7404
    if not export_info.has_section(constants.INISECT_EXP):
7405
      raise errors.ProgrammerError("Corrupted export config",
7406
                                   errors.ECODE_ENVIRON)
7407

    
7408
    ei_version = export_info.get(constants.INISECT_EXP, "version")
7409
    if (int(ei_version) != constants.EXPORT_VERSION):
7410
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
7411
                                 (ei_version, constants.EXPORT_VERSION),
7412
                                 errors.ECODE_ENVIRON)
7413
    return export_info
7414

    
7415
  def _ReadExportParams(self, einfo):
7416
    """Use export parameters as defaults.
7417

7418
    In case the opcode doesn't specify (as in override) some instance
7419
    parameters, then try to use them from the export information, if
7420
    that declares them.
7421

7422
    """
7423
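    # einfo is a ConfigParser-style object: for example, a "disk0_size" entry
    # in the INISECT_INS section is turned into a {"size": ...} disk
    # specification below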
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
7424

    
7425
    if self.op.disk_template is None:
7426
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
7427
        self.op.disk_template = einfo.get(constants.INISECT_INS,
7428
                                          "disk_template")
7429
      else:
7430
        raise errors.OpPrereqError("No disk template specified and the export"
7431
                                   " is missing the disk_template information",
7432
                                   errors.ECODE_INVAL)
7433

    
7434
    if not self.op.disks:
7435
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
7436
        disks = []
7437
        # TODO: import the disk iv_name too
7438
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
7439
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
7440
          disks.append({"size": disk_sz})
7441
        self.op.disks = disks
7442
      else:
7443
        raise errors.OpPrereqError("No disk info specified and the export"
7444
                                   " is missing the disk information",
7445
                                   errors.ECODE_INVAL)
7446

    
7447
    if (not self.op.nics and
7448
        einfo.has_option(constants.INISECT_INS, "nic_count")):
7449
      nics = []
7450
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
7451
        ndict = {}
7452
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
7453
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
7454
          ndict[name] = v
7455
        nics.append(ndict)
7456
      self.op.nics = nics
7457

    
7458
    if (self.op.hypervisor is None and
7459
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
7460
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
7461
    if einfo.has_section(constants.INISECT_HYP):
7462
      # use the export parameters but do not override the ones
7463
      # specified by the user
7464
      for name, value in einfo.items(constants.INISECT_HYP):
7465
        if name not in self.op.hvparams:
7466
          self.op.hvparams[name] = value
7467

    
7468
    if einfo.has_section(constants.INISECT_BEP):
7469
      # use the parameters, without overriding
7470
      for name, value in einfo.items(constants.INISECT_BEP):
7471
        if name not in self.op.beparams:
7472
          self.op.beparams[name] = value
7473
    else:
7474
      # try to read the parameters old style, from the main section
7475
      for name in constants.BES_PARAMETERS:
7476
        if (name not in self.op.beparams and
7477
            einfo.has_option(constants.INISECT_INS, name)):
7478
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
7479

    
7480
    if einfo.has_section(constants.INISECT_OSP):
7481
      # use the parameters, without overriding
7482
      for name, value in einfo.items(constants.INISECT_OSP):
7483
        if name not in self.op.osparams:
7484
          self.op.osparams[name] = value
7485

    
7486
  def _RevertToDefaults(self, cluster):
7487
    """Revert the instance parameters to the default values.
7488

7489
    """
7490
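    # only values identical to the current cluster defaults are dropped, so
    # that the instance keeps following future changes of those defaults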
    # hvparams
7491
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
7492
    for name in self.op.hvparams.keys():
7493
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
7494
        del self.op.hvparams[name]
7495
    # beparams
7496
    be_defs = cluster.SimpleFillBE({})
7497
    for name in self.op.beparams.keys():
7498
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
7499
        del self.op.beparams[name]
7500
    # nic params
7501
    nic_defs = cluster.SimpleFillNIC({})
7502
    for nic in self.op.nics:
7503
      for name in constants.NICS_PARAMETERS:
7504
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
7505
          del nic[name]
7506
    # osparams
7507
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
7508
    for name in self.op.osparams.keys():
7509
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
7510
        del self.op.osparams[name]
7511

    
7512
  def CheckPrereq(self):
7513
    """Check prerequisites.
7514

7515
    """
7516
    if self.op.mode == constants.INSTANCE_IMPORT:
7517
      export_info = self._ReadExportInfo()
7518
      self._ReadExportParams(export_info)
7519

    
7520
    _CheckDiskTemplate(self.op.disk_template)
7521

    
7522
    if (not self.cfg.GetVGName() and
7523
        self.op.disk_template not in constants.DTS_NOT_LVM):
7524
      raise errors.OpPrereqError("Cluster does not support lvm-based"
7525
                                 " instances", errors.ECODE_STATE)
7526

    
7527
    if self.op.hypervisor is None:
7528
      self.op.hypervisor = self.cfg.GetHypervisorType()
7529

    
7530
    cluster = self.cfg.GetClusterInfo()
7531
    enabled_hvs = cluster.enabled_hypervisors
7532
    if self.op.hypervisor not in enabled_hvs:
7533
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
7534
                                 " cluster (%s)" % (self.op.hypervisor,
7535
                                  ",".join(enabled_hvs)),
7536
                                 errors.ECODE_STATE)
7537

    
7538
    # check hypervisor parameter syntax (locally)
7539
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7540
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
7541
                                      self.op.hvparams)
7542
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
7543
    hv_type.CheckParameterSyntax(filled_hvp)
7544
    self.hv_full = filled_hvp
7545
    # check that we don't specify global parameters on an instance
7546
    _CheckGlobalHvParams(self.op.hvparams)
7547

    
7548
    # fill and remember the beparams dict
7549
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7550
    self.be_full = cluster.SimpleFillBE(self.op.beparams)
7551

    
7552
    # build os parameters
7553
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
7554

    
7555
    # now that hvp/bep are in final format, let's reset to defaults,
7556
    # if told to do so
7557
    if self.op.identify_defaults:
7558
      self._RevertToDefaults(cluster)
7559

    
7560
    # NIC buildup
7561
    self.nics = []
7562
    for idx, nic in enumerate(self.op.nics):
7563
      nic_mode_req = nic.get("mode", None)
7564
      nic_mode = nic_mode_req
7565
      if nic_mode is None:
7566
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
7567

    
7568
      # in routed mode, for the first nic, the default ip is 'auto'
7569
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
7570
        default_ip_mode = constants.VALUE_AUTO
7571
      else:
7572
        default_ip_mode = constants.VALUE_NONE
7573

    
7574
      # ip validity checks
7575
      ip = nic.get("ip", default_ip_mode)
7576
      if ip is None or ip.lower() == constants.VALUE_NONE:
7577
        nic_ip = None
7578
      elif ip.lower() == constants.VALUE_AUTO:
7579
        if not self.op.name_check:
7580
          raise errors.OpPrereqError("IP address set to auto but name checks"
7581
                                     " have been skipped",
7582
                                     errors.ECODE_INVAL)
7583
        nic_ip = self.hostname1.ip
7584
      else:
7585
        if not netutils.IPAddress.IsValid(ip):
7586
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
7587
                                     errors.ECODE_INVAL)
7588
        nic_ip = ip
7589

    
7590
      # TODO: check the ip address for uniqueness
7591
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7592
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
7593
                                   errors.ECODE_INVAL)
7594

    
7595
      # MAC address verification
7596
      mac = nic.get("mac", constants.VALUE_AUTO)
7597
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7598
        mac = utils.NormalizeAndValidateMac(mac)
7599

    
7600
        try:
7601
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
7602
        except errors.ReservationError:
7603
          raise errors.OpPrereqError("MAC address %s already in use"
7604
                                     " in cluster" % mac,
7605
                                     errors.ECODE_NOTUNIQUE)
7606

    
7607
      # bridge verification
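      # 'bridge' is the legacy way of specifying the NIC 'link' and is
      # translated into it below; it is not allowed in routed mode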
7608
      bridge = nic.get("bridge", None)
7609
      link = nic.get("link", None)
7610
      if bridge and link:
7611
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7612
                                   " at the same time", errors.ECODE_INVAL)
7613
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
7614
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
7615
                                   errors.ECODE_INVAL)
7616
      elif bridge:
7617
        link = bridge
7618

    
7619
      nicparams = {}
7620
      if nic_mode_req:
7621
        nicparams[constants.NIC_MODE] = nic_mode_req
7622
      if link:
7623
        nicparams[constants.NIC_LINK] = link
7624

    
7625
      check_params = cluster.SimpleFillNIC(nicparams)
7626
      objects.NIC.CheckParameterSyntax(check_params)
7627
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
7628

    
7629
    # disk checks/pre-build
7630
    self.disks = []
7631
    for disk in self.op.disks:
7632
      mode = disk.get("mode", constants.DISK_RDWR)
7633
      if mode not in constants.DISK_ACCESS_SET:
7634
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
7635
                                   mode, errors.ECODE_INVAL)
7636
      size = disk.get("size", None)
7637
      if size is None:
7638
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
7639
      try:
7640
        size = int(size)
7641
      except (TypeError, ValueError):
7642
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
7643
                                   errors.ECODE_INVAL)
7644
      vg = disk.get("vg", self.cfg.GetVGName())
7645
      new_disk = {"size": size, "mode": mode, "vg": vg}
7646
      if "adopt" in disk:
7647
        new_disk["adopt"] = disk["adopt"]
7648
      self.disks.append(new_disk)
7649

    
7650
    if self.op.mode == constants.INSTANCE_IMPORT:
7651

    
7652
      # Check that the new instance doesn't have less disks than the export
7653
      instance_disks = len(self.disks)
7654
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7655
      if instance_disks < export_disks:
7656
        raise errors.OpPrereqError("Not enough disks to import."
7657
                                   " (instance: %d, export: %d)" %
7658
                                   (instance_disks, export_disks),
7659
                                   errors.ECODE_INVAL)
7660

    
7661
      disk_images = []
7662
      for idx in range(export_disks):
7663
        option = 'disk%d_dump' % idx
7664
        if export_info.has_option(constants.INISECT_INS, option):
7665
          # FIXME: are the old os-es, disk sizes, etc. useful?
7666
          export_name = export_info.get(constants.INISECT_INS, option)
7667
          image = utils.PathJoin(self.op.src_path, export_name)
7668
          disk_images.append(image)
7669
        else:
7670
          disk_images.append(False)
7671

    
7672
      self.src_images = disk_images
7673

    
7674
      old_name = export_info.get(constants.INISECT_INS, 'name')
7675
      try:
7676
        exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
7677
      except (TypeError, ValueError), err:
7678
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
7679
                                   " an integer: %s" % str(err),
7680
                                   errors.ECODE_STATE)
7681
      if self.op.instance_name == old_name:
7682
        for idx, nic in enumerate(self.nics):
7683
          if nic.mac == constants.VALUE_AUTO and exp_nic_count > idx:
7684
            nic_mac_ini = 'nic%d_mac' % idx
7685
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
7686

    
7687
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
7688

    
7689
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
7690
    if self.op.ip_check:
7691
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
7692
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
7693
                                   (self.check_ip, self.op.instance_name),
7694
                                   errors.ECODE_NOTUNIQUE)
7695

    
7696
    #### mac address generation
7697
    # By generating here the mac address both the allocator and the hooks get
7698
    # the real final mac address rather than the 'auto' or 'generate' value.
7699
    # There is a race condition between the generation and the instance object
7700
    # creation, which means that we know the mac is valid now, but we're not
7701
    # sure it will be when we actually add the instance. If things go bad
7702
    # adding the instance will abort because of a duplicate mac, and the
7703
    # creation job will fail.
7704
    for nic in self.nics:
7705
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7706
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
7707

    
7708
    #### allocator run
7709

    
7710
    if self.op.iallocator is not None:
7711
      self._RunAllocator()
7712

    
7713
    #### node related checks
7714

    
7715
    # check primary node
7716
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
7717
    assert self.pnode is not None, \
7718
      "Cannot retrieve locked node %s" % self.op.pnode
7719
    if pnode.offline:
7720
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
7721
                                 pnode.name, errors.ECODE_STATE)
7722
    if pnode.drained:
7723
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
7724
                                 pnode.name, errors.ECODE_STATE)
7725
    if not pnode.vm_capable:
7726
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
7727
                                 " '%s'" % pnode.name, errors.ECODE_STATE)
7728

    
7729
    self.secondaries = []
7730

    
7731
    # mirror node verification
7732
    if self.op.disk_template in constants.DTS_NET_MIRROR:
7733
      if self.op.snode == pnode.name:
7734
        raise errors.OpPrereqError("The secondary node cannot be the"
7735
                                   " primary node.", errors.ECODE_INVAL)
7736
      _CheckNodeOnline(self, self.op.snode)
7737
      _CheckNodeNotDrained(self, self.op.snode)
7738
      _CheckNodeVmCapable(self, self.op.snode)
7739
      self.secondaries.append(self.op.snode)
7740

    
7741
    nodenames = [pnode.name] + self.secondaries
7742

    
7743
    if not self.adopt_disks:
7744
      # Check lv size requirements, if not adopting
7745
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
7746
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
7747

    
7748
    else: # instead, we must check the adoption data
7749
      all_lvs = set([i["vg"] + "/" + i["adopt"] for i in self.disks])
7750
      if len(all_lvs) != len(self.disks):
7751
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
7752
                                   errors.ECODE_INVAL)
7753
      for lv_name in all_lvs:
7754
        try:
7755
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
7756
          # to ReserveLV uses the same syntax
7757
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7758
        except errors.ReservationError:
7759
          raise errors.OpPrereqError("LV named %s used by another instance" %
7760
                                     lv_name, errors.ECODE_NOTUNIQUE)
7761

    
7762
      vg_names = self.rpc.call_vg_list([pnode.name])
7763
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
7764

    
7765
      node_lvs = self.rpc.call_lv_list([pnode.name],
7766
                                       vg_names[pnode.name].payload.keys()
7767
                                      )[pnode.name]
7768
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7769
      node_lvs = node_lvs.payload
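      # node_lvs maps "vg/name" to per-LV data; the first element is the LV
      # size (used below to adjust the disk specification) and the third one
      # the online flag (adopting an online LV is refused)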
7770

    
7771
      delta = all_lvs.difference(node_lvs.keys())
7772
      if delta:
7773
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
7774
                                   utils.CommaJoin(delta),
7775
                                   errors.ECODE_INVAL)
7776
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
7777
      if online_lvs:
7778
        raise errors.OpPrereqError("Online logical volumes found, cannot"
7779
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
7780
                                   errors.ECODE_STATE)
7781
      # update the size of disk based on what is found
7782
      for dsk in self.disks:
7783
        dsk["size"] = int(float(node_lvs[dsk["vg"] + "/" + dsk["adopt"]][0]))
7784

    
7785
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
7786

    
7787
    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
7788
    # check OS parameters (remotely)
7789
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
7790

    
7791
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
7792

    
7793
    # memory check on primary node
7794
    if self.op.start:
7795
      _CheckNodeFreeMemory(self, self.pnode.name,
7796
                           "creating instance %s" % self.op.instance_name,
7797
                           self.be_full[constants.BE_MEMORY],
7798
                           self.op.hypervisor)
7799

    
7800
    self.dry_run_result = list(nodenames)
7801

    
7802
  def Exec(self, feedback_fn):
7803
    """Create and add the instance to the cluster.
7804

7805
    """
7806
    instance = self.op.instance_name
7807
    pnode_name = self.pnode.name
7808

    
7809
    ht_kind = self.op.hypervisor
7810
    if ht_kind in constants.HTS_REQ_PORT:
7811
      network_port = self.cfg.AllocatePort()
7812
    else:
7813
      network_port = None
7814

    
7815
    if constants.ENABLE_FILE_STORAGE:
7816
      # this is needed because os.path.join does not accept None arguments
7817
      if self.op.file_storage_dir is None:
7818
        string_file_storage_dir = ""
7819
      else:
7820
        string_file_storage_dir = self.op.file_storage_dir
7821

    
7822
      # build the full file storage dir path
7823
      file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
7824
                                        string_file_storage_dir, instance)
7825
    else:
7826
      file_storage_dir = ""
7827

    
7828
    disks = _GenerateDiskTemplate(self,
7829
                                  self.op.disk_template,
7830
                                  instance, pnode_name,
7831
                                  self.secondaries,
7832
                                  self.disks,
7833
                                  file_storage_dir,
7834
                                  self.op.file_driver,
7835
                                  0,
7836
                                  feedback_fn)
7837

    
7838
    iobj = objects.Instance(name=instance, os=self.op.os_type,
7839
                            primary_node=pnode_name,
7840
                            nics=self.nics, disks=disks,
7841
                            disk_template=self.op.disk_template,
7842
                            admin_up=False,
7843
                            network_port=network_port,
7844
                            beparams=self.op.beparams,
7845
                            hvparams=self.op.hvparams,
7846
                            hypervisor=self.op.hypervisor,
7847
                            osparams=self.op.osparams,
7848
                            )
7849

    
7850
    if self.adopt_disks:
7851
      # rename LVs to the newly-generated names; we need to construct
7852
      # 'fake' LV disks with the old data, plus the new unique_id
7853
      tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
7854
      rename_to = []
7855
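      # each temporary disk object gets the name of the existing (adopted) LV
      # as its logical_id, while rename_to keeps the newly generated name, so
      # that blockdev_rename below maps the old name to the new one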
      for t_dsk, a_dsk in zip(tmp_disks, self.disks):
7856
        rename_to.append(t_dsk.logical_id)
7857
        t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
7858
        self.cfg.SetDiskID(t_dsk, pnode_name)
7859
      result = self.rpc.call_blockdev_rename(pnode_name,
7860
                                             zip(tmp_disks, rename_to))
7861
      result.Raise("Failed to rename adoped LVs")
7862
    else:
7863
      feedback_fn("* creating instance disks...")
7864
      try:
7865
        _CreateDisks(self, iobj)
7866
      except errors.OpExecError:
7867
        self.LogWarning("Device creation failed, reverting...")
7868
        try:
7869
          _RemoveDisks(self, iobj)
7870
        finally:
7871
          self.cfg.ReleaseDRBDMinors(instance)
7872
          raise
7873

    
7874
      if self.cfg.GetClusterInfo().prealloc_wipe_disks:
7875
        feedback_fn("* wiping instance disks...")
7876
        try:
7877
          _WipeDisks(self, iobj)
7878
        except errors.OpExecError:
7879
          self.LogWarning("Device wiping failed, reverting...")
7880
          try:
7881
            _RemoveDisks(self, iobj)
7882
          finally:
7883
            self.cfg.ReleaseDRBDMinors(instance)
7884
            raise
7885

    
7886
    feedback_fn("adding instance %s to cluster config" % instance)
7887

    
7888
    self.cfg.AddInstance(iobj, self.proc.GetECId())
7889

    
7890
    # Declare that we don't want to remove the instance lock anymore, as we've
7891
    # added the instance to the config
7892
    del self.remove_locks[locking.LEVEL_INSTANCE]
7893
    # Unlock all the nodes
7894
    if self.op.mode == constants.INSTANCE_IMPORT:
7895
      nodes_keep = [self.op.src_node]
7896
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
7897
                       if node != self.op.src_node]
7898
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
7899
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
7900
    else:
7901
      self.context.glm.release(locking.LEVEL_NODE)
7902
      del self.acquired_locks[locking.LEVEL_NODE]
7903

    
7904
    if self.op.wait_for_sync:
7905
      disk_abort = not _WaitForSync(self, iobj)
7906
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
7907
      # make sure the disks are not degraded (still sync-ing is ok)
7908
      time.sleep(15)
7909
      feedback_fn("* checking mirrors status")
7910
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
7911
    else:
7912
      disk_abort = False
7913

    
7914
    if disk_abort:
7915
      _RemoveDisks(self, iobj)
7916
      self.cfg.RemoveInstance(iobj.name)
7917
      # Make sure the instance lock gets removed
7918
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7919
      raise errors.OpExecError("There are some degraded disks for"
7920
                               " this instance")
7921

    
7922
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7923
      if self.op.mode == constants.INSTANCE_CREATE:
7924
        if not self.op.no_install:
7925
          feedback_fn("* running the instance OS create scripts...")
7926
          # FIXME: pass debug option from opcode to backend
7927
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
7928
                                                 self.op.debug_level)
7929
          result.Raise("Could not add os for instance %s"
7930
                       " on node %s" % (instance, pnode_name))
7931

    
7932
      elif self.op.mode == constants.INSTANCE_IMPORT:
7933
        feedback_fn("* running the instance OS import scripts...")
7934

    
7935
        transfers = []
7936

    
7937
        for idx, image in enumerate(self.src_images):
7938
          if not image:
7939
            continue
7940

    
7941
          # FIXME: pass debug option from opcode to backend
7942
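          # each transfer reads the exported dump file on the source node and
          # feeds it to the OS import script for the corresponding new disk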
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7943
                                             constants.IEIO_FILE, (image, ),
7944
                                             constants.IEIO_SCRIPT,
7945
                                             (iobj.disks[idx], idx),
7946
                                             None)
7947
          transfers.append(dt)
7948

    
7949
        import_result = \
7950
          masterd.instance.TransferInstanceData(self, feedback_fn,
7951
                                                self.op.src_node, pnode_name,
7952
                                                self.pnode.secondary_ip,
7953
                                                iobj, transfers)
7954
        if not compat.all(import_result):
7955
          self.LogWarning("Some disks for instance %s on node %s were not"
7956
                          " imported successfully" % (instance, pnode_name))
7957

    
7958
      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7959
        feedback_fn("* preparing remote import...")
7960
        # The source cluster will stop the instance before attempting to make a
7961
        # connection. In some cases stopping an instance can take a long time,
7962
        # hence the shutdown timeout is added to the connection timeout.
7963
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
7964
                           self.op.source_shutdown_timeout)
7965
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7966

    
7967
        assert iobj.primary_node == self.pnode.name
7968
        disk_results = \
7969
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
7970
                                        self.source_x509_ca,
7971
                                        self._cds, timeouts)
7972
        if not compat.all(disk_results):
7973
          # TODO: Should the instance still be started, even if some disks
7974
          # failed to import (valid for local imports, too)?
7975
          self.LogWarning("Some disks for instance %s on node %s were not"
7976
                          " imported successfully" % (instance, pnode_name))
7977

    
7978
        # Run rename script on newly imported instance
7979
        assert iobj.name == instance
7980
        feedback_fn("Running rename script for %s" % instance)
7981
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7982
                                                   self.source_instance_name,
7983
                                                   self.op.debug_level)
7984
        if result.fail_msg:
7985
          self.LogWarning("Failed to run rename script for %s on node"
7986
                          " %s: %s" % (instance, pnode_name, result.fail_msg))
7987

    
7988
      else:
7989
        # also checked in the prereq part
7990
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
7991
                                     % self.op.mode)
7992

    
7993
    if self.op.start:
7994
      iobj.admin_up = True
7995
      self.cfg.Update(iobj, feedback_fn)
7996
      logging.info("Starting instance %s on node %s", instance, pnode_name)
7997
      feedback_fn("* starting instance...")
7998
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
7999
      result.Raise("Could not start instance")
8000

    
8001
    return list(iobj.all_nodes)


class LUConnectConsole(NoHooksLU):
8005
  """Connect to an instance's console.
8006

8007
  This is somewhat special in that it returns the command line that
8008
  you need to run on the master node in order to connect to the
8009
  console.
8010

8011
  """
8012
  _OP_PARAMS = [
8013
    _PInstanceName
8014
    ]
8015
  REQ_BGL = False
8016

    
8017
  def ExpandNames(self):
8018
    self._ExpandAndLockInstance()
8019

    
8020
  def CheckPrereq(self):
8021
    """Check prerequisites.
8022

8023
    This checks that the instance is in the cluster.
8024

8025
    """
8026
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8027
    assert self.instance is not None, \
8028
      "Cannot retrieve locked instance %s" % self.op.instance_name
8029
    _CheckNodeOnline(self, self.instance.primary_node)
8030

    
8031
  def Exec(self, feedback_fn):
8032
    """Connect to the console of an instance
8033

8034
    """
8035
    instance = self.instance
8036
    node = instance.primary_node
8037

    
8038
    node_insts = self.rpc.call_instance_list([node],
8039
                                             [instance.hypervisor])[node]
8040
    node_insts.Raise("Can't get node information from %s" % node)
8041

    
8042
    if instance.name not in node_insts.payload:
8043
      if instance.admin_up:
8044
        state = "ERROR_down"
8045
      else:
8046
        state = "ADMIN_down"
8047
      raise errors.OpExecError("Instance %s is not running (state %s)" %
8048
                               (instance.name, state))
8049

    
8050
    logging.debug("Connecting to console of %s on %s", instance.name, node)
8051

    
8052
    hyper = hypervisor.GetHypervisor(instance.hypervisor)
8053
    cluster = self.cfg.GetClusterInfo()
8054
    # beparams and hvparams are passed separately, to avoid editing the
8055
    # instance and then saving the defaults in the instance itself.
8056
    hvparams = cluster.FillHV(instance)
8057
    beparams = cluster.FillBE(instance)
8058
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
8059

    
8060
    # build ssh cmdline
8061
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)


class LUReplaceDisks(LogicalUnit):
8065
  """Replace the disks of an instance.
8066

8067
  """
8068
  HPATH = "mirrors-replace"
8069
  HTYPE = constants.HTYPE_INSTANCE
8070
  _OP_PARAMS = [
8071
    _PInstanceName,
8072
    ("mode", ht.NoDefault, ht.TElemOf(constants.REPLACE_MODES)),
8073
    ("disks", ht.EmptyList, ht.TListOf(ht.TPositiveInt)),
8074
    ("remote_node", None, ht.TMaybeString),
8075
    ("iallocator", None, ht.TMaybeString),
8076
    ("early_release", False, ht.TBool),
8077
    ]
8078
  REQ_BGL = False
8079

    
8080
  def CheckArguments(self):
8081
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
8082
                                  self.op.iallocator)
8083

    
8084
  def ExpandNames(self):
8085
    self._ExpandAndLockInstance()
8086

    
8087
    if self.op.iallocator is not None:
8088
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8089

    
8090
    elif self.op.remote_node is not None:
8091
      remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8092
      self.op.remote_node = remote_node
8093

    
8094
      # Warning: do not remove the locking of the new secondary here
8095
      # unless DRBD8.AddChildren is changed to work in parallel;
8096
      # currently it doesn't since parallel invocations of
8097
      # FindUnusedMinor will conflict
8098
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
8099
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
8100

    
8101
    else:
8102
      self.needed_locks[locking.LEVEL_NODE] = []
8103
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8104

    
8105
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
8106
                                   self.op.iallocator, self.op.remote_node,
8107
                                   self.op.disks, False, self.op.early_release)
8108

    
8109
    self.tasklets = [self.replacer]
8110

    
8111
  def DeclareLocks(self, level):
8112
    # If we're not already locking all nodes in the set we have to declare the
8113
    # instance's primary/secondary nodes.
8114
    if (level == locking.LEVEL_NODE and
8115
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
8116
      self._LockInstancesNodes()
8117

    
8118
  def BuildHooksEnv(self):
8119
    """Build hooks env.
8120

8121
    This runs on the master, the primary and all the secondaries.
8122

8123
    """
8124
    instance = self.replacer.instance
8125
    env = {
8126
      "MODE": self.op.mode,
8127
      "NEW_SECONDARY": self.op.remote_node,
8128
      "OLD_SECONDARY": instance.secondary_nodes[0],
8129
      }
8130
    env.update(_BuildInstanceHookEnvByObject(self, instance))
8131
    nl = [
8132
      self.cfg.GetMasterNode(),
8133
      instance.primary_node,
8134
      ]
8135
    if self.op.remote_node is not None:
8136
      nl.append(self.op.remote_node)
8137
    return env, nl, nl


class TLReplaceDisks(Tasklet):
8141
  """Replaces disks for an instance.
8142

8143
  Note: Locking is not within the scope of this class.
8144

8145
  """
8146
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
8147
               disks, delay_iallocator, early_release):
8148
    """Initializes this class.
8149

8150
    """
8151
    Tasklet.__init__(self, lu)
8152

    
8153
    # Parameters
8154
    self.instance_name = instance_name
8155
    self.mode = mode
8156
    self.iallocator_name = iallocator_name
8157
    self.remote_node = remote_node
8158
    self.disks = disks
8159
    self.delay_iallocator = delay_iallocator
8160
    self.early_release = early_release
8161

    
8162
    # Runtime data
8163
    self.instance = None
8164
    self.new_node = None
8165
    self.target_node = None
8166
    self.other_node = None
8167
    self.remote_node_info = None
8168
    self.node_secondary_ip = None
8169

    
8170
  @staticmethod
8171
  def CheckArguments(mode, remote_node, iallocator):
8172
    """Helper function for users of this class.
8173

8174
    """
8175
    # check for valid parameter combination
8176
    if mode == constants.REPLACE_DISK_CHG:
8177
      if remote_node is None and iallocator is None:
8178
        raise errors.OpPrereqError("When changing the secondary either an"
8179
                                   " iallocator script must be used or the"
8180
                                   " new node given", errors.ECODE_INVAL)
8181

    
8182
      if remote_node is not None and iallocator is not None:
8183
        raise errors.OpPrereqError("Give either the iallocator or the new"
8184
                                   " secondary, not both", errors.ECODE_INVAL)
8185

    
8186
    elif remote_node is not None or iallocator is not None:
8187
      # Not replacing the secondary
8188
      raise errors.OpPrereqError("The iallocator and new node options can"
8189
                                 " only be used when changing the"
8190
                                 " secondary node", errors.ECODE_INVAL)
8191

    
8192
  @staticmethod
8193
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
8194
    """Compute a new secondary node using an IAllocator.
8195

8196
    """
8197
    ial = IAllocator(lu.cfg, lu.rpc,
8198
                     mode=constants.IALLOCATOR_MODE_RELOC,
8199
                     name=instance_name,
8200
                     relocate_from=relocate_from)
8201

    
8202
    ial.Run(iallocator_name)
8203

    
8204
    if not ial.success:
8205
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
8206
                                 " %s" % (iallocator_name, ial.info),
8207
                                 errors.ECODE_NORES)
8208

    
8209
    if len(ial.result) != ial.required_nodes:
8210
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8211
                                 " of nodes (%s), required %s" %
8212
                                 (iallocator_name,
8213
                                  len(ial.result), ial.required_nodes),
8214
                                 errors.ECODE_FAULT)
8215

    
8216
    remote_node_name = ial.result[0]
8217

    
8218
    lu.LogInfo("Selected new secondary for instance '%s': %s",
8219
               instance_name, remote_node_name)
8220

    
8221
    return remote_node_name
8222

    
8223
  def _FindFaultyDisks(self, node_name):
8224
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
8225
                                    node_name, True)
8226

    
8227
  def CheckPrereq(self):
8228
    """Check prerequisites.
8229

8230
    This checks that the instance is in the cluster.
8231

8232
    """
8233
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
8234
    assert instance is not None, \
8235
      "Cannot retrieve locked instance %s" % self.instance_name
8236

    
8237
    if instance.disk_template != constants.DT_DRBD8:
8238
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
8239
                                 " instances", errors.ECODE_INVAL)
8240

    
8241
    if len(instance.secondary_nodes) != 1:
8242
      raise errors.OpPrereqError("The instance has a strange layout,"
8243
                                 " expected one secondary but found %d" %
8244
                                 len(instance.secondary_nodes),
8245
                                 errors.ECODE_FAULT)
8246

    
8247
    if not self.delay_iallocator:
8248
      self._CheckPrereq2()
8249

    
8250
  def _CheckPrereq2(self):
8251
    """Check prerequisites, second part.
8252

8253
    This function should always be part of CheckPrereq. It was separated and is
8254
    now called from Exec because during node evacuation iallocator was only
8255
    called with an unmodified cluster model, not taking planned changes into
8256
    account.
8257

8258
    """
8259
    instance = self.instance
8260
    secondary_node = instance.secondary_nodes[0]
8261

    
8262
    if self.iallocator_name is None:
8263
      remote_node = self.remote_node
8264
    else:
8265
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
8266
                                       instance.name, instance.secondary_nodes)
8267

    
8268
    if remote_node is not None:
8269
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
8270
      assert self.remote_node_info is not None, \
8271
        "Cannot retrieve locked node %s" % remote_node
8272
    else:
8273
      self.remote_node_info = None
8274

    
8275
    if remote_node == self.instance.primary_node:
8276
      raise errors.OpPrereqError("The specified node is the primary node of"
8277
                                 " the instance.", errors.ECODE_INVAL)
8278

    
8279
    if remote_node == secondary_node:
8280
      raise errors.OpPrereqError("The specified node is already the"
8281
                                 " secondary node of the instance.",
8282
                                 errors.ECODE_INVAL)
8283

    
8284
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
8285
                                    constants.REPLACE_DISK_CHG):
8286
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
8287
                                 errors.ECODE_INVAL)
8288

    
8289
    if self.mode == constants.REPLACE_DISK_AUTO:
8290
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
8291
      faulty_secondary = self._FindFaultyDisks(secondary_node)
8292

    
8293
      if faulty_primary and faulty_secondary:
8294
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
8295
                                   " one node and can not be repaired"
8296
                                   " automatically" % self.instance_name,
8297
                                   errors.ECODE_STATE)
8298

    
8299
      if faulty_primary:
8300
        self.disks = faulty_primary
8301
        self.target_node = instance.primary_node
8302
        self.other_node = secondary_node
8303
        check_nodes = [self.target_node, self.other_node]
8304
      elif faulty_secondary:
8305
        self.disks = faulty_secondary
8306
        self.target_node = secondary_node
8307
        self.other_node = instance.primary_node
8308
        check_nodes = [self.target_node, self.other_node]
8309
      else:
8310
        self.disks = []
8311
        check_nodes = []
8312

    
8313
    else:
8314
      # Non-automatic modes
8315
      if self.mode == constants.REPLACE_DISK_PRI:
8316
        self.target_node = instance.primary_node
8317
        self.other_node = secondary_node
8318
        check_nodes = [self.target_node, self.other_node]
8319

    
8320
      elif self.mode == constants.REPLACE_DISK_SEC:
8321
        self.target_node = secondary_node
8322
        self.other_node = instance.primary_node
8323
        check_nodes = [self.target_node, self.other_node]
8324

    
8325
      elif self.mode == constants.REPLACE_DISK_CHG:
8326
        self.new_node = remote_node
8327
        self.other_node = instance.primary_node
8328
        self.target_node = secondary_node
8329
        check_nodes = [self.new_node, self.other_node]
8330

    
8331
        _CheckNodeNotDrained(self.lu, remote_node)
8332
        _CheckNodeVmCapable(self.lu, remote_node)
8333

    
8334
        old_node_info = self.cfg.GetNodeInfo(secondary_node)
8335
        assert old_node_info is not None
8336
        if old_node_info.offline and not self.early_release:
8337
          # doesn't make sense to delay the release
8338
          self.early_release = True
8339
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
8340
                          " early-release mode", secondary_node)
8341

    
8342
      else:
8343
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
8344
                                     self.mode)
8345

    
8346
      # If not specified all disks should be replaced
8347
      if not self.disks:
8348
        self.disks = range(len(self.instance.disks))
8349

    
8350
    for node in check_nodes:
8351
      _CheckNodeOnline(self.lu, node)
8352

    
8353
    # Check whether disks are valid
8354
    for disk_idx in self.disks:
8355
      instance.FindDisk(disk_idx)
8356

    
8357
    # Get secondary node IP addresses
8358
    node_2nd_ip = {}
8359

    
8360
    for node_name in [self.target_node, self.other_node, self.new_node]:
8361
      if node_name is not None:
8362
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
8363

    
8364
    self.node_secondary_ip = node_2nd_ip
8365

    
8366
  def Exec(self, feedback_fn):
    """Execute disk replacement.

    This dispatches the disk replacement to the appropriate handler.

    """
    if self.delay_iallocator:
      self._CheckPrereq2()

    if not self.disks:
      feedback_fn("No disks need replacement")
      return

    feedback_fn("Replacing disk(s) %s for %s" %
                (utils.CommaJoin(self.disks), self.instance.name))

    activate_disks = (not self.instance.admin_up)

    # Activate the instance disks if we're replacing them on a down instance
    if activate_disks:
      _StartInstanceDisks(self.lu, self.instance, True)

    try:
      # Should we replace the secondary node?
      if self.new_node is not None:
        fn = self._ExecDrbd8Secondary
      else:
        fn = self._ExecDrbd8DiskOnly

      return fn(feedback_fn)

    finally:
      # Deactivate the instance disks if we're replacing them on a
      # down instance
      if activate_disks:
        _SafeShutdownInstanceDisks(self.lu, self.instance)

  def _CheckVolumeGroup(self, nodes):
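    """Checks that the cluster volume group exists on all given nodes.

    """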
    self.lu.LogInfo("Checking volume groups")

    vgname = self.cfg.GetVGName()

    # Make sure volume group exists on all involved nodes
    results = self.rpc.call_vg_list(nodes)
    if not results:
      raise errors.OpExecError("Can't list volume groups on the nodes")

    for node in nodes:
      res = results[node]
      res.Raise("Error checking node %s" % node)
      if vgname not in res.payload:
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
                                 (vgname, node))

  def _CheckDisksExistence(self, nodes):
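    """Checks that the disks under replacement exist on the given nodes.

    """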
    # Check disk existence
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        msg = result.fail_msg
        if msg or not result.payload:
          if not msg:
            msg = "disk not found"
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
                                   (idx, node, msg))

  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
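    """Checks that the disks under replacement are consistent on a node.

    """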
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
                      (idx, node_name))

      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
                                   ldisk=ldisk):
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
                                 " replace disks for instance %s" %
                                 (node_name, self.instance.name))

  def _CreateNewStorage(self, node_name):
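    """Creates new data and metadata LVs for the disks being replaced.

    Returns a dict mapping each disk's iv_name to a (device, old_lvs,
    new_lvs) tuple, for use by the later steps.

    """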
    vgname = self.cfg.GetVGName()
    iv_names = {}

    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))

      self.cfg.SetDiskID(dev, node_name)

      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
      names = _GenerateUniqueNames(self.lu, lv_names)

      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
                             logical_id=(vgname, names[0]))
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                             logical_id=(vgname, names[1]))

      new_lvs = [lv_data, lv_meta]
      old_lvs = dev.children
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)

      # we pass force_create=True to force the LVM creation
      for new_lv in new_lvs:
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    return iv_names

  def _CheckDevices(self, node_name, iv_names):
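    """Checks that the DRBD devices can be found and are not degraded.

    """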
    for name, (dev, _, _) in iv_names.iteritems():
      self.cfg.SetDiskID(dev, node_name)

      result = self.rpc.call_blockdev_find(node_name, dev)

      msg = result.fail_msg
      if msg or not result.payload:
        if not msg:
          msg = "disk not found"
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
                                 (name, msg))

      if result.payload.is_degraded:
        raise errors.OpExecError("DRBD device %s is degraded!" % name)

  def _RemoveOldStorage(self, node_name, iv_names):
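    """Removes the old (replaced) LVs, warning on but tolerating failures.

    """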
    for name, (_, old_lvs, _) in iv_names.iteritems():
      self.lu.LogInfo("Remove logical volumes for %s" % name)

      for lv in old_lvs:
        self.cfg.SetDiskID(lv, node_name)

        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
        if msg:
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
                             hint="remove unused LVs manually")

  def _ReleaseNodeLock(self, node_name):
    """Releases the lock(s) for the given node name or list of names."""
    self.lu.context.glm.release(locking.LEVEL_NODE, node_name)

  def _ExecDrbd8DiskOnly(self, feedback_fn):
    """Replace a disk on the primary or secondary for DRBD 8.

    The algorithm for replace is quite complicated:

      1. for each disk to be replaced:

        1. create new LVs on the target node with unique names
        1. detach old LVs from the drbd device
        1. rename old LVs to name_replaced.<time_t>
        1. rename new LVs to old LVs
        1. attach the new LVs (with the old names now) to the drbd device

      1. wait for sync across all devices

      1. for each modified disk:

        1. remove old LVs (which have the name name_replaced.<time_t>)

    Failures are not very well handled.

    """
    steps_total = 6

    # Step: check device activation
8541
    self.lu.LogStep(1, steps_total, "Check device existence")
8542
    self._CheckDisksExistence([self.other_node, self.target_node])
8543
    self._CheckVolumeGroup([self.target_node, self.other_node])
8544

    
8545
    # Step: check other node consistency
8546
    self.lu.LogStep(2, steps_total, "Check peer consistency")
8547
    self._CheckDisksConsistency(self.other_node,
8548
                                self.other_node == self.instance.primary_node,
8549
                                False)
8550

    
8551
    # Step: create new storage
8552
    self.lu.LogStep(3, steps_total, "Allocate new storage")
8553
    iv_names = self._CreateNewStorage(self.target_node)
8554

    
8555
    # Step: for each lv, detach+rename*2+attach
8556
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8557
    for dev, old_lvs, new_lvs in iv_names.itervalues():
8558
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
8559

    
8560
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
8561
                                                     old_lvs)
8562
      result.Raise("Can't detach drbd from local storage on node"
8563
                   " %s for device %s" % (self.target_node, dev.iv_name))
8564
      #dev.children = []
8565
      #cfg.Update(instance)
8566

    
8567
      # ok, we created the new LVs, so now we know we have the needed
8568
      # storage; as such, we proceed on the target node to rename
8569
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
8570
      # using the assumption that logical_id == physical_id (which in
8571
      # turn is the unique_id on that node)
8572

    
8573
      # FIXME(iustin): use a better name for the replaced LVs
8574
      temp_suffix = int(time.time())
8575
      ren_fn = lambda d, suff: (d.physical_id[0],
8576
                                d.physical_id[1] + "_replaced-%s" % suff)
8577

    
8578
      # Build the rename list based on what LVs exist on the node
8579
      rename_old_to_new = []
8580
      for to_ren in old_lvs:
8581
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
8582
        if not result.fail_msg and result.payload:
8583
          # device exists
8584
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
8585

    
8586
      self.lu.LogInfo("Renaming the old LVs on the target node")
8587
      result = self.rpc.call_blockdev_rename(self.target_node,
8588
                                             rename_old_to_new)
8589
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
8590

    
8591
      # Now we rename the new LVs to the old LVs
8592
      self.lu.LogInfo("Renaming the new LVs on the target node")
8593
      rename_new_to_old = [(new, old.physical_id)
8594
                           for old, new in zip(old_lvs, new_lvs)]
8595
      result = self.rpc.call_blockdev_rename(self.target_node,
8596
                                             rename_new_to_old)
8597
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
8598

    
8599
      for old, new in zip(old_lvs, new_lvs):
8600
        new.logical_id = old.logical_id
8601
        self.cfg.SetDiskID(new, self.target_node)
8602

    
8603
      for disk in old_lvs:
8604
        disk.logical_id = ren_fn(disk, temp_suffix)
8605
        self.cfg.SetDiskID(disk, self.target_node)
8606

    
8607
      # Now that the new lvs have the old name, we can add them to the device
8608
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
8609
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
8610
                                                  new_lvs)
8611
      msg = result.fail_msg
8612
      if msg:
8613
        for new_lv in new_lvs:
8614
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
8615
                                               new_lv).fail_msg
8616
          if msg2:
8617
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
8618
                               hint=("cleanup manually the unused logical"
8619
                                     "volumes"))
8620
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
8621

    
8622
      dev.children = new_lvs
8623

    
8624
      self.cfg.Update(self.instance, feedback_fn)
8625

    
8626
    cstep = 5
8627
    if self.early_release:
8628
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
8629
      cstep += 1
8630
      self._RemoveOldStorage(self.target_node, iv_names)
8631
      # WARNING: we release both node locks here, do not do other RPCs
8632
      # than WaitForSync to the primary node
8633
      self._ReleaseNodeLock([self.target_node, self.other_node])
8634

    
8635
    # Wait for sync
8636
    # This can fail as the old devices are degraded and _WaitForSync
8637
    # does a combined result over all disks, so we don't check its return value
8638
    self.lu.LogStep(cstep, steps_total, "Sync devices")
8639
    cstep += 1
8640
    _WaitForSync(self.lu, self.instance)
8641

    
8642
    # Check all devices manually
8643
    self._CheckDevices(self.instance.primary_node, iv_names)
8644

    
8645
    # Step: remove old storage
8646
    if not self.early_release:
8647
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
8648
      cstep += 1
8649
      self._RemoveOldStorage(self.target_node, iv_names)
8650

    
8651
  def _ExecDrbd8Secondary(self, feedback_fn):
    """Replace the secondary node for DRBD 8.

    The algorithm for replace is quite complicated:
      - for all disks of the instance:
        - create new LVs on the new node with same names
        - shutdown the drbd device on the old secondary
        - disconnect the drbd network on the primary
        - create the drbd device on the new secondary
        - network attach the drbd on the primary, using an artifice:
          the drbd code for Attach() will connect to the network if it
          finds a device which is connected to the good local disks but
          not network enabled
      - wait for sync across all devices
      - remove all disks from the old secondary

    Failures are not very well handled.

    """
    steps_total = 6

    # Step: check device activation
8673
    self.lu.LogStep(1, steps_total, "Check device existence")
8674
    self._CheckDisksExistence([self.instance.primary_node])
8675
    self._CheckVolumeGroup([self.instance.primary_node])
8676

    
8677
    # Step: check other node consistency
8678
    self.lu.LogStep(2, steps_total, "Check peer consistency")
8679
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
8680

    
8681
    # Step: create new storage
8682
    self.lu.LogStep(3, steps_total, "Allocate new storage")
8683
    for idx, dev in enumerate(self.instance.disks):
8684
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
8685
                      (self.new_node, idx))
8686
      # we pass force_create=True to force LVM creation
8687
      for new_lv in dev.children:
8688
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
8689
                        _GetInstanceInfoText(self.instance), False)
8690

    
8691
    # Step 4: drbd minors and drbd setup changes
8692
    # after this, we must manually remove the drbd minors on both the
8693
    # error and the success paths
8694
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8695
    minors = self.cfg.AllocateDRBDMinor([self.new_node
8696
                                         for dev in self.instance.disks],
8697
                                        self.instance.name)
8698
    logging.debug("Allocated minors %r", minors)
8699

    
8700
    iv_names = {}
8701
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
8702
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
8703
                      (self.new_node, idx))
8704
      # create new devices on new_node; note that we create two IDs:
8705
      # one without port, so the drbd will be activated without
8706
      # networking information on the new node at this stage, and one
8707
      # with network, for the latter activation in step 4
8708
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
8709
      if self.instance.primary_node == o_node1:
8710
        p_minor = o_minor1
8711
      else:
8712
        assert self.instance.primary_node == o_node2, "Three-node instance?"
8713
        p_minor = o_minor2
8714

    
8715
      new_alone_id = (self.instance.primary_node, self.new_node, None,
8716
                      p_minor, new_minor, o_secret)
8717
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
8718
                    p_minor, new_minor, o_secret)
8719

    
8720
      iv_names[idx] = (dev, dev.children, new_net_id)
8721
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
8722
                    new_net_id)
8723
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
8724
                              logical_id=new_alone_id,
8725
                              children=dev.children,
8726
                              size=dev.size)
8727
      try:
8728
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
8729
                              _GetInstanceInfoText(self.instance), False)
8730
      except errors.GenericError:
8731
        self.cfg.ReleaseDRBDMinors(self.instance.name)
8732
        raise
8733

    
8734
    # We have new devices, shutdown the drbd on the old secondary
8735
    for idx, dev in enumerate(self.instance.disks):
8736
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
8737
      self.cfg.SetDiskID(dev, self.target_node)
8738
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
8739
      if msg:
8740
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
8741
                           "node: %s" % (idx, msg),
8742
                           hint=("Please cleanup this device manually as"
8743
                                 " soon as possible"))
8744

    
8745
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
8746
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
8747
                                               self.node_secondary_ip,
8748
                                               self.instance.disks)\
8749
                                              [self.instance.primary_node]
8750

    
8751
    msg = result.fail_msg
8752
    if msg:
8753
      # detaches didn't succeed (unlikely)
8754
      self.cfg.ReleaseDRBDMinors(self.instance.name)
8755
      raise errors.OpExecError("Can't detach the disks from the network on"
8756
                               " old node: %s" % (msg,))
8757

    
8758
    # if we managed to detach at least one, we update all the disks of
8759
    # the instance to point to the new secondary
8760
    self.lu.LogInfo("Updating instance configuration")
8761
    for dev, _, new_logical_id in iv_names.itervalues():
8762
      dev.logical_id = new_logical_id
8763
      self.cfg.SetDiskID(dev, self.instance.primary_node)
8764

    
8765
    self.cfg.Update(self.instance, feedback_fn)
8766

    
8767
    # and now perform the drbd attach
8768
    self.lu.LogInfo("Attaching primary drbds to new secondary"
8769
                    " (standalone => connected)")
8770
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8771
                                            self.new_node],
8772
                                           self.node_secondary_ip,
8773
                                           self.instance.disks,
8774
                                           self.instance.name,
8775
                                           False)
8776
    for to_node, to_result in result.items():
8777
      msg = to_result.fail_msg
8778
      if msg:
8779
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8780
                           to_node, msg,
8781
                           hint=("please do a gnt-instance info to see the"
8782
                                 " status of disks"))
8783
    cstep = 5
8784
    if self.early_release:
8785
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
8786
      cstep += 1
8787
      self._RemoveOldStorage(self.target_node, iv_names)
8788
      # WARNING: we release all node locks here, do not do other RPCs
8789
      # than WaitForSync to the primary node
8790
      self._ReleaseNodeLock([self.instance.primary_node,
8791
                             self.target_node,
8792
                             self.new_node])
8793

    
8794
    # Wait for sync
8795
    # This can fail as the old devices are degraded and _WaitForSync
8796
    # does a combined result over all disks, so we don't check its return value
8797
    self.lu.LogStep(cstep, steps_total, "Sync devices")
8798
    cstep += 1
8799
    _WaitForSync(self.lu, self.instance)
8800

    
8801
    # Check all devices manually
8802
    self._CheckDevices(self.instance.primary_node, iv_names)
8803

    
8804
    # Step: remove old storage
8805
    if not self.early_release:
8806
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
8807
      self._RemoveOldStorage(self.target_node, iv_names)
8808

    
8809

    
8810
class LURepairNodeStorage(NoHooksLU):
8811
  """Repairs the volume group on a node.
8812

8813
  """
8814
  _OP_PARAMS = [
8815
    _PNodeName,
8816
    ("storage_type", ht.NoDefault, _CheckStorageType),
8817
    ("name", ht.NoDefault, ht.TNonEmptyString),
8818
    ("ignore_consistency", False, ht.TBool),
8819
    ]
8820
  REQ_BGL = False
8821

    
8822
  def CheckArguments(self):
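    """Check arguments.

    This expands the node name and verifies that the given storage type
    supports the consistency-fix (repair) operation.

    """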
8823
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8824

    
8825
    storage_type = self.op.storage_type
8826

    
8827
    if (constants.SO_FIX_CONSISTENCY not in
8828
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8829
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
8830
                                 " repaired" % storage_type,
8831
                                 errors.ECODE_INVAL)
8832

    
8833
  def ExpandNames(self):
8834
    self.needed_locks = {
8835
      locking.LEVEL_NODE: [self.op.node_name],
8836
      }
8837

    
8838
  def _CheckFaultyDisks(self, instance, node_name):
8839
    """Ensure faulty disks abort the opcode or at least warn."""
8840
    try:
8841
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8842
                                  node_name, True):
8843
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8844
                                   " node '%s'" % (instance.name, node_name),
8845
                                   errors.ECODE_STATE)
8846
    except errors.OpPrereqError, err:
8847
      if self.op.ignore_consistency:
8848
        self.proc.LogWarning(str(err.args[0]))
8849
      else:
8850
        raise
8851

    
8852
  def CheckPrereq(self):
8853
    """Check prerequisites.
8854

8855
    """
8856
    # Check whether any instance on this node has faulty disks
8857
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8858
      if not inst.admin_up:
8859
        continue
8860
      check_nodes = set(inst.all_nodes)
8861
      check_nodes.discard(self.op.node_name)
8862
      for inst_node_name in check_nodes:
8863
        self._CheckFaultyDisks(inst, inst_node_name)
8864

    
8865
  def Exec(self, feedback_fn):
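    """Executes the storage repair operation on the node.

    """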
8866
    feedback_fn("Repairing storage unit '%s' on %s ..." %
8867
                (self.op.name, self.op.node_name))
8868

    
8869
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8870
    result = self.rpc.call_storage_execute(self.op.node_name,
8871
                                           self.op.storage_type, st_args,
8872
                                           self.op.name,
8873
                                           constants.SO_FIX_CONSISTENCY)
8874
    result.Raise("Failed to repair storage unit '%s' on %s" %
8875
                 (self.op.name, self.op.node_name))
8876

    
8877

    
8878
class LUNodeEvacuationStrategy(NoHooksLU):
8879
  """Computes the node evacuation strategy.
8880

8881
  """
8882
  _OP_PARAMS = [
8883
    ("nodes", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
8884
    ("remote_node", None, ht.TMaybeString),
8885
    ("iallocator", None, ht.TMaybeString),
8886
    ]
8887
  REQ_BGL = False
8888

    
8889
  def CheckArguments(self):
8890
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
8891

    
8892
  def ExpandNames(self):
8893
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
8894
    self.needed_locks = locks = {}
8895
    if self.op.remote_node is None:
8896
      locks[locking.LEVEL_NODE] = locking.ALL_SET
8897
    else:
8898
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8899
      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
8900

    
8901
  def Exec(self, feedback_fn):
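    """Computes and returns the evacuation strategy.

    If a remote node is given, every secondary instance on the evacuated
    nodes is paired with that node; otherwise the iallocator is asked for
    a multi-evacuation solution.

    """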
8902
    if self.op.remote_node is not None:
8903
      instances = []
8904
      for node in self.op.nodes:
8905
        instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
8906
      result = []
8907
      for i in instances:
8908
        if i.primary_node == self.op.remote_node:
8909
          raise errors.OpPrereqError("Node %s is the primary node of"
8910
                                     " instance %s, cannot use it as"
8911
                                     " secondary" %
8912
                                     (self.op.remote_node, i.name),
8913
                                     errors.ECODE_INVAL)
8914
        result.append([i.name, self.op.remote_node])
8915
    else:
8916
      ial = IAllocator(self.cfg, self.rpc,
8917
                       mode=constants.IALLOCATOR_MODE_MEVAC,
8918
                       evac_nodes=self.op.nodes)
8919
      ial.Run(self.op.iallocator, validate=True)
8920
      if not ial.success:
8921
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
8922
                                 errors.ECODE_NORES)
8923
      result = ial.result
8924
    return result
8925

    
8926

    
8927
class LUGrowDisk(LogicalUnit):
8928
  """Grow a disk of an instance.
8929

8930
  """
8931
  HPATH = "disk-grow"
8932
  HTYPE = constants.HTYPE_INSTANCE
8933
  _OP_PARAMS = [
8934
    _PInstanceName,
8935
    ("disk", ht.NoDefault, ht.TInt),
8936
    ("amount", ht.NoDefault, ht.TInt),
8937
    ("wait_for_sync", True, ht.TBool),
8938
    ]
8939
  REQ_BGL = False
8940

    
8941
  def ExpandNames(self):
8942
    self._ExpandAndLockInstance()
8943
    self.needed_locks[locking.LEVEL_NODE] = []
8944
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8945

    
8946
  def DeclareLocks(self, level):
8947
    if level == locking.LEVEL_NODE:
8948
      self._LockInstancesNodes()
8949

    
8950
  def BuildHooksEnv(self):
8951
    """Build hooks env.
8952

8953
    This runs on the master, the primary and all the secondaries.
8954

8955
    """
8956
    env = {
8957
      "DISK": self.op.disk,
8958
      "AMOUNT": self.op.amount,
8959
      }
8960
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8961
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8962
    return env, nl, nl
8963

    
8964
  def CheckPrereq(self):
8965
    """Check prerequisites.
8966

8967
    This checks that the instance is in the cluster.
8968

8969
    """
8970
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8971
    assert instance is not None, \
8972
      "Cannot retrieve locked instance %s" % self.op.instance_name
8973
    nodenames = list(instance.all_nodes)
8974
    for node in nodenames:
8975
      _CheckNodeOnline(self, node)
8976

    
8977
    self.instance = instance
8978

    
8979
    if instance.disk_template not in constants.DTS_GROWABLE:
8980
      raise errors.OpPrereqError("Instance's disk layout does not support"
8981
                                 " growing.", errors.ECODE_INVAL)
8982

    
8983
    self.disk = instance.FindDisk(self.op.disk)
8984

    
8985
    if instance.disk_template != constants.DT_FILE:
8986
      # TODO: check the free disk space for file, when that feature
8987
      # will be supported
8988
      _CheckNodesFreeDiskPerVG(self, nodenames,
8989
                               {self.disk.physical_id[0]: self.op.amount})
8990

    
8991
  def Exec(self, feedback_fn):
8992
    """Execute disk grow.
8993

8994
    """
8995
    instance = self.instance
8996
    disk = self.disk
8997

    
8998
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8999
    if not disks_ok:
9000
      raise errors.OpExecError("Cannot activate block device to grow")
9001

    
9002
    for node in instance.all_nodes:
9003
      self.cfg.SetDiskID(disk, node)
9004
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
9005
      result.Raise("Grow request failed to node %s" % node)
9006

    
9007
      # TODO: Rewrite code to work properly
9008
      # DRBD goes into sync mode for a short amount of time after executing the
9009
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
9010
      # calling "resize" in sync mode fails. Sleeping for a short amount of
9011
      # time is a work-around.
9012
      time.sleep(5)
9013

    
9014
    disk.RecordGrow(self.op.amount)
9015
    self.cfg.Update(instance, feedback_fn)
9016
    if self.op.wait_for_sync:
9017
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
9018
      if disk_abort:
9019
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
9020
                             " status.\nPlease check the instance.")
9021
      if not instance.admin_up:
9022
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
9023
    elif not instance.admin_up:
9024
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
9025
                           " not supposed to be running because no wait for"
9026
                           " sync mode was requested.")
9027

    
9028

    
9029
class LUQueryInstanceData(NoHooksLU):
9030
  """Query runtime instance data.
9031

9032
  """
9033
  _OP_PARAMS = [
9034
    ("instances", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
9035
    ("static", False, ht.TBool),
9036
    ]
9037
  REQ_BGL = False
9038

    
9039
  def ExpandNames(self):
9040
    self.needed_locks = {}
9041
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
9042

    
9043
    if self.op.instances:
9044
      self.wanted_names = []
9045
      for name in self.op.instances:
9046
        full_name = _ExpandInstanceName(self.cfg, name)
9047
        self.wanted_names.append(full_name)
9048
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
9049
    else:
9050
      self.wanted_names = None
9051
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
9052

    
9053
    self.needed_locks[locking.LEVEL_NODE] = []
9054
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9055

    
9056
  def DeclareLocks(self, level):
9057
    if level == locking.LEVEL_NODE:
9058
      self._LockInstancesNodes()
9059

    
9060
  def CheckPrereq(self):
9061
    """Check prerequisites.
9062

9063
    This only checks the optional instance list against the existing names.
9064

9065
    """
9066
    if self.wanted_names is None:
9067
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
9068

    
9069
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
9070
                             in self.wanted_names]
9071

    
9072
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
9073
    """Returns the status of a block device
9074

9075
    """
9076
    if self.op.static or not node:
9077
      return None
9078

    
9079
    self.cfg.SetDiskID(dev, node)
9080

    
9081
    result = self.rpc.call_blockdev_find(node, dev)
9082
    if result.offline:
9083
      return None
9084

    
9085
    result.Raise("Can't compute disk status for %s" % instance_name)
9086

    
9087
    status = result.payload
9088
    if status is None:
9089
      return None
9090

    
9091
    return (status.dev_path, status.major, status.minor,
9092
            status.sync_percent, status.estimated_time,
9093
            status.is_degraded, status.ldisk_status)
9094

    
9095
  def _ComputeDiskStatus(self, instance, snode, dev):
9096
    """Compute block device status.
9097

9098
    """
9099
    if dev.dev_type in constants.LDS_DRBD:
9100
      # we change the snode then (otherwise we use the one passed in)
9101
      if dev.logical_id[0] == instance.primary_node:
9102
        snode = dev.logical_id[1]
9103
      else:
9104
        snode = dev.logical_id[0]
9105

    
9106
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
9107
                                              instance.name, dev)
9108
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
9109

    
9110
    if dev.children:
9111
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
9112
                      for child in dev.children]
9113
    else:
9114
      dev_children = []
9115

    
9116
    data = {
9117
      "iv_name": dev.iv_name,
9118
      "dev_type": dev.dev_type,
9119
      "logical_id": dev.logical_id,
9120
      "physical_id": dev.physical_id,
9121
      "pstatus": dev_pstatus,
9122
      "sstatus": dev_sstatus,
9123
      "children": dev_children,
9124
      "mode": dev.mode,
9125
      "size": dev.size,
9126
      }
9127

    
9128
    return data
9129

    
9130
  def Exec(self, feedback_fn):
9131
    """Gather and return data"""
9132
    result = {}
9133

    
9134
    cluster = self.cfg.GetClusterInfo()
9135

    
9136
    for instance in self.wanted_instances:
9137
      if not self.op.static:
9138
        remote_info = self.rpc.call_instance_info(instance.primary_node,
9139
                                                  instance.name,
9140
                                                  instance.hypervisor)
9141
        remote_info.Raise("Error checking node %s" % instance.primary_node)
9142
        remote_info = remote_info.payload
9143
        if remote_info and "state" in remote_info:
9144
          remote_state = "up"
9145
        else:
9146
          remote_state = "down"
9147
      else:
9148
        remote_state = None
9149
      if instance.admin_up:
9150
        config_state = "up"
9151
      else:
9152
        config_state = "down"
9153

    
9154
      disks = [self._ComputeDiskStatus(instance, None, device)
9155
               for device in instance.disks]
9156

    
9157
      idict = {
9158
        "name": instance.name,
9159
        "config_state": config_state,
9160
        "run_state": remote_state,
9161
        "pnode": instance.primary_node,
9162
        "snodes": instance.secondary_nodes,
9163
        "os": instance.os,
9164
        # this happens to be the same format used for hooks
9165
        "nics": _NICListToTuple(self, instance.nics),
9166
        "disk_template": instance.disk_template,
9167
        "disks": disks,
9168
        "hypervisor": instance.hypervisor,
9169
        "network_port": instance.network_port,
9170
        "hv_instance": instance.hvparams,
9171
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
9172
        "be_instance": instance.beparams,
9173
        "be_actual": cluster.FillBE(instance),
9174
        "os_instance": instance.osparams,
9175
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
9176
        "serial_no": instance.serial_no,
9177
        "mtime": instance.mtime,
9178
        "ctime": instance.ctime,
9179
        "uuid": instance.uuid,
9180
        }
9181

    
9182
      result[instance.name] = idict
9183

    
9184
    return result
9185

    
9186

    
9187
class LUSetInstanceParams(LogicalUnit):
9188
  """Modifies an instances's parameters.
9189

9190
  """
9191
  HPATH = "instance-modify"
9192
  HTYPE = constants.HTYPE_INSTANCE
9193
  _OP_PARAMS = [
9194
    _PInstanceName,
9195
    ("nics", ht.EmptyList, ht.TList),
9196
    ("disks", ht.EmptyList, ht.TList),
9197
    ("beparams", ht.EmptyDict, ht.TDict),
9198
    ("hvparams", ht.EmptyDict, ht.TDict),
9199
    ("disk_template", None, ht.TMaybeString),
9200
    ("remote_node", None, ht.TMaybeString),
9201
    ("os_name", None, ht.TMaybeString),
9202
    ("force_variant", False, ht.TBool),
9203
    ("osparams", None, ht.TOr(ht.TDict, ht.TNone)),
9204
    _PForce,
9205
    ]
9206
  REQ_BGL = False
9207

    
9208
  def CheckArguments(self):
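    """Check arguments.

    This statically validates the requested NIC, disk and disk template
    changes, using only the opcode parameters.

    """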
9209
    if not (self.op.nics or self.op.disks or self.op.disk_template or
9210
            self.op.hvparams or self.op.beparams or self.op.os_name):
9211
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
9212

    
9213
    if self.op.hvparams:
9214
      _CheckGlobalHvParams(self.op.hvparams)
9215

    
9216
    # Disk validation
9217
    disk_addremove = 0
9218
    for disk_op, disk_dict in self.op.disks:
9219
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
9220
      if disk_op == constants.DDM_REMOVE:
9221
        disk_addremove += 1
9222
        continue
9223
      elif disk_op == constants.DDM_ADD:
9224
        disk_addremove += 1
9225
      else:
9226
        if not isinstance(disk_op, int):
9227
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
9228
        if not isinstance(disk_dict, dict):
9229
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
9230
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9231

    
9232
      if disk_op == constants.DDM_ADD:
9233
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
9234
        if mode not in constants.DISK_ACCESS_SET:
9235
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
9236
                                     errors.ECODE_INVAL)
9237
        size = disk_dict.get('size', None)
9238
        if size is None:
9239
          raise errors.OpPrereqError("Required disk parameter size missing",
9240
                                     errors.ECODE_INVAL)
9241
        try:
9242
          size = int(size)
9243
        except (TypeError, ValueError), err:
9244
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
9245
                                     str(err), errors.ECODE_INVAL)
9246
        disk_dict['size'] = size
9247
      else:
9248
        # modification of disk
9249
        if 'size' in disk_dict:
9250
          raise errors.OpPrereqError("Disk size change not possible, use"
9251
                                     " grow-disk", errors.ECODE_INVAL)
9252

    
9253
    if disk_addremove > 1:
9254
      raise errors.OpPrereqError("Only one disk add or remove operation"
9255
                                 " supported at a time", errors.ECODE_INVAL)
9256

    
9257
    if self.op.disks and self.op.disk_template is not None:
9258
      raise errors.OpPrereqError("Disk template conversion and other disk"
9259
                                 " changes not supported at the same time",
9260
                                 errors.ECODE_INVAL)
9261

    
9262
    if self.op.disk_template:
9263
      _CheckDiskTemplate(self.op.disk_template)
9264
      if (self.op.disk_template in constants.DTS_NET_MIRROR and
9265
          self.op.remote_node is None):
9266
        raise errors.OpPrereqError("Changing the disk template to a mirrored"
9267
                                   " one requires specifying a secondary node",
9268
                                   errors.ECODE_INVAL)
9269

    
9270
    # NIC validation
9271
    nic_addremove = 0
9272
    for nic_op, nic_dict in self.op.nics:
9273
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
9274
      if nic_op == constants.DDM_REMOVE:
9275
        nic_addremove += 1
9276
        continue
9277
      elif nic_op == constants.DDM_ADD:
9278
        nic_addremove += 1
9279
      else:
9280
        if not isinstance(nic_op, int):
9281
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
9282
        if not isinstance(nic_dict, dict):
9283
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
9284
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9285

    
9286
      # nic_dict should be a dict
9287
      nic_ip = nic_dict.get('ip', None)
9288
      if nic_ip is not None:
9289
        if nic_ip.lower() == constants.VALUE_NONE:
9290
          nic_dict['ip'] = None
9291
        else:
9292
          if not netutils.IPAddress.IsValid(nic_ip):
9293
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
9294
                                       errors.ECODE_INVAL)
9295

    
9296
      nic_bridge = nic_dict.get('bridge', None)
9297
      nic_link = nic_dict.get('link', None)
9298
      if nic_bridge and nic_link:
9299
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
9300
                                   " at the same time", errors.ECODE_INVAL)
9301
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
9302
        nic_dict['bridge'] = None
9303
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
9304
        nic_dict['link'] = None
9305

    
9306
      if nic_op == constants.DDM_ADD:
9307
        nic_mac = nic_dict.get('mac', None)
9308
        if nic_mac is None:
9309
          nic_dict['mac'] = constants.VALUE_AUTO
9310

    
9311
      if 'mac' in nic_dict:
9312
        nic_mac = nic_dict['mac']
9313
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9314
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
9315

    
9316
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
9317
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
9318
                                     " modifying an existing nic",
9319
                                     errors.ECODE_INVAL)
9320

    
9321
    if nic_addremove > 1:
9322
      raise errors.OpPrereqError("Only one NIC add or remove operation"
9323
                                 " supported at a time", errors.ECODE_INVAL)
9324

    
9325
  def ExpandNames(self):
9326
    self._ExpandAndLockInstance()
9327
    self.needed_locks[locking.LEVEL_NODE] = []
9328
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9329

    
9330
  def DeclareLocks(self, level):
9331
    if level == locking.LEVEL_NODE:
9332
      self._LockInstancesNodes()
9333
      if self.op.disk_template and self.op.remote_node:
9334
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9335
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
9336

    
9337
  def BuildHooksEnv(self):
9338
    """Build hooks env.
9339

9340
    This runs on the master, primary and secondaries.
9341

9342
    """
9343
    args = dict()
9344
    if constants.BE_MEMORY in self.be_new:
9345
      args['memory'] = self.be_new[constants.BE_MEMORY]
9346
    if constants.BE_VCPUS in self.be_new:
9347
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
9348
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
9349
    # information at all.
9350
    if self.op.nics:
9351
      args['nics'] = []
9352
      nic_override = dict(self.op.nics)
9353
      for idx, nic in enumerate(self.instance.nics):
9354
        if idx in nic_override:
9355
          this_nic_override = nic_override[idx]
9356
        else:
9357
          this_nic_override = {}
9358
        if 'ip' in this_nic_override:
9359
          ip = this_nic_override['ip']
9360
        else:
9361
          ip = nic.ip
9362
        if 'mac' in this_nic_override:
9363
          mac = this_nic_override['mac']
9364
        else:
9365
          mac = nic.mac
9366
        if idx in self.nic_pnew:
9367
          nicparams = self.nic_pnew[idx]
9368
        else:
9369
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
9370
        mode = nicparams[constants.NIC_MODE]
9371
        link = nicparams[constants.NIC_LINK]
9372
        args['nics'].append((ip, mac, mode, link))
9373
      if constants.DDM_ADD in nic_override:
9374
        ip = nic_override[constants.DDM_ADD].get('ip', None)
9375
        mac = nic_override[constants.DDM_ADD]['mac']
9376
        nicparams = self.nic_pnew[constants.DDM_ADD]
9377
        mode = nicparams[constants.NIC_MODE]
9378
        link = nicparams[constants.NIC_LINK]
9379
        args['nics'].append((ip, mac, mode, link))
9380
      elif constants.DDM_REMOVE in nic_override:
9381
        del args['nics'][-1]
9382

    
9383
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
9384
    if self.op.disk_template:
9385
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
9386
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9387
    return env, nl, nl
9388

    
9389
  def CheckPrereq(self):
9390
    """Check prerequisites.
9391

9392
    This only checks the instance list against the existing names.
9393

9394
    """
9395
    # checking the new params on the primary/secondary nodes
9396

    
9397
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9398
    cluster = self.cluster = self.cfg.GetClusterInfo()
9399
    assert self.instance is not None, \
9400
      "Cannot retrieve locked instance %s" % self.op.instance_name
9401
    pnode = instance.primary_node
9402
    nodelist = list(instance.all_nodes)
9403

    
9404
    # OS change
9405
    if self.op.os_name and not self.op.force:
9406
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
9407
                      self.op.force_variant)
9408
      instance_os = self.op.os_name
9409
    else:
9410
      instance_os = instance.os
9411

    
9412
    if self.op.disk_template:
9413
      if instance.disk_template == self.op.disk_template:
9414
        raise errors.OpPrereqError("Instance already has disk template %s" %
9415
                                   instance.disk_template, errors.ECODE_INVAL)
9416

    
9417
      if (instance.disk_template,
9418
          self.op.disk_template) not in self._DISK_CONVERSIONS:
9419
        raise errors.OpPrereqError("Unsupported disk template conversion from"
9420
                                   " %s to %s" % (instance.disk_template,
9421
                                                  self.op.disk_template),
9422
                                   errors.ECODE_INVAL)
9423
      _CheckInstanceDown(self, instance, "cannot change disk template")
9424
      if self.op.disk_template in constants.DTS_NET_MIRROR:
9425
        if self.op.remote_node == pnode:
9426
          raise errors.OpPrereqError("Given new secondary node %s is the same"
9427
                                     " as the primary node of the instance" %
9428
                                     self.op.remote_node, errors.ECODE_STATE)
9429
        _CheckNodeOnline(self, self.op.remote_node)
9430
        _CheckNodeNotDrained(self, self.op.remote_node)
9431
        # FIXME: here we assume that the old instance type is DT_PLAIN
9432
        assert instance.disk_template == constants.DT_PLAIN
9433
        disks = [{"size": d.size, "vg": d.logical_id[0]}
9434
                 for d in instance.disks]
9435
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
9436
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
9437

    
9438
    # hvparams processing
9439
    if self.op.hvparams:
9440
      hv_type = instance.hypervisor
9441
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
9442
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
9443
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
9444

    
9445
      # local check
9446
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
9447
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
9448
      self.hv_new = hv_new # the new actual values
9449
      self.hv_inst = i_hvdict # the new dict (without defaults)
9450
    else:
9451
      self.hv_new = self.hv_inst = {}
9452

    
9453
    # beparams processing
9454
    if self.op.beparams:
9455
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
9456
                                   use_none=True)
9457
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
9458
      be_new = cluster.SimpleFillBE(i_bedict)
9459
      self.be_new = be_new # the new actual values
9460
      self.be_inst = i_bedict # the new dict (without defaults)
9461
    else:
9462
      self.be_new = self.be_inst = {}
9463

    
9464
    # osparams processing
9465
    if self.op.osparams:
9466
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
9467
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
9468
      self.os_inst = i_osdict # the new dict (without defaults)
9469
    else:
9470
      self.os_inst = {}
9471

    
9472
    self.warn = []
9473

    
9474
    if constants.BE_MEMORY in self.op.beparams and not self.op.force:
9475
      mem_check_list = [pnode]
9476
      if be_new[constants.BE_AUTO_BALANCE]:
9477
        # either we changed auto_balance to yes or it was from before
9478
        mem_check_list.extend(instance.secondary_nodes)
9479
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
9480
                                                  instance.hypervisor)
9481
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
9482
                                         instance.hypervisor)
9483
      pninfo = nodeinfo[pnode]
9484
      msg = pninfo.fail_msg
9485
      if msg:
9486
        # Assume the primary node is unreachable and go ahead
9487
        self.warn.append("Can't get info from primary node %s: %s" %
9488
                         (pnode,  msg))
9489
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
9490
        self.warn.append("Node data from primary node %s doesn't contain"
9491
                         " free memory information" % pnode)
9492
      elif instance_info.fail_msg:
9493
        self.warn.append("Can't get instance runtime information: %s" %
9494
                        instance_info.fail_msg)
9495
      else:
9496
        if instance_info.payload:
9497
          current_mem = int(instance_info.payload['memory'])
9498
        else:
9499
          # Assume instance not running
9500
          # (there is a slight race condition here, but it's not very probable,
9501
          # and we have no other way to check)
9502
          current_mem = 0
9503
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
9504
                    pninfo.payload['memory_free'])
9505
        if miss_mem > 0:
9506
          raise errors.OpPrereqError("This change will prevent the instance"
9507
                                     " from starting, due to %d MB of memory"
9508
                                     " missing on its primary node" % miss_mem,
9509
                                     errors.ECODE_NORES)
9510

    
9511
      if be_new[constants.BE_AUTO_BALANCE]:
9512
        for node, nres in nodeinfo.items():
9513
          if node not in instance.secondary_nodes:
9514
            continue
9515
          msg = nres.fail_msg
9516
          if msg:
9517
            self.warn.append("Can't get info from secondary node %s: %s" %
9518
                             (node, msg))
9519
          elif not isinstance(nres.payload.get('memory_free', None), int):
9520
            self.warn.append("Secondary node %s didn't return free"
9521
                             " memory information" % node)
9522
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
9523
            self.warn.append("Not enough memory to failover instance to"
9524
                             " secondary node %s" % node)
9525

    
9526
    # NIC processing
9527
    self.nic_pnew = {}
9528
    self.nic_pinst = {}
9529
    for nic_op, nic_dict in self.op.nics:
9530
      if nic_op == constants.DDM_REMOVE:
9531
        if not instance.nics:
9532
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
9533
                                     errors.ECODE_INVAL)
9534
        continue
9535
      if nic_op != constants.DDM_ADD:
9536
        # an existing nic
9537
        if not instance.nics:
9538
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
9539
                                     " no NICs" % nic_op,
9540
                                     errors.ECODE_INVAL)
9541
        if nic_op < 0 or nic_op >= len(instance.nics):
9542
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
9543
                                     " are 0 to %d" %
9544
                                     (nic_op, len(instance.nics) - 1),
9545
                                     errors.ECODE_INVAL)
9546
        old_nic_params = instance.nics[nic_op].nicparams
9547
        old_nic_ip = instance.nics[nic_op].ip
9548
      else:
9549
        old_nic_params = {}
9550
        old_nic_ip = None
9551

    
9552
      update_params_dict = dict([(key, nic_dict[key])
9553
                                 for key in constants.NICS_PARAMETERS
9554
                                 if key in nic_dict])
9555

    
9556
      if 'bridge' in nic_dict:
9557
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
9558

    
9559
      new_nic_params = _GetUpdatedParams(old_nic_params,
9560
                                         update_params_dict)
9561
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
9562
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
9563
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
9564
      self.nic_pinst[nic_op] = new_nic_params
9565
      self.nic_pnew[nic_op] = new_filled_nic_params
9566
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
9567

    
9568
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
9569
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
9570
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
9571
        if msg:
9572
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
9573
          if self.op.force:
9574
            self.warn.append(msg)
9575
          else:
9576
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
9577
      if new_nic_mode == constants.NIC_MODE_ROUTED:
9578
        if 'ip' in nic_dict:
9579
          nic_ip = nic_dict['ip']
9580
        else:
9581
          nic_ip = old_nic_ip
9582
        if nic_ip is None:
9583
          raise errors.OpPrereqError('Cannot set the nic ip to None'
9584
                                     ' on a routed nic', errors.ECODE_INVAL)
9585
      if 'mac' in nic_dict:
9586
        nic_mac = nic_dict['mac']
9587
        if nic_mac is None:
9588
          raise errors.OpPrereqError('Cannot set the nic mac to None',
9589
                                     errors.ECODE_INVAL)
9590
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9591
          # otherwise generate the mac
9592
          nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
9593
        else:
9594
          # or validate/reserve the current one
9595
          try:
9596
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
9597
          except errors.ReservationError:
9598
            raise errors.OpPrereqError("MAC address %s already in use"
9599
                                       " in cluster" % nic_mac,
9600
                                       errors.ECODE_NOTUNIQUE)
9601

    
9602
    # DISK processing
9603
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
9604
      raise errors.OpPrereqError("Disk operations not supported for"
9605
                                 " diskless instances",
9606
                                 errors.ECODE_INVAL)
9607
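    # self.op.disks follows the same (op, params) convention as self.op.nics,
    # e.g. (constants.DDM_ADD, {'size': 1024, 'mode': 'rw'}) -- illustrative
    # values only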
    for disk_op, _ in self.op.disks:
9608
      if disk_op == constants.DDM_REMOVE:
9609
        if len(instance.disks) == 1:
9610
          raise errors.OpPrereqError("Cannot remove the last disk of"
9611
                                     " an instance", errors.ECODE_INVAL)
9612
        _CheckInstanceDown(self, instance, "cannot remove disks")
9613

    
9614
      if (disk_op == constants.DDM_ADD and
9615
          len(instance.disks) >= constants.MAX_DISKS):
9616
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
9617
                                   " add more" % constants.MAX_DISKS,
9618
                                   errors.ECODE_STATE)
9619
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
9620
        # an existing disk
9621
        if disk_op < 0 or disk_op >= len(instance.disks):
9622
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
9623
                                     " are 0 to %d" %
9624
                                     (disk_op, len(instance.disks) - 1),
9625
                                     errors.ECODE_INVAL)
9626

    
9627
    return
9628

    
9629
  def _ConvertPlainToDrbd(self, feedback_fn):
9630
    """Converts an instance from plain to drbd.
9631

9632
    """
9633
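    # Outline: generate a DRBD layout matching the current plain disks, create
    # the missing data/metadata LVs, rename the original LVs into place as the
    # DRBD data volumes, assemble the DRBD devices on both nodes, update the
    # configuration and wait for the initial sync.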
    feedback_fn("Converting template to drbd")
9634
    instance = self.instance
9635
    pnode = instance.primary_node
9636
    snode = self.op.remote_node
9637

    
9638
    # create a fake disk info for _GenerateDiskTemplate
9639
    disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
9640
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
9641
                                      instance.name, pnode, [snode],
9642
                                      disk_info, None, None, 0, feedback_fn)
9643
    info = _GetInstanceInfoText(instance)
9644
    feedback_fn("Creating aditional volumes...")
9645
    # first, create the missing data and meta devices
9646
    for disk in new_disks:
9647
      # unfortunately this is... not too nice
9648
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
9649
                            info, True)
9650
      for child in disk.children:
9651
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
9652
    # at this stage, all new LVs have been created, we can rename the
9653
    # old ones
9654
    feedback_fn("Renaming original volumes...")
9655
    rename_list = [(o, n.children[0].logical_id)
9656
                   for (o, n) in zip(instance.disks, new_disks)]
9657
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
9658
    result.Raise("Failed to rename original LVs")
9659

    
9660
    feedback_fn("Initializing DRBD devices...")
9661
    # all child devices are in place, we can now create the DRBD devices
9662
    for disk in new_disks:
9663
      for node in [pnode, snode]:
9664
        f_create = node == pnode
9665
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
9666

    
9667
    # at this point, the instance has been modified
9668
    instance.disk_template = constants.DT_DRBD8
9669
    instance.disks = new_disks
9670
    self.cfg.Update(instance, feedback_fn)
9671

    
9672
    # disks are created, waiting for sync
9673
    disk_abort = not _WaitForSync(self, instance)
9674
    if disk_abort:
9675
      raise errors.OpExecError("There are some degraded disks for"
9676
                               " this instance, please cleanup manually")
9677

    
9678
  def _ConvertDrbdToPlain(self, feedback_fn):
9679
    """Converts an instance from drbd to plain.
9680

9681
    """
9682
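    # Outline: keep only the data LV (children[0]) of each DRBD disk on the
    # primary node, update the configuration, then remove the now-unused
    # volumes on the secondary and the metadata LVs on the primary.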
    instance = self.instance
9683
    assert len(instance.secondary_nodes) == 1
9684
    pnode = instance.primary_node
9685
    snode = instance.secondary_nodes[0]
9686
    feedback_fn("Converting template to plain")
9687

    
9688
    old_disks = instance.disks
9689
    new_disks = [d.children[0] for d in old_disks]
9690

    
9691
    # copy over size and mode
9692
    for parent, child in zip(old_disks, new_disks):
9693
      child.size = parent.size
9694
      child.mode = parent.mode
9695

    
9696
    # update instance structure
9697
    instance.disks = new_disks
9698
    instance.disk_template = constants.DT_PLAIN
9699
    self.cfg.Update(instance, feedback_fn)
9700

    
9701
    feedback_fn("Removing volumes on the secondary node...")
9702
    for disk in old_disks:
9703
      self.cfg.SetDiskID(disk, snode)
9704
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
9705
      if msg:
9706
        self.LogWarning("Could not remove block device %s on node %s,"
9707
                        " continuing anyway: %s", disk.iv_name, snode, msg)
9708

    
9709
    feedback_fn("Removing unneeded volumes on the primary node...")
9710
    for idx, disk in enumerate(old_disks):
9711
      meta = disk.children[1]
9712
      self.cfg.SetDiskID(meta, pnode)
9713
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
9714
      if msg:
9715
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
9716
                        " continuing anyway: %s", idx, pnode, msg)
9717

    
9718
  def Exec(self, feedback_fn):
9719
    """Modifies an instance.
9720

9721
    All parameters take effect only at the next restart of the instance.
9722

9723
    """
9724
    # Process here the warnings from CheckPrereq, as we don't have a
9725
    # feedback_fn there.
9726
    for warn in self.warn:
9727
      feedback_fn("WARNING: %s" % warn)
9728

    
9729
    result = []
9730
    instance = self.instance
9731
    # disk changes
9732
    for disk_op, disk_dict in self.op.disks:
9733
      if disk_op == constants.DDM_REMOVE:
9734
        # remove the last disk
9735
        device = instance.disks.pop()
9736
        device_idx = len(instance.disks)
9737
        for node, disk in device.ComputeNodeTree(instance.primary_node):
9738
          self.cfg.SetDiskID(disk, node)
9739
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
9740
          if msg:
9741
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
9742
                            " continuing anyway", device_idx, node, msg)
9743
        result.append(("disk/%d" % device_idx, "remove"))
9744
      elif disk_op == constants.DDM_ADD:
9745
        # add a new disk
9746
        if instance.disk_template == constants.DT_FILE:
9747
          file_driver, file_path = instance.disks[0].logical_id
9748
          file_path = os.path.dirname(file_path)
9749
        else:
9750
          file_driver = file_path = None
9751
        disk_idx_base = len(instance.disks)
9752
        new_disk = _GenerateDiskTemplate(self,
9753
                                         instance.disk_template,
9754
                                         instance.name, instance.primary_node,
9755
                                         instance.secondary_nodes,
9756
                                         [disk_dict],
9757
                                         file_path,
9758
                                         file_driver,
9759
                                         disk_idx_base, feedback_fn)[0]
9760
        instance.disks.append(new_disk)
9761
        info = _GetInstanceInfoText(instance)
9762

    
9763
        logging.info("Creating volume %s for instance %s",
9764
                     new_disk.iv_name, instance.name)
9765
        # Note: this needs to be kept in sync with _CreateDisks
9766
        #HARDCODE
9767
        for node in instance.all_nodes:
9768
          f_create = node == instance.primary_node
9769
          try:
9770
            _CreateBlockDev(self, node, instance, new_disk,
9771
                            f_create, info, f_create)
9772
          except errors.OpExecError, err:
9773
            self.LogWarning("Failed to create volume %s (%s) on"
9774
                            " node %s: %s",
9775
                            new_disk.iv_name, new_disk, node, err)
9776
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
9777
                       (new_disk.size, new_disk.mode)))
9778
      else:
9779
        # change a given disk
9780
        instance.disks[disk_op].mode = disk_dict['mode']
9781
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
9782

    
9783
    if self.op.disk_template:
9784
      r_shut = _ShutdownInstanceDisks(self, instance)
9785
      if not r_shut:
9786
        raise errors.OpExecError("Cannot shutdow instance disks, unable to"
9787
                                 " proceed with disk template conversion")
9788
      mode = (instance.disk_template, self.op.disk_template)
9789
      try:
9790
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
9791
      except:
9792
        self.cfg.ReleaseDRBDMinors(instance.name)
9793
        raise
9794
      result.append(("disk_template", self.op.disk_template))
9795

    
9796
    # NIC changes
9797
    for nic_op, nic_dict in self.op.nics:
9798
      if nic_op == constants.DDM_REMOVE:
9799
        # remove the last nic
9800
        del instance.nics[-1]
9801
        result.append(("nic.%d" % len(instance.nics), "remove"))
9802
      elif nic_op == constants.DDM_ADD:
9803
        # mac and bridge should be set, by now
9804
        mac = nic_dict['mac']
9805
        ip = nic_dict.get('ip', None)
9806
        nicparams = self.nic_pinst[constants.DDM_ADD]
9807
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9808
        instance.nics.append(new_nic)
9809
        result.append(("nic.%d" % (len(instance.nics) - 1),
9810
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
9811
                       (new_nic.mac, new_nic.ip,
9812
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
9813
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
9814
                       )))
9815
      else:
9816
        for key in 'mac', 'ip':
9817
          if key in nic_dict:
9818
            setattr(instance.nics[nic_op], key, nic_dict[key])
9819
        if nic_op in self.nic_pinst:
9820
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9821
        for key, val in nic_dict.iteritems():
9822
          result.append(("nic.%s/%d" % (key, nic_op), val))
9823

    
9824
    # hvparams changes
9825
    if self.op.hvparams:
9826
      instance.hvparams = self.hv_inst
9827
      for key, val in self.op.hvparams.iteritems():
9828
        result.append(("hv/%s" % key, val))
9829

    
9830
    # beparams changes
9831
    if self.op.beparams:
9832
      instance.beparams = self.be_inst
9833
      for key, val in self.op.beparams.iteritems():
9834
        result.append(("be/%s" % key, val))
9835

    
9836
    # OS change
9837
    if self.op.os_name:
9838
      instance.os = self.op.os_name
9839

    
9840
    # osparams changes
9841
    if self.op.osparams:
9842
      instance.osparams = self.os_inst
9843
      for key, val in self.op.osparams.iteritems():
9844
        result.append(("os/%s" % key, val))
9845

    
9846
    self.cfg.Update(instance, feedback_fn)
9847

    
9848
    return result
9849

    
9850
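  # Maps (current disk template, requested disk template) to the conversion
  # handler; Exec() looks up the pair and calls the handler with an explicit
  # 'self' argument, as the values here are taken from the class body.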
  _DISK_CONVERSIONS = {
9851
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
9852
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
9853
    }
9854

    
9855

    
9856
class LUQueryExports(NoHooksLU):
9857
  """Query the exports list
9858

9859
  """
9860
  _OP_PARAMS = [
9861
    ("nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
9862
    ("use_locking", False, ht.TBool),
9863
    ]
9864
  REQ_BGL = False
9865

    
9866
  def ExpandNames(self):
9867
    self.needed_locks = {}
9868
    self.share_locks[locking.LEVEL_NODE] = 1
9869
    if not self.op.nodes:
9870
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9871
    else:
9872
      self.needed_locks[locking.LEVEL_NODE] = \
9873
        _GetWantedNodes(self, self.op.nodes)
9874

    
9875
  def Exec(self, feedback_fn):
9876
    """Compute the list of all the exported system images.
9877

9878
    @rtype: dict
9879
    @return: a dictionary with the structure node->(export-list)
9880
        where export-list is a list of the instances exported on
9881
        that node.
9882

9883
    """
9884
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
9885
    rpcresult = self.rpc.call_export_list(self.nodes)
9886
    result = {}
9887
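    # nodes that failed to answer the RPC are marked with False instead of an
    # export list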
    for node in rpcresult:
9888
      if rpcresult[node].fail_msg:
9889
        result[node] = False
9890
      else:
9891
        result[node] = rpcresult[node].payload
9892

    
9893
    return result
9894

    
9895

    
9896
class LUPrepareExport(NoHooksLU):
9897
  """Prepares an instance for an export and returns useful information.
9898

9899
  """
9900
  _OP_PARAMS = [
9901
    _PInstanceName,
9902
    ("mode", ht.NoDefault, ht.TElemOf(constants.EXPORT_MODES)),
9903
    ]
9904
  REQ_BGL = False
9905

    
9906
  def ExpandNames(self):
9907
    self._ExpandAndLockInstance()
9908

    
9909
  def CheckPrereq(self):
9910
    """Check prerequisites.
9911

9912
    """
9913
    instance_name = self.op.instance_name
9914

    
9915
    self.instance = self.cfg.GetInstanceInfo(instance_name)
9916
    assert self.instance is not None, \
9917
          "Cannot retrieve locked instance %s" % self.op.instance_name
9918
    _CheckNodeOnline(self, self.instance.primary_node)
9919

    
9920
    self._cds = _GetClusterDomainSecret()
9921

    
9922
  def Exec(self, feedback_fn):
9923
    """Prepares an instance for an export.
9924

9925
    """
9926
    instance = self.instance
9927

    
9928
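    # Only remote exports need preparation: create a temporary X509
    # certificate on the primary node and return it, signed with the cluster
    # domain secret, together with the handshake information the importing
    # side needs. Local exports need no preparation, hence the None below.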
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
9929
      salt = utils.GenerateSecret(8)
9930

    
9931
      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
9932
      result = self.rpc.call_x509_cert_create(instance.primary_node,
9933
                                              constants.RIE_CERT_VALIDITY)
9934
      result.Raise("Can't create X509 key and certificate on %s" % result.node)
9935

    
9936
      (name, cert_pem) = result.payload
9937

    
9938
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
9939
                                             cert_pem)
9940

    
9941
      return {
9942
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
9943
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
9944
                          salt),
9945
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
9946
        }
9947

    
9948
    return None
9949

    
9950

    
9951
class LUExportInstance(LogicalUnit):
9952
  """Export an instance to an image in the cluster.
9953

9954
  """
9955
  HPATH = "instance-export"
9956
  HTYPE = constants.HTYPE_INSTANCE
9957
  _OP_PARAMS = [
9958
    _PInstanceName,
9959
    ("target_node", ht.NoDefault, ht.TOr(ht.TNonEmptyString, ht.TList)),
9960
    ("shutdown", True, ht.TBool),
9961
    _PShutdownTimeout,
9962
    ("remove_instance", False, ht.TBool),
9963
    ("ignore_remove_failures", False, ht.TBool),
9964
    ("mode", constants.EXPORT_MODE_LOCAL, ht.TElemOf(constants.EXPORT_MODES)),
9965
    ("x509_key_name", None, ht.TOr(ht.TList, ht.TNone)),
9966
    ("destination_x509_ca", None, ht.TMaybeString),
9967
    ]
9968
  REQ_BGL = False
9969

    
9970
  def CheckArguments(self):
9971
    """Check the arguments.
9972

9973
    """
9974
    self.x509_key_name = self.op.x509_key_name
9975
    self.dest_x509_ca_pem = self.op.destination_x509_ca
9976

    
9977
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
9978
      if not self.x509_key_name:
9979
        raise errors.OpPrereqError("Missing X509 key name for encryption",
9980
                                   errors.ECODE_INVAL)
9981

    
9982
      if not self.dest_x509_ca_pem:
9983
        raise errors.OpPrereqError("Missing destination X509 CA",
9984
                                   errors.ECODE_INVAL)
9985

    
9986
  def ExpandNames(self):
9987
    self._ExpandAndLockInstance()
9988

    
9989
    # Lock all nodes for local exports
9990
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9991
      # FIXME: lock only instance primary and destination node
9992
      #
9993
      # Sad but true, for now we have to lock all nodes, as we don't know where
9994
      # the previous export might be, and in this LU we search for it and
9995
      # remove it from its current node. In the future we could fix this by:
9996
      #  - making a tasklet to search (share-lock all), then create the
9997
      #    new one, then one to remove, after
9998
      #  - removing the removal operation altogether
9999
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10000

    
10001
  def DeclareLocks(self, level):
10002
    """Last minute lock declaration."""
10003
    # All nodes are locked anyway, so nothing to do here.
10004

    
10005
  def BuildHooksEnv(self):
10006
    """Build hooks env.
10007

10008
    This will run on the master, primary node and target node.
10009

10010
    """
10011
    env = {
10012
      "EXPORT_MODE": self.op.mode,
10013
      "EXPORT_NODE": self.op.target_node,
10014
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
10015
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
10016
      # TODO: Generic function for boolean env variables
10017
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
10018
      }
10019

    
10020
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10021

    
10022
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
10023

    
10024
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
10025
      nl.append(self.op.target_node)
10026

    
10027
    return env, nl, nl
10028

    
10029
  def CheckPrereq(self):
10030
    """Check prerequisites.
10031

10032
    This checks that the instance and node names are valid.
10033

10034
    """
10035
    instance_name = self.op.instance_name
10036

    
10037
    self.instance = self.cfg.GetInstanceInfo(instance_name)
10038
    assert self.instance is not None, \
10039
          "Cannot retrieve locked instance %s" % self.op.instance_name
10040
    _CheckNodeOnline(self, self.instance.primary_node)
10041

    
10042
    if (self.op.remove_instance and self.instance.admin_up and
10043
        not self.op.shutdown):
10044
      raise errors.OpPrereqError("Can not remove instance without shutting it"
10045
                                 " down before")
10046

    
10047
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
10048
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
10049
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
10050
      assert self.dst_node is not None
10051

    
10052
      _CheckNodeOnline(self, self.dst_node.name)
10053
      _CheckNodeNotDrained(self, self.dst_node.name)
10054

    
10055
      self._cds = None
10056
      self.dest_disk_info = None
10057
      self.dest_x509_ca = None
10058

    
10059
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10060
      self.dst_node = None
10061

    
10062
      if len(self.op.target_node) != len(self.instance.disks):
10063
        raise errors.OpPrereqError(("Received destination information for %s"
10064
                                    " disks, but instance %s has %s disks") %
10065
                                   (len(self.op.target_node), instance_name,
10066
                                    len(self.instance.disks)),
10067
                                   errors.ECODE_INVAL)
10068

    
10069
      cds = _GetClusterDomainSecret()
10070

    
10071
      # Check X509 key name
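      # self.op.x509_key_name is expected to be the (name, hmac, salt) tuple
      # returned by LUPrepareExport; verify the HMAC against the cluster
      # domain secret before trusting the key name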
10072
      try:
10073
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
10074
      except (TypeError, ValueError), err:
10075
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
10076

    
10077
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
10078
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
10079
                                   errors.ECODE_INVAL)
10080

    
10081
      # Load and verify CA
10082
      try:
10083
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
10084
      except OpenSSL.crypto.Error, err:
10085
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
10086
                                   (err, ), errors.ECODE_INVAL)
10087

    
10088
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
10089
      if errcode is not None:
10090
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
10091
                                   (msg, ), errors.ECODE_INVAL)
10092

    
10093
      self.dest_x509_ca = cert
10094

    
10095
      # Verify target information
10096
      disk_info = []
10097
      for idx, disk_data in enumerate(self.op.target_node):
10098
        try:
10099
          (host, port, magic) = \
10100
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
10101
        except errors.GenericError, err:
10102
          raise errors.OpPrereqError("Target info for disk %s: %s" %
10103
                                     (idx, err), errors.ECODE_INVAL)
10104

    
10105
        disk_info.append((host, port, magic))
10106

    
10107
      assert len(disk_info) == len(self.op.target_node)
10108
      self.dest_disk_info = disk_info
10109

    
10110
    else:
10111
      raise errors.ProgrammerError("Unhandled export mode %r" %
10112
                                   self.op.mode)
10113

    
10114
    # instance disk type verification
10115
    # TODO: Implement export support for file-based disks
10116
    for disk in self.instance.disks:
10117
      if disk.dev_type == constants.LD_FILE:
10118
        raise errors.OpPrereqError("Export not supported for instances with"
10119
                                   " file-based disks", errors.ECODE_INVAL)
10120

    
10121
  def _CleanupExports(self, feedback_fn):
10122
    """Removes exports of current instance from all other nodes.
10123

10124
    If an instance in a cluster with nodes A..D was exported to node C, its
10125
    exports will be removed from the nodes A, B and D.
10126

10127
    """
10128
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
10129

    
10130
    nodelist = self.cfg.GetNodeList()
10131
    nodelist.remove(self.dst_node.name)
10132

    
10133
    # on one-node clusters nodelist will be empty after the removal;
10134
    # if we proceed the backup would be removed because OpQueryExports
10135
    # substitutes an empty list with the full cluster node list.
10136
    iname = self.instance.name
10137
    if nodelist:
10138
      feedback_fn("Removing old exports for instance %s" % iname)
10139
      exportlist = self.rpc.call_export_list(nodelist)
10140
      for node in exportlist:
10141
        if exportlist[node].fail_msg:
10142
          continue
10143
        if iname in exportlist[node].payload:
10144
          msg = self.rpc.call_export_remove(node, iname).fail_msg
10145
          if msg:
10146
            self.LogWarning("Could not remove older export for instance %s"
10147
                            " on node %s: %s", iname, node, msg)
10148

    
10149
  def Exec(self, feedback_fn):
10150
    """Export an instance to an image in the cluster.
10151

10152
    """
10153
    assert self.op.mode in constants.EXPORT_MODES
10154

    
10155
    instance = self.instance
10156
    src_node = instance.primary_node
10157

    
10158
    if self.op.shutdown:
10159
      # shutdown the instance, but not the disks
10160
      feedback_fn("Shutting down instance %s" % instance.name)
10161
      result = self.rpc.call_instance_shutdown(src_node, instance,
10162
                                               self.op.shutdown_timeout)
10163
      # TODO: Maybe ignore failures if ignore_remove_failures is set
10164
      result.Raise("Could not shutdown instance %s on"
10165
                   " node %s" % (instance.name, src_node))
10166

    
10167
    # set the disks ID correctly since call_instance_start needs the
10168
    # correct drbd minor to create the symlinks
10169
    for disk in instance.disks:
10170
      self.cfg.SetDiskID(disk, src_node)
10171

    
10172
    activate_disks = (not instance.admin_up)
10173

    
10174
    if activate_disks:
10175
      # Activate the instance disks if we're exporting a stopped instance
10176
      feedback_fn("Activating disks for %s" % instance.name)
10177
      _StartInstanceDisks(self, instance, None)
10178

    
10179
    try:
10180
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
10181
                                                     instance)
10182

    
10183
      helper.CreateSnapshots()
10184
      try:
10185
        if (self.op.shutdown and instance.admin_up and
10186
            not self.op.remove_instance):
10187
          assert not activate_disks
10188
          feedback_fn("Starting instance %s" % instance.name)
10189
          result = self.rpc.call_instance_start(src_node, instance, None, None)
10190
          msg = result.fail_msg
10191
          if msg:
10192
            feedback_fn("Failed to start instance: %s" % msg)
10193
            _ShutdownInstanceDisks(self, instance)
10194
            raise errors.OpExecError("Could not start instance: %s" % msg)
10195

    
10196
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
10197
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
10198
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10199
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
10200
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10201

    
10202
          (key_name, _, _) = self.x509_key_name
10203

    
10204
          dest_ca_pem = \
10205
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
10206
                                            self.dest_x509_ca)
10207

    
10208
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
10209
                                                     key_name, dest_ca_pem,
10210
                                                     timeouts)
10211
      finally:
10212
        helper.Cleanup()
10213

    
10214
      # Check for backwards compatibility
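      # (fin_resu is the overall finalization status, dresults holds one
      # boolean per instance disk)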
10215
      assert len(dresults) == len(instance.disks)
10216
      assert compat.all(isinstance(i, bool) for i in dresults), \
10217
             "Not all results are boolean: %r" % dresults
10218

    
10219
    finally:
10220
      if activate_disks:
10221
        feedback_fn("Deactivating disks for %s" % instance.name)
10222
        _ShutdownInstanceDisks(self, instance)
10223

    
10224
    if not (compat.all(dresults) and fin_resu):
10225
      failures = []
10226
      if not fin_resu:
10227
        failures.append("export finalization")
10228
      if not compat.all(dresults):
10229
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
10230
                               if not dsk)
10231
        failures.append("disk export: disk(s) %s" % fdsk)
10232

    
10233
      raise errors.OpExecError("Export failed, errors in %s" %
10234
                               utils.CommaJoin(failures))
10235

    
10236
    # At this point, the export was successful, we can cleanup/finish
10237

    
10238
    # Remove instance if requested
10239
    if self.op.remove_instance:
10240
      feedback_fn("Removing instance %s" % instance.name)
10241
      _RemoveInstance(self, feedback_fn, instance,
10242
                      self.op.ignore_remove_failures)
10243

    
10244
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
10245
      self._CleanupExports(feedback_fn)
10246

    
10247
    return fin_resu, dresults
10248

    
10249

    
10250
class LURemoveExport(NoHooksLU):
10251
  """Remove exports related to the named instance.
10252

10253
  """
10254
  _OP_PARAMS = [
10255
    _PInstanceName,
10256
    ]
10257
  REQ_BGL = False
10258

    
10259
  def ExpandNames(self):
10260
    self.needed_locks = {}
10261
    # We need all nodes to be locked in order for RemoveExport to work, but we
10262
    # don't need to lock the instance itself, as nothing will happen to it (and
10263
    # we can remove exports also for a removed instance)
10264
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10265

    
10266
  def Exec(self, feedback_fn):
10267
    """Remove any export.
10268

10269
    """
10270
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
10271
    # If the instance was not found we'll try with the name that was passed in.
10272
    # This will only work if it was an FQDN, though.
10273
    fqdn_warn = False
10274
    if not instance_name:
10275
      fqdn_warn = True
10276
      instance_name = self.op.instance_name
10277

    
10278
    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
10279
    exportlist = self.rpc.call_export_list(locked_nodes)
10280
    found = False
10281
    for node in exportlist:
10282
      msg = exportlist[node].fail_msg
10283
      if msg:
10284
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
10285
        continue
10286
      if instance_name in exportlist[node].payload:
10287
        found = True
10288
        result = self.rpc.call_export_remove(node, instance_name)
10289
        msg = result.fail_msg
10290
        if msg:
10291
          logging.error("Could not remove export for instance %s"
10292
                        " on node %s: %s", instance_name, node, msg)
10293

    
10294
    if fqdn_warn and not found:
10295
      feedback_fn("Export not found. If trying to remove an export belonging"
10296
                  " to a deleted instance please use its Fully Qualified"
10297
                  " Domain Name.")
10298

    
10299

    
10300
class LUQueryGroups(NoHooksLU):
10301
  """Logical unit for querying node groups.
10302

10303
  """
10304
  # pylint: disable-msg=W0142
10305
  _OP_PARAMS = [
10306
    _POutputFields,
10307
    ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
10308
    ]
10309

    
10310
  REQ_BGL = False
10311

    
10312
  _FIELDS_DYNAMIC = utils.FieldSet()
10313

    
10314
  _SIMPLE_FIELDS = ["name", "uuid", "ctime", "mtime", "serial_no"]
10315

    
10316
  _FIELDS_STATIC = utils.FieldSet(
10317
      "node_cnt", "node_list", "pinst_cnt", "pinst_list", *_SIMPLE_FIELDS)
10318

    
10319
  def CheckArguments(self):
10320
    _CheckOutputFields(static=self._FIELDS_STATIC,
10321
                       dynamic=self._FIELDS_DYNAMIC,
10322
                       selected=self.op.output_fields)
10323

    
10324
  def ExpandNames(self):
10325
    self.needed_locks = {}
10326

    
10327
  def Exec(self, feedback_fn):
10328
    """Computes the list of groups and their attributes.
10329

10330
    """
10331
    all_groups = self.cfg.GetAllNodeGroupsInfo()
10332

    
10333
    if not self.op.names:
10334
      my_groups = utils.NiceSort(all_groups.keys())
10335
    else:
10336
      # Accept names to be either names or UUIDs.
10337
      all_uuid = frozenset(all_groups.keys())
10338
      name_to_uuid = dict((g.name, g.uuid) for g in all_groups.values())
10339
      my_groups = []
10340
      missing = []
10341

    
10342
      for name in self.op.names:
10343
        if name in all_uuid:
10344
          my_groups.append(name)
10345
        elif name in name_to_uuid:
10346
          my_groups.append(name_to_uuid[name])
10347
        else:
10348
          missing.append(name)
10349

    
10350
      if missing:
10351
        raise errors.OpPrereqError("Some groups do not exist: %s" % missing,
10352
                                   errors.ECODE_NOENT)
10353

    
10354
    do_nodes = bool(frozenset(["node_cnt", "node_list"]).
10355
                    intersection(self.op.output_fields))
10356

    
10357
    do_instances = bool(frozenset(["pinst_cnt", "pinst_list"]).
10358
                        intersection(self.op.output_fields))
10359

    
10360
    # We need to map group->[nodes], and group->[instances]. The former is
10361
    # directly attainable, but the latter we have to do through instance->node,
10362
    # hence we need to process nodes even if we only need instance information.
10363
    if do_nodes or do_instances:
10364
      all_nodes = self.cfg.GetAllNodesInfo()
10365
      group_to_nodes = dict((all_groups[name].uuid, []) for name in my_groups)
10366
      node_to_group = {}
10367

    
10368
      for node in all_nodes.values():
10369
        if node.group in group_to_nodes:
10370
          group_to_nodes[node.group].append(node.name)
10371
          node_to_group[node.name] = node.group
10372

    
10373
      if do_instances:
10374
        all_instances = self.cfg.GetAllInstancesInfo()
10375
        group_to_instances = dict((all_groups[name].uuid, [])
10376
                                  for name in my_groups)
10377
        for instance in all_instances.values():
10378
          node = instance.primary_node
10379
          if node in node_to_group:
10380
            group_to_instances[node_to_group[node]].append(instance.name)
10381

    
10382
    output = []
10383

    
10384
    for name in my_groups:
10385
      group = all_groups[name]
10386
      group_output = []
10387

    
10388
      for field in self.op.output_fields:
10389
        if field in self._SIMPLE_FIELDS:
10390
          val = getattr(group, field)
10391
        elif field == "node_list":
10392
          val = utils.NiceSort(group_to_nodes[group.uuid])
10393
        elif field == "node_cnt":
10394
          val = len(group_to_nodes[group.uuid])
10395
        elif field == "pinst_list":
10396
          val = utils.NiceSort(group_to_instances[group.uuid])
10397
        elif field == "pinst_cnt":
10398
          val = len(group_to_instances[group.uuid])
10399
        else:
10400
          raise errors.ParameterError(field)
10401
        group_output.append(val)
10402
      output.append(group_output)
10403

    
10404
    return output
10405

    
10406

    
10407
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
10408
  """Generic tags LU.
10409

10410
  This is an abstract class which is the parent of all the other tags LUs.
10411

10412
  """
10413

    
10414
  def ExpandNames(self):
10415
    self.needed_locks = {}
10416
    if self.op.kind == constants.TAG_NODE:
10417
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
10418
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
10419
    elif self.op.kind == constants.TAG_INSTANCE:
10420
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
10421
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
10422

    
10423
    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
10424
    # not possible to acquire the BGL based on opcode parameters)
10425

    
10426
  def CheckPrereq(self):
10427
    """Check prerequisites.
10428

10429
    """
10430
    if self.op.kind == constants.TAG_CLUSTER:
10431
      self.target = self.cfg.GetClusterInfo()
10432
    elif self.op.kind == constants.TAG_NODE:
10433
      self.target = self.cfg.GetNodeInfo(self.op.name)
10434
    elif self.op.kind == constants.TAG_INSTANCE:
10435
      self.target = self.cfg.GetInstanceInfo(self.op.name)
10436
    else:
10437
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
10438
                                 str(self.op.kind), errors.ECODE_INVAL)
10439

    
10440

    
10441
class LUGetTags(TagsLU):
10442
  """Returns the tags of a given object.
10443

10444
  """
10445
  _OP_PARAMS = [
10446
    ("kind", ht.NoDefault, ht.TElemOf(constants.VALID_TAG_TYPES)),
10447
    # Name is only meaningful for nodes and instances
10448
    ("name", ht.NoDefault, ht.TMaybeString),
10449
    ]
10450
  REQ_BGL = False
10451

    
10452
  def ExpandNames(self):
10453
    TagsLU.ExpandNames(self)
10454

    
10455
    # Share locks as this is only a read operation
10456
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
10457

    
10458
  def Exec(self, feedback_fn):
10459
    """Returns the tag list.
10460

10461
    """
10462
    return list(self.target.GetTags())
10463

    
10464

    
10465
class LUSearchTags(NoHooksLU):
10466
  """Searches the tags for a given pattern.
10467

10468
  """
10469
  _OP_PARAMS = [
10470
    ("pattern", ht.NoDefault, ht.TNonEmptyString),
10471
    ]
10472
  REQ_BGL = False
10473

    
10474
  def ExpandNames(self):
10475
    self.needed_locks = {}
10476

    
10477
  def CheckPrereq(self):
10478
    """Check prerequisites.
10479

10480
    This checks the pattern passed for validity by compiling it.
10481

10482
    """
10483
    try:
10484
      self.re = re.compile(self.op.pattern)
10485
    except re.error, err:
10486
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
10487
                                 (self.op.pattern, err), errors.ECODE_INVAL)
10488

    
10489
  def Exec(self, feedback_fn):
10490
    """Returns the tag list.
10491

10492
    """
10493
    cfg = self.cfg
10494
    tgts = [("/cluster", cfg.GetClusterInfo())]
10495
    ilist = cfg.GetAllInstancesInfo().values()
10496
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
10497
    nlist = cfg.GetAllNodesInfo().values()
10498
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
10499
    results = []
10500
    for path, target in tgts:
10501
      for tag in target.GetTags():
10502
        if self.re.search(tag):
10503
          results.append((path, tag))
10504
    return results
10505

    
10506

    
10507
class LUAddTags(TagsLU):
10508
  """Sets a tag on a given object.
10509

10510
  """
10511
  _OP_PARAMS = [
10512
    ("kind", ht.NoDefault, ht.TElemOf(constants.VALID_TAG_TYPES)),
10513
    # Name is only meaningful for nodes and instances
10514
    ("name", ht.NoDefault, ht.TMaybeString),
10515
    ("tags", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
10516
    ]
10517
  REQ_BGL = False
10518

    
10519
  def CheckPrereq(self):
10520
    """Check prerequisites.
10521

10522
    This checks the type and length of the tag name and value.
10523

10524
    """
10525
    TagsLU.CheckPrereq(self)
10526
    for tag in self.op.tags:
10527
      objects.TaggableObject.ValidateTag(tag)
10528

    
10529
  def Exec(self, feedback_fn):
10530
    """Sets the tag.
10531

10532
    """
10533
    try:
10534
      for tag in self.op.tags:
10535
        self.target.AddTag(tag)
10536
    except errors.TagError, err:
10537
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
10538
    self.cfg.Update(self.target, feedback_fn)
10539

    
10540

    
10541
class LUDelTags(TagsLU):
10542
  """Delete a list of tags from a given object.
10543

10544
  """
10545
  _OP_PARAMS = [
10546
    ("kind", ht.NoDefault, ht.TElemOf(constants.VALID_TAG_TYPES)),
10547
    # Name is only meaningful for nodes and instances
10548
    ("name", ht.NoDefault, ht.TMaybeString),
10549
    ("tags", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
10550
    ]
10551
  REQ_BGL = False
10552

    
10553
  def CheckPrereq(self):
10554
    """Check prerequisites.
10555

10556
    This checks that we have the given tag.
10557

10558
    """
10559
    TagsLU.CheckPrereq(self)
10560
    for tag in self.op.tags:
10561
      objects.TaggableObject.ValidateTag(tag)
10562
    del_tags = frozenset(self.op.tags)
10563
    cur_tags = self.target.GetTags()
10564

    
10565
    diff_tags = del_tags - cur_tags
10566
    if diff_tags:
10567
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
10568
      raise errors.OpPrereqError("Tag(s) %s not found" %
10569
                                 (utils.CommaJoin(diff_names), ),
10570
                                 errors.ECODE_NOENT)
10571

    
10572
  def Exec(self, feedback_fn):
10573
    """Remove the tag from the object.
10574

10575
    """
10576
    for tag in self.op.tags:
10577
      self.target.RemoveTag(tag)
10578
    self.cfg.Update(self.target, feedback_fn)
10579

    
10580

    
10581
class LUTestDelay(NoHooksLU):
10582
  """Sleep for a specified amount of time.
10583

10584
  This LU sleeps on the master and/or nodes for a specified amount of
10585
  time.
10586

10587
  """
10588
  _OP_PARAMS = [
10589
    ("duration", ht.NoDefault, ht.TFloat),
10590
    ("on_master", True, ht.TBool),
10591
    ("on_nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
10592
    ("repeat", 0, ht.TPositiveInt)
10593
    ]
10594
  REQ_BGL = False
10595

    
10596
  def ExpandNames(self):
10597
    """Expand names and set required locks.
10598

10599
    This expands the node list, if any.
10600

10601
    """
10602
    self.needed_locks = {}
10603
    if self.op.on_nodes:
10604
      # _GetWantedNodes can be used here, but is not always appropriate to use
10605
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
10606
      # more information.
10607
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
10608
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
10609

    
10610
  def _TestDelay(self):
10611
    """Do the actual sleep.
10612

10613
    """
10614
    if self.op.on_master:
10615
      if not utils.TestDelay(self.op.duration):
10616
        raise errors.OpExecError("Error during master delay test")
10617
    if self.op.on_nodes:
10618
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
10619
      for node, node_result in result.items():
10620
        node_result.Raise("Failure during rpc call to node %s" % node)
10621

    
10622
  def Exec(self, feedback_fn):
10623
    """Execute the test delay opcode, with the wanted repetitions.
10624

10625
    """
10626
    if self.op.repeat == 0:
10627
      self._TestDelay()
10628
    else:
10629
      top_value = self.op.repeat - 1
10630
      for i in range(self.op.repeat):
10631
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
10632
        self._TestDelay()
10633

    
10634

    
10635
class LUTestJobqueue(NoHooksLU):
10636
  """Utility LU to test some aspects of the job queue.
10637

10638
  """
10639
  _OP_PARAMS = [
10640
    ("notify_waitlock", False, ht.TBool),
10641
    ("notify_exec", False, ht.TBool),
10642
    ("log_messages", ht.EmptyList, ht.TListOf(ht.TString)),
10643
    ("fail", False, ht.TBool),
10644
    ]
10645
  REQ_BGL = False
10646

    
10647
  # Must be lower than default timeout for WaitForJobChange to see whether it
10648
  # notices changed jobs
10649
  _CLIENT_CONNECT_TIMEOUT = 20.0
10650
  _CLIENT_CONFIRM_TIMEOUT = 60.0
10651

    
10652
  @classmethod
10653
  def _NotifyUsingSocket(cls, cb, errcls):
10654
    """Opens a Unix socket and waits for another program to connect.
10655

10656
    @type cb: callable
10657
    @param cb: Callback to send socket name to client
10658
    @type errcls: class
10659
    @param errcls: Exception class to use for errors
10660

10661
    """
10662
    # Using a temporary directory as there's no easy way to create temporary
10663
    # sockets without writing a custom loop around tempfile.mktemp and
10664
    # socket.bind
10665
    tmpdir = tempfile.mkdtemp()
10666
    try:
10667
      tmpsock = utils.PathJoin(tmpdir, "sock")
10668

    
10669
      logging.debug("Creating temporary socket at %s", tmpsock)
10670
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
10671
      try:
10672
        sock.bind(tmpsock)
10673
        sock.listen(1)
10674

    
10675
        # Send details to client
10676
        cb(tmpsock)
10677

    
10678
        # Wait for client to connect before continuing
10679
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
10680
        try:
10681
          (conn, _) = sock.accept()
10682
        except socket.error, err:
10683
          raise errcls("Client didn't connect in time (%s)" % err)
10684
      finally:
10685
        sock.close()
10686
    finally:
10687
      # Remove as soon as client is connected
10688
      shutil.rmtree(tmpdir)
10689

    
10690
    # Wait for client to close
10691
    try:
10692
      try:
10693
        # pylint: disable-msg=E1101
10694
        # Instance of '_socketobject' has no ... member
10695
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
10696
        conn.recv(1)
10697
      except socket.error, err:
10698
        raise errcls("Client failed to confirm notification (%s)" % err)
10699
    finally:
10700
      conn.close()
10701

    
10702
  def _SendNotification(self, test, arg, sockname):
10703
    """Sends a notification to the client.
10704

10705
    @type test: string
10706
    @param test: Test name
10707
    @param arg: Test argument (depends on test)
10708
    @type sockname: string
10709
    @param sockname: Socket path
10710

10711
    """
10712
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
10713

    
10714
  def _Notify(self, prereq, test, arg):
10715
    """Notifies the client of a test.
10716

10717
    @type prereq: bool
10718
    @param prereq: Whether this is a prereq-phase test
10719
    @type test: string
10720
    @param test: Test name
10721
    @param arg: Test argument (depends on test)
10722

10723
    """
10724
    if prereq:
10725
      errcls = errors.OpPrereqError
10726
    else:
10727
      errcls = errors.OpExecError
10728

    
10729
    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
10730
                                                  test, arg),
10731
                                   errcls)
10732

    
10733
  def CheckArguments(self):
10734
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
10735
    self.expandnames_calls = 0
10736

    
10737
  def ExpandNames(self):
10738
    checkargs_calls = getattr(self, "checkargs_calls", 0)
10739
    if checkargs_calls < 1:
10740
      raise errors.ProgrammerError("CheckArguments was not called")
10741

    
10742
    self.expandnames_calls += 1
10743

    
10744
    if self.op.notify_waitlock:
10745
      self._Notify(True, constants.JQT_EXPANDNAMES, None)
10746

    
10747
    self.LogInfo("Expanding names")
10748

    
10749
    # Get lock on master node (just to get a lock, not for a particular reason)
10750
    self.needed_locks = {
10751
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
10752
      }
10753

    
10754
  def Exec(self, feedback_fn):
10755
    if self.expandnames_calls < 1:
10756
      raise errors.ProgrammerError("ExpandNames was not called")
10757

    
10758
    if self.op.notify_exec:
10759
      self._Notify(False, constants.JQT_EXEC, None)
10760

    
10761
    self.LogInfo("Executing")
10762

    
10763
    if self.op.log_messages:
10764
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
10765
      for idx, msg in enumerate(self.op.log_messages):
10766
        self.LogInfo("Sending log message %s", idx + 1)
10767
        feedback_fn(constants.JQT_MSGPREFIX + msg)
10768
        # Report how many test messages have been sent
10769
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)
10770

    
10771
    if self.op.fail:
10772
      raise errors.OpExecError("Opcode failure was requested")
10773

    
10774
    return True
10775

    
10776

    
10777
class IAllocator(object):
10778
  """IAllocator framework.
10779

10780
  An IAllocator instance has four sets of attributes:
10781
    - cfg that is needed to query the cluster
10782
    - input data (all members of the _KEYS class attribute are required)
10783
    - four buffer attributes (in|out_data|text), that represent the
10784
      input (to the external script) in text and data structure format,
10785
      and the output from it, again in two formats
10786
    - the result variables from the script (success, info, nodes) for
10787
      easy usage
10788

10789
  """
10790
  # pylint: disable-msg=R0902
10791
  # lots of instance attributes
10792
  _ALLO_KEYS = [
10793
    "name", "mem_size", "disks", "disk_template",
10794
    "os", "tags", "nics", "vcpus", "hypervisor",
10795
    ]
10796
  _RELO_KEYS = [
10797
    "name", "relocate_from",
10798
    ]
10799
  _EVAC_KEYS = [
10800
    "evac_nodes",
10801
    ]
10802

    
10803
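  # Typical construction (illustrative values); the keyword arguments must
  # exactly match the keyset of the chosen mode, e.g. for relocation:
  #   IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_RELOC,
  #              name=instance.name,
  #              relocate_from=list(instance.secondary_nodes))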
  def __init__(self, cfg, rpc, mode, **kwargs):
10804
    self.cfg = cfg
10805
    self.rpc = rpc
10806
    # init buffer variables
10807
    self.in_text = self.out_text = self.in_data = self.out_data = None
10808
    # init all input fields so that pylint is happy
10809
    self.mode = mode
10810
    self.mem_size = self.disks = self.disk_template = None
10811
    self.os = self.tags = self.nics = self.vcpus = None
10812
    self.hypervisor = None
10813
    self.relocate_from = None
10814
    self.name = None
10815
    self.evac_nodes = None
10816
    # computed fields
10817
    self.required_nodes = None
10818
    # init result fields
10819
    self.success = self.info = self.result = None
10820
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10821
      keyset = self._ALLO_KEYS
10822
      fn = self._AddNewInstance
10823
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10824
      keyset = self._RELO_KEYS
10825
      fn = self._AddRelocateInstance
10826
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10827
      keyset = self._EVAC_KEYS
10828
      fn = self._AddEvacuateNodes
10829
    else:
10830
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
10831
                                   " IAllocator" % self.mode)
10832
    for key in kwargs:
10833
      if key not in keyset:
10834
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
10835
                                     " IAllocator" % key)
10836
      setattr(self, key, kwargs[key])
10837

    
10838
    for key in keyset:
10839
      if key not in kwargs:
10840
        raise errors.ProgrammerError("Missing input parameter '%s' to"
10841
                                     " IAllocator" % key)
10842
    self._BuildInputData(fn)
10843

    
10844
  def _ComputeClusterData(self):
10845
    """Compute the generic allocator input data.
10846

10847
    This is the data that is independent of the actual operation.
10848

10849
    """
10850
    cfg = self.cfg
10851
    cluster_info = cfg.GetClusterInfo()
10852
    # cluster data
10853
    data = {
10854
      "version": constants.IALLOCATOR_VERSION,
10855
      "cluster_name": cfg.GetClusterName(),
10856
      "cluster_tags": list(cluster_info.GetTags()),
10857
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
10858
      # we don't have job IDs
10859
      }
10860
    iinfo = cfg.GetAllInstancesInfo().values()
10861
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
10862

    
10863
    # node data
10864
    node_list = cfg.GetNodeList()
10865

    
10866
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10867
      hypervisor_name = self.hypervisor
10868
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10869
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
10870
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10871
      hypervisor_name = cluster_info.enabled_hypervisors[0]
10872

    
10873
    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
10874
                                        hypervisor_name)
10875
    node_iinfo = \
10876
      self.rpc.call_all_instances_info(node_list,
10877
                                       cluster_info.enabled_hypervisors)
10878

    
10879
    data["nodegroups"] = self._ComputeNodeGroupData(cfg)
10880

    
10881
    data["nodes"] = self._ComputeNodeData(cfg, node_data, node_iinfo, i_list)
10882

    
10883
    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
10884

    
10885
    self.in_data = data
10886

    
10887
  @staticmethod
10888
  def _ComputeNodeGroupData(cfg):
10889
    """Compute node groups data.
10890

10891
    """
10892
    ng = {}
10893
    for guuid, gdata in cfg.GetAllNodeGroupsInfo().items():
10894
      ng[guuid] = { "name": gdata.name }
10895
    return ng
10896

    
10897
  @staticmethod
10898
  def _ComputeNodeData(cfg, node_data, node_iinfo, i_list):
10899
    """Compute global node data.
10900

10901
    """
10902
    node_results = {}
10903
    for nname, nresult in node_data.items():
10904
      # first fill in static (config-based) values
10905
      ninfo = cfg.GetNodeInfo(nname)
10906
      pnr = {
10907
        "tags": list(ninfo.GetTags()),
10908
        "primary_ip": ninfo.primary_ip,
10909
        "secondary_ip": ninfo.secondary_ip,
10910
        "offline": ninfo.offline,
10911
        "drained": ninfo.drained,
10912
        "master_candidate": ninfo.master_candidate,
10913
        "group": ninfo.group,
10914
        "master_capable": ninfo.master_capable,
10915
        "vm_capable": ninfo.vm_capable,
10916
        }
10917

    
10918
      if not (ninfo.offline or ninfo.drained):
10919
        nresult.Raise("Can't get data for node %s" % nname)
10920
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
10921
                                nname)
10922
        remote_info = nresult.payload
10923

    
10924
        for attr in ['memory_total', 'memory_free', 'memory_dom0',
10925
                     'vg_size', 'vg_free', 'cpu_total']:
10926
          if attr not in remote_info:
10927
            raise errors.OpExecError("Node '%s' didn't return attribute"
10928
                                     " '%s'" % (nname, attr))
10929
          if not isinstance(remote_info[attr], int):
10930
            raise errors.OpExecError("Node '%s' returned invalid value"
10931
                                     " for '%s': %s" %
10932
                                     (nname, attr, remote_info[attr]))
10933
        # compute memory used by primary instances
10934
        i_p_mem = i_p_up_mem = 0
10935
        for iinfo, beinfo in i_list:
10936
          if iinfo.primary_node == nname:
10937
            i_p_mem += beinfo[constants.BE_MEMORY]
10938
            if iinfo.name not in node_iinfo[nname].payload:
10939
              i_used_mem = 0
10940
            else:
10941
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
10942
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
10943
            remote_info['memory_free'] -= max(0, i_mem_diff)
10944

    
10945
            if iinfo.admin_up:
10946
              i_p_up_mem += beinfo[constants.BE_MEMORY]
10947

    
10948
        # compute memory used by instances
10949
        pnr_dyn = {
10950
          "total_memory": remote_info['memory_total'],
10951
          "reserved_memory": remote_info['memory_dom0'],
10952
          "free_memory": remote_info['memory_free'],
10953
          "total_disk": remote_info['vg_size'],
10954
          "free_disk": remote_info['vg_free'],
10955
          "total_cpus": remote_info['cpu_total'],
10956
          "i_pri_memory": i_p_mem,
10957
          "i_pri_up_memory": i_p_up_mem,
10958
          }
10959
        pnr.update(pnr_dyn)
10960

    
10961
      node_results[nname] = pnr
10962

    
10963
    return node_results
10964

    
10965
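  # Shape of a single entry in the dict returned by _ComputeNodeData above
  # (values invented for illustration; the dynamic keys from pnr_dyn are
  # only present for nodes that are neither offline nor drained):
  #   "node1.example.com": {
  #     "tags": [], "primary_ip": "192.0.2.1", "offline": False, ...,
  #     "total_memory": 16384, "free_memory": 8192, "total_cpus": 8, ...}
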
  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                   }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data

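  # Shape of one entry in the dict returned by _ComputeInstanceData above
  # (values invented for illustration):
  #   "inst1.example.com": {
  #     "tags": [], "admin_up": True, "vcpus": 2, "memory": 1024,
  #     "os": "debian-image",
  #     "nodes": ["node1.example.com", "node2.example.com"],
  #     "nics": [{"mac": "aa:00:00:00:00:01", "ip": None,
  #               "mode": "bridged", "link": "xen-br0", "bridge": "xen-br0"}],
  #     "disks": [{"size": 1024, "mode": "w"}],
  #     "disk_template": "drbd", "hypervisor": "xen-pvm",
  #     "disk_space_total": ...}
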
  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    return request

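  # Example of the allocation request built by _AddNewInstance above (values
  # invented for illustration; the "type" key is added by _BuildInputData):
  #   {"name": "inst1.example.com", "disk_template": "drbd", "tags": [],
  #    "os": "debian-image", "vcpus": 2, "memory": 1024,
  #    "disks": [{"size": 1024, "mode": "w"}], "disk_space_total": ...,
  #    "nics": [...], "required_nodes": 2}
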
  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

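  # Example of the relocation request built by _AddRelocateInstance above
  # (node and instance names invented for illustration):
  #   {"name": "inst1.example.com", "disk_space_total": ...,
  #    "required_nodes": 1, "relocate_from": ["node2.example.com"]}
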
  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {
      "evac_nodes": self.evac_nodes
      }
    return request

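  # Example of the multi-evacuation request built by _AddEvacuateNodes above
  # (node names invented for illustration):
  #   {"evac_nodes": ["node3.example.com", "node4.example.com"]}
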
  def _BuildInputData(self, fn):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)

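  # _BuildInputData is given one of the request builders above (for example
  # self._AddNewInstance); the resulting self.in_text is the serialized
  # cluster data with an extra "request" section that also carries the
  # allocator mode under the "type" key.
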
  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

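  # Note: call_fn defaults to the master node's iallocator runner RPC; it may
  # be overridden (e.g. by tests) with any callable that accepts
  # (master_node, allocator_name, in_text) and returns an RPC-style result
  # offering Raise() and a payload attribute, as used above.
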
  def _ValidateResult(self):
    """Process the allocator results.

    This will process and, if successful, save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["result"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
                               " is not a list")
    self.out_data = rdict


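# A minimal well-formed allocator reply, as checked by _ValidateResult above,
# looks like this (illustrative): a mapping with at least the "success",
# "info" and "result" keys, where "result" must be a list, e.g.
#   {"success": true, "info": "allocation successful", "result": ["node2"]}
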
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  _OP_PARAMS = [
    ("direction", ht.NoDefault,
     ht.TElemOf(constants.VALID_IALLOCATOR_DIRECTIONS)),
    ("mode", ht.NoDefault, ht.TElemOf(constants.VALID_IALLOCATOR_MODES)),
    ("name", ht.NoDefault, ht.TNonEmptyString),
    ("nics", ht.NoDefault, ht.TOr(ht.TNone, ht.TListOf(
      ht.TDictOf(ht.TElemOf(["mac", "ip", "bridge"]),
                 ht.TOr(ht.TNone, ht.TNonEmptyString))))),
    ("disks", ht.NoDefault, ht.TOr(ht.TNone, ht.TList)),
    ("hypervisor", None, ht.TMaybeString),
    ("allocator", None, ht.TMaybeString),
    ("tags", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
    ("mem_size", None, ht.TOr(ht.TNone, ht.TPositiveInt)),
    ("vcpus", None, ht.TOr(ht.TNone, ht.TPositiveInt)),
    ("os", None, ht.TMaybeString),
    ("disk_template", None, ht.TMaybeString),
    ("evac_nodes", None, ht.TOr(ht.TNone, ht.TListOf(ht.TNonEmptyString))),
    ]

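  # Illustrative parameter set for an allocation test (all values invented;
  # CheckPrereq below additionally requires each "disks" row to be a dict
  # with an integer "size" and a "mode" of 'r' or 'w'):
  #   direction=IALLOCATOR_DIR_IN, mode=IALLOCATOR_MODE_ALLOC,
  #   name="new-inst.example.com", mem_size=1024, vcpus=2, os="debian-image",
  #   disk_template="drbd", disks=[{"size": 1024, "mode": "w"}],
  #   nics=[{"mac": None, "ip": None, "bridge": None}], tags=[]
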
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

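  # Exec (below) either returns the generated allocator input text
  # (direction IALLOCATOR_DIR_IN) or actually runs the named allocator and
  # returns its raw, unvalidated output (direction IALLOCATOR_DIR_OUT, the
  # only other direction accepted by CheckPrereq).
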
  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result