1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008, 2009, 2010 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable-msg=W0201,C0302
25

    
26
# W0201 since most LU attributes are defined in CheckPrereq or similar
27
# functions
28

    
29
# C0302: since we have waaaay too many lines in this module
30

    
31
import os
32
import os.path
33
import time
34
import re
35
import platform
36
import logging
37
import copy
38
import OpenSSL
39
import socket
40
import tempfile
41
import shutil
42
import operator
43

    
44
from ganeti import ssh
45
from ganeti import utils
46
from ganeti import errors
47
from ganeti import hypervisor
48
from ganeti import locking
49
from ganeti import constants
50
from ganeti import objects
51
from ganeti import serializer
52
from ganeti import ssconf
53
from ganeti import uidpool
54
from ganeti import compat
55
from ganeti import masterd
56
from ganeti import netutils
57
from ganeti import ht
58
from ganeti import query
59
from ganeti import qlang
60

    
61
import ganeti.masterd.instance # pylint: disable-msg=W0611
62

    
63
# Common opcode attributes
64

    
65
#: output fields for a query operation
66
_POutputFields = ("output_fields", ht.NoDefault, ht.TListOf(ht.TNonEmptyString))
67

    
68

    
69
#: the shutdown timeout
70
_PShutdownTimeout = ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT,
71
                     ht.TPositiveInt)
72

    
73
#: the force parameter
74
_PForce = ("force", False, ht.TBool)
75

    
76
#: a required instance name (for single-instance LUs)
77
_PInstanceName = ("instance_name", ht.NoDefault, ht.TNonEmptyString)
78

    
79
#: Whether to ignore offline nodes
80
_PIgnoreOfflineNodes = ("ignore_offline_nodes", False, ht.TBool)
81

    
82
#: a required node name (for single-node LUs)
83
_PNodeName = ("node_name", ht.NoDefault, ht.TNonEmptyString)
84

    
85
#: a required node group name (for single-group LUs)
86
_PGroupName = ("group_name", ht.NoDefault, ht.TNonEmptyString)
87

    
88
#: the migration type (live/non-live)
89
_PMigrationMode = ("mode", None,
90
                   ht.TOr(ht.TNone, ht.TElemOf(constants.HT_MIGRATION_MODES)))
91

    
92
#: the obsolete 'live' mode (boolean)
93
_PMigrationLive = ("live", None, ht.TMaybeBool)
94

    
95

    
96
# End types
97
class LogicalUnit(object):
98
  """Logical Unit base class.
99

100
  Subclasses must follow these rules:
101
    - implement ExpandNames
102
    - implement CheckPrereq (except when tasklets are used)
103
    - implement Exec (except when tasklets are used)
104
    - implement BuildHooksEnv
105
    - redefine HPATH and HTYPE
106
    - optionally redefine their run requirements:
107
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
108

109
  Note that all commands require root permissions.
110

111
  @ivar dry_run_result: the value (if any) that will be returned to the caller
112
      in dry-run mode (signalled by opcode dry_run parameter)
113
  @cvar _OP_PARAMS: a list of opcode attributes, the default values
114
      they should get if not already defined, and types they must match
115

116
  """
117
  HPATH = None
118
  HTYPE = None
119
  _OP_PARAMS = []
120
  REQ_BGL = True
121

    
122
  def __init__(self, processor, op, context, rpc):
123
    """Constructor for LogicalUnit.
124

125
    This needs to be overridden in derived classes in order to check op
126
    validity.
127

128
    """
129
    self.proc = processor
130
    self.op = op
131
    self.cfg = context.cfg
132
    self.context = context
133
    self.rpc = rpc
134
    # Dicts used to declare locking needs to mcpu
135
    self.needed_locks = None
136
    self.acquired_locks = {}
137
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
138
    self.add_locks = {}
139
    self.remove_locks = {}
140
    # Used to force good behavior when calling helper functions
141
    self.recalculate_locks = {}
142
    self.__ssh = None
143
    # logging
144
    self.Log = processor.Log # pylint: disable-msg=C0103
145
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
146
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
147
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
148
    # support for dry-run
149
    self.dry_run_result = None
150
    # support for generic debug attribute
151
    if (not hasattr(self.op, "debug_level") or
152
        not isinstance(self.op.debug_level, int)):
153
      self.op.debug_level = 0
154

    
155
    # Tasklets
156
    self.tasklets = None
157

    
158
    # The new kind-of-type-system
159
    op_id = self.op.OP_ID
160
    for attr_name, aval, test in self._OP_PARAMS:
161
      if not hasattr(op, attr_name):
162
        if aval == ht.NoDefault:
163
          raise errors.OpPrereqError("Required parameter '%s.%s' missing" %
164
                                     (op_id, attr_name), errors.ECODE_INVAL)
165
        else:
166
          if callable(aval):
167
            dval = aval()
168
          else:
169
            dval = aval
170
          setattr(self.op, attr_name, dval)
171
      attr_val = getattr(op, attr_name)
172
      if test == ht.NoType:
173
        # no tests here
174
        continue
175
      if not callable(test):
176
        raise errors.ProgrammerError("Validation for parameter '%s.%s' failed,"
177
                                     " given type is not a proper type (%s)" %
178
                                     (op_id, attr_name, test))
179
      if not test(attr_val):
180
        logging.error("OpCode %s, parameter %s, has invalid type %s/value %s",
181
                      self.op.OP_ID, attr_name, type(attr_val), attr_val)
182
        raise errors.OpPrereqError("Parameter '%s.%s' fails validation" %
183
                                   (op_id, attr_name), errors.ECODE_INVAL)
184

    
185
    self.CheckArguments()
186

    
187
  def __GetSSH(self):
188
    """Returns the SshRunner object
189

190
    """
191
    if not self.__ssh:
192
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
193
    return self.__ssh
194

    
195
  ssh = property(fget=__GetSSH)
196

    
197
  def CheckArguments(self):
198
    """Check syntactic validity for the opcode arguments.
199

200
    This method is for doing a simple syntactic check and ensure
201
    validity of opcode parameters, without any cluster-related
202
    checks. While the same can be accomplished in ExpandNames and/or
203
    CheckPrereq, doing these separately is better because:
204

205
      - ExpandNames is left as purely a lock-related function
206
      - CheckPrereq is run after we have acquired locks (and possibly
207
        waited for them)
208

209
    The function is allowed to change the self.op attribute so that
210
    later methods no longer need to worry about missing parameters.
211

212
    """
213
    pass
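  # Example (illustrative only; the parameter names are hypothetical): a
  # subclass doing a purely syntactic check would typically look like
  #
  #   def CheckArguments(self):
  #     if self.op.remove and self.op.rename_to:
  #       raise errors.OpPrereqError("Cannot both remove and rename",
  #                                  errors.ECODE_INVAL)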
214

    
215
  def ExpandNames(self):
216
    """Expand names for this LU.
217

218
    This method is called before starting to execute the opcode, and it should
219
    update all the parameters of the opcode to their canonical form (e.g. a
220
    short node name must be fully expanded after this method has successfully
221
    completed). This way locking, hooks, logging, etc. can work correctly.
222

223
    LUs which implement this method must also populate the self.needed_locks
224
    member, as a dict with lock levels as keys, and a list of needed lock names
225
    as values. Rules:
226

227
      - use an empty dict if you don't need any lock
228
      - if you don't need any lock at a particular level omit that level
229
      - don't put anything for the BGL level
230
      - if you want all locks at a level use locking.ALL_SET as a value
231

232
    If you need to share locks (rather than acquire them exclusively) at one
233
    level you can modify self.share_locks, setting a true value (usually 1) for
234
    that level. By default locks are not shared.
235

236
    This function can also define a list of tasklets, which then will be
237
    executed in order instead of the usual LU-level CheckPrereq and Exec
238
    functions, if those are not defined by the LU.
239

240
    Examples::
241

242
      # Acquire all nodes and one instance
243
      self.needed_locks = {
244
        locking.LEVEL_NODE: locking.ALL_SET,
245
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
246
      }
247
      # Acquire just two nodes
248
      self.needed_locks = {
249
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
250
      }
251
      # Acquire no locks
252
      self.needed_locks = {} # No, you can't leave it to the default value None
253

254
    """
255
    # The implementation of this method is mandatory only if the new LU is
256
    # concurrent, so that old LUs don't need to be changed all at the same
257
    # time.
258
    if self.REQ_BGL:
259
      self.needed_locks = {} # Exclusive LUs don't need locks.
260
    else:
261
      raise NotImplementedError
262

    
263
  def DeclareLocks(self, level):
264
    """Declare LU locking needs for a level
265

266
    While most LUs can just declare their locking needs at ExpandNames time,
267
    sometimes there's the need to calculate some locks after having acquired
268
    the ones before. This function is called just before acquiring locks at a
269
    particular level, but after acquiring the ones at lower levels, and permits
270
    such calculations. It can be used to modify self.needed_locks, and by
271
    default it does nothing.
272

273
    This function is only called if you have something already set in
274
    self.needed_locks for the level.
275

276
    @param level: Locking level which is going to be locked
277
    @type level: member of ganeti.locking.LEVELS
278

279
    """
280

    
281
  def CheckPrereq(self):
282
    """Check prerequisites for this LU.
283

284
    This method should check that the prerequisites for the execution
285
    of this LU are fulfilled. It can do internode communication, but
286
    it should be idempotent - no cluster or system changes are
287
    allowed.
288

289
    The method should raise errors.OpPrereqError in case something is
290
    not fulfilled. Its return value is ignored.
291

292
    This method should also update all the parameters of the opcode to
293
    their canonical form if it hasn't been done by ExpandNames before.
294

295
    """
296
    if self.tasklets is not None:
297
      for (idx, tl) in enumerate(self.tasklets):
298
        logging.debug("Checking prerequisites for tasklet %s/%s",
299
                      idx + 1, len(self.tasklets))
300
        tl.CheckPrereq()
301
    else:
302
      pass
303

    
304
  def Exec(self, feedback_fn):
305
    """Execute the LU.
306

307
    This method should implement the actual work. It should raise
308
    errors.OpExecError for failures that are somewhat dealt with in
309
    code, or expected.
310

311
    """
312
    if self.tasklets is not None:
313
      for (idx, tl) in enumerate(self.tasklets):
314
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
315
        tl.Exec(feedback_fn)
316
    else:
317
      raise NotImplementedError
318

    
319
  def BuildHooksEnv(self):
320
    """Build hooks environment for this LU.
321

322
    This method should return a three-element tuple consisting of: a dict
323
    containing the environment that will be used for running the
324
    specific hook for this LU, a list of node names on which the hook
325
    should run before the execution, and a list of node names on which
326
    the hook should run after the execution.
327

328
    The keys of the dict must not be prefixed with 'GANETI_', as this will
329
    be handled by the hooks runner. Also note that additional keys will be
330
    added by the hooks runner. If the LU doesn't define any
331
    environment, an empty dict (and not None) should be returned.
332

333
    If there are no nodes to return, an empty list (and not None) should be used.
334

335
    Note that if the HPATH for a LU class is None, this function will
336
    not be called.
337

338
    """
339
    raise NotImplementedError
340

    
341
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
342
    """Notify the LU about the results of its hooks.
343

344
    This method is called every time a hooks phase is executed, and notifies
345
    the Logical Unit about the hooks' result. The LU can then use it to alter
346
    its result based on the hooks.  By default the method does nothing and the
347
    previous result is passed back unchanged but any LU can define it if it
348
    wants to use the local cluster hook-scripts somehow.
349

350
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
351
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
352
    @param hook_results: the results of the multi-node hooks rpc call
353
    @param feedback_fn: function used to send feedback back to the caller
354
    @param lu_result: the previous Exec result this LU had, or None
355
        in the PRE phase
356
    @return: the new Exec result, based on the previous result
357
        and hook results
358

359
    """
360
    # API must be kept, thus we ignore the "unused argument" and "could
361
    # be a function" warnings
362
    # pylint: disable-msg=W0613,R0201
363
    return lu_result
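  # Example (illustrative only): an LU wanting to surface post-hook output
  # could override HooksCallBack roughly like this; the exact shape of
  # hook_results depends on the hooks RPC call.
  #
  #   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
  #     if phase == constants.HOOKS_PHASE_POST:
  #       feedback_fn("Post hooks ran on %d node(s)" % len(hook_results))
  #     return lu_result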
364

    
365
  def _ExpandAndLockInstance(self):
366
    """Helper function to expand and lock an instance.
367

368
    Many LUs that work on an instance take its name in self.op.instance_name
369
    and need to expand it and then declare the expanded name for locking. This
370
    function does it, and then updates self.op.instance_name to the expanded
371
    name. It also initializes needed_locks as a dict, if this hasn't been done
372
    before.
373

374
    """
375
    if self.needed_locks is None:
376
      self.needed_locks = {}
377
    else:
378
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
379
        "_ExpandAndLockInstance called with instance-level locks set"
380
    self.op.instance_name = _ExpandInstanceName(self.cfg,
381
                                                self.op.instance_name)
382
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
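  # Example (illustrative only): a single-instance LU typically calls this
  # helper from ExpandNames and then recalculates node locks in DeclareLocks.
  #
  #   def ExpandNames(self):
  #     self._ExpandAndLockInstance()
  #     self.needed_locks[locking.LEVEL_NODE] = []
  #     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE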
383

    
384
  def _LockInstancesNodes(self, primary_only=False):
385
    """Helper function to declare instances' nodes for locking.
386

387
    This function should be called after locking one or more instances to lock
388
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
389
    with all primary or secondary nodes for instances already locked and
390
    present in self.needed_locks[locking.LEVEL_INSTANCE].
391

392
    It should be called from DeclareLocks, and for safety only works if
393
    self.recalculate_locks[locking.LEVEL_NODE] is set.
394

395
    In the future it may grow parameters to just lock some instance's nodes, or
396
    to just lock primaries or secondary nodes, if needed.
397

398
    It should be called in DeclareLocks in a way similar to::
399

400
      if level == locking.LEVEL_NODE:
401
        self._LockInstancesNodes()
402

403
    @type primary_only: boolean
404
    @param primary_only: only lock primary nodes of locked instances
405

406
    """
407
    assert locking.LEVEL_NODE in self.recalculate_locks, \
408
      "_LockInstancesNodes helper function called with no nodes to recalculate"
409

    
410
    # TODO: check if we've really been called with the instance locks held
411

    
412
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
413
    # future we might want to have different behaviors depending on the value
414
    # of self.recalculate_locks[locking.LEVEL_NODE]
415
    wanted_nodes = []
416
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
417
      instance = self.context.cfg.GetInstanceInfo(instance_name)
418
      wanted_nodes.append(instance.primary_node)
419
      if not primary_only:
420
        wanted_nodes.extend(instance.secondary_nodes)
421

    
422
    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
423
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
424
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
425
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
426

    
427
    del self.recalculate_locks[locking.LEVEL_NODE]
428

    
429

    
430
class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
431
  """Simple LU which runs no hooks.
432

433
  This LU is intended as a parent for other LogicalUnits which will
434
  run no hooks, in order to reduce duplicate code.
435

436
  """
437
  HPATH = None
438
  HTYPE = None
439

    
440
  def BuildHooksEnv(self):
441
    """Empty BuildHooksEnv for NoHooksLu.
442

443
    This just raises an error.
444

445
    """
446
    assert False, "BuildHooksEnv called for NoHooksLUs"
447

    
448

    
449
class Tasklet:
450
  """Tasklet base class.
451

452
  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
453
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
454
  tasklets know nothing about locks.
455

456
  Subclasses must follow these rules:
457
    - Implement CheckPrereq
458
    - Implement Exec
459

460
  """
461
  def __init__(self, lu):
462
    self.lu = lu
463

    
464
    # Shortcuts
465
    self.cfg = lu.cfg
466
    self.rpc = lu.rpc
467

    
468
  def CheckPrereq(self):
469
    """Check prerequisites for this tasklets.
470

471
    This method should check whether the prerequisites for the execution of
472
    this tasklet are fulfilled. It can do internode communication, but it
473
    should be idempotent - no cluster or system changes are allowed.
474

475
    The method should raise errors.OpPrereqError in case something is not
476
    fulfilled. Its return value is ignored.
477

478
    This method should also update all parameters to their canonical form if it
479
    hasn't been done before.
480

481
    """
482
    pass
483

    
484
  def Exec(self, feedback_fn):
485
    """Execute the tasklet.
486

487
    This method should implement the actual work. It should raise
488
    errors.OpExecError for failures that are somewhat dealt with in code, or
489
    expected.
490

491
    """
492
    raise NotImplementedError
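# Example (illustrative only; the class name is hypothetical): a minimal
# tasklet, wired up from an LU's ExpandNames via
# "self.tasklets = [_WaitForSync(self)]".
#
#   class _WaitForSync(Tasklet):
#     def CheckPrereq(self):
#       pass
#
#     def Exec(self, feedback_fn):
#       feedback_fn("Nothing to do")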
493

    
494

    
495
class _QueryBase:
496
  """Base for query utility classes.
497

498
  """
499
  #: Attribute holding field definitions
500
  FIELDS = None
501

    
502
  def __init__(self, names, fields, use_locking):
503
    """Initializes this class.
504

505
    """
506
    self.names = names
507
    self.use_locking = use_locking
508

    
509
    self.query = query.Query(self.FIELDS, fields)
510
    self.requested_data = self.query.RequestedData()
511

    
512
    self.do_locking = None
513
    self.wanted = None
514

    
515
  def _GetNames(self, lu, all_names, lock_level):
516
    """Helper function to determine names asked for in the query.
517

518
    """
519
    if self.do_locking:
520
      names = lu.acquired_locks[lock_level]
521
    else:
522
      names = all_names
523

    
524
    if self.wanted == locking.ALL_SET:
525
      assert not self.names
526
      # caller didn't specify names, so ordering is not important
527
      return utils.NiceSort(names)
528

    
529
    # caller specified names and we must keep the same order
530
    assert self.names
531
    assert not self.do_locking or lu.acquired_locks[lock_level]
532

    
533
    missing = set(self.wanted).difference(names)
534
    if missing:
535
      raise errors.OpExecError("Some items were removed before retrieving"
536
                               " their data: %s" % missing)
537

    
538
    # Return expanded names
539
    return self.wanted
540

    
541
  @classmethod
542
  def FieldsQuery(cls, fields):
543
    """Returns list of available fields.
544

545
    @return: List of L{objects.QueryFieldDefinition}
546

547
    """
548
    if fields is None:
549
      # Client requests all fields, sort by name
550
      fdefs = sorted(query.GetAllFields(cls.FIELDS.values()),
551
                     key=operator.attrgetter("name"))
552
    else:
553
      # Keep order as requested by client
554
      fdefs = query.Query(cls.FIELDS, fields).GetFields()
555

    
556
    return objects.QueryFieldsResponse(fields=fdefs).ToDict()
557

    
558
  def ExpandNames(self, lu):
559
    """Expand names for this query.
560

561
    See L{LogicalUnit.ExpandNames}.
562

563
    """
564
    raise NotImplementedError()
565

    
566
  def DeclareLocks(self, lu, level):
567
    """Declare locks for this query.
568

569
    See L{LogicalUnit.DeclareLocks}.
570

571
    """
572
    raise NotImplementedError()
573

    
574
  def _GetQueryData(self, lu):
575
    """Collects all data for this query.
576

577
    @return: Query data object
578

579
    """
580
    raise NotImplementedError()
581

    
582
  def NewStyleQuery(self, lu):
583
    """Collect data and execute query.
584

585
    """
586
    data = self._GetQueryData(lu)
587

    
588
    return objects.QueryResponse(data=self.query.Query(data),
589
                                 fields=self.query.GetFields()).ToDict()
590

    
591
  def OldStyleQuery(self, lu):
592
    """Collect data and execute query.
593

594
    """
595
    return self.query.OldStyleQuery(self._GetQueryData(lu))
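# Example (illustrative only; the class name and field definitions are
# hypothetical): a concrete query class sets FIELDS and implements the three
# abstract methods above.
#
#   class _ExampleQuery(_QueryBase):
#     FIELDS = query.EXAMPLE_FIELDS
#
#     def ExpandNames(self, lu):
#       lu.needed_locks = {}
#       self.wanted = self.names
#
#     def DeclareLocks(self, lu, level):
#       pass
#
#     def _GetQueryData(self, lu):
#       return lu.cfg.GetAllNodesInfo().values()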
596

    
597

    
598
def _GetWantedNodes(lu, nodes):
599
  """Returns list of checked and expanded node names.
600

601
  @type lu: L{LogicalUnit}
602
  @param lu: the logical unit on whose behalf we execute
603
  @type nodes: list
604
  @param nodes: list of node names or None for all nodes
605
  @rtype: list
606
  @return: the list of nodes, sorted
607
  @raise errors.ProgrammerError: if the nodes parameter is wrong type
608

609
  """
610
  if nodes:
611
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]
612

    
613
  return utils.NiceSort(lu.cfg.GetNodeList())
614

    
615

    
616
def _GetWantedInstances(lu, instances):
617
  """Returns list of checked and expanded instance names.
618

619
  @type lu: L{LogicalUnit}
620
  @param lu: the logical unit on whose behalf we execute
621
  @type instances: list
622
  @param instances: list of instance names or None for all instances
623
  @rtype: list
624
  @return: the list of instances, sorted
625
  @raise errors.OpPrereqError: if the instances parameter is wrong type
626
  @raise errors.OpPrereqError: if any of the passed instances is not found
627

628
  """
629
  if instances:
630
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
631
  else:
632
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
633
  return wanted
634

    
635

    
636
def _GetUpdatedParams(old_params, update_dict,
637
                      use_default=True, use_none=False):
638
  """Return the new version of a parameter dictionary.
639

640
  @type old_params: dict
641
  @param old_params: old parameters
642
  @type update_dict: dict
643
  @param update_dict: dict containing new parameter values, or
644
      constants.VALUE_DEFAULT to reset the parameter to its default
645
      value
646
  @type use_default: boolean
647
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
648
      values as 'to be deleted' values
649
  @type use_none: boolean
650
  @param use_none: whether to recognise C{None} values as 'to be
651
      deleted' values
652
  @rtype: dict
653
  @return: the new parameter dictionary
654

655
  """
656
  params_copy = copy.deepcopy(old_params)
657
  for key, val in update_dict.iteritems():
658
    if ((use_default and val == constants.VALUE_DEFAULT) or
659
        (use_none and val is None)):
660
      try:
661
        del params_copy[key]
662
      except KeyError:
663
        pass
664
    else:
665
      params_copy[key] = val
666
  return params_copy
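# For illustration:
#   _GetUpdatedParams({"a": 1, "b": 2}, {"a": constants.VALUE_DEFAULT, "c": 3})
# returns {"b": 2, "c": 3}; with use_none=True, an update of {"b": None} would
# drop "b" as well.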
667

    
668

    
669
def _CheckOutputFields(static, dynamic, selected):
670
  """Checks whether all selected fields are valid.
671

672
  @type static: L{utils.FieldSet}
673
  @param static: static fields set
674
  @type dynamic: L{utils.FieldSet}
675
  @param dynamic: dynamic fields set
676

677
  """
678
  f = utils.FieldSet()
679
  f.Extend(static)
680
  f.Extend(dynamic)
681

    
682
  delta = f.NonMatching(selected)
683
  if delta:
684
    raise errors.OpPrereqError("Unknown output fields selected: %s"
685
                               % ",".join(delta), errors.ECODE_INVAL)
686

    
687

    
688
def _CheckGlobalHvParams(params):
689
  """Validates that given hypervisor params are not global ones.
690

691
  This will ensure that instances don't get customised versions of
692
  global params.
693

694
  """
695
  used_globals = constants.HVC_GLOBALS.intersection(params)
696
  if used_globals:
697
    msg = ("The following hypervisor parameters are global and cannot"
698
           " be customized at instance level, please modify them at"
699
           " cluster level: %s" % utils.CommaJoin(used_globals))
700
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
701

    
702

    
703
def _CheckNodeOnline(lu, node, msg=None):
704
  """Ensure that a given node is online.
705

706
  @param lu: the LU on behalf of which we make the check
707
  @param node: the node to check
708
  @param msg: if passed, should be a message to replace the default one
709
  @raise errors.OpPrereqError: if the node is offline
710

711
  """
712
  if msg is None:
713
    msg = "Can't use offline node"
714
  if lu.cfg.GetNodeInfo(node).offline:
715
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
716

    
717

    
718
def _CheckNodeNotDrained(lu, node):
719
  """Ensure that a given node is not drained.
720

721
  @param lu: the LU on behalf of which we make the check
722
  @param node: the node to check
723
  @raise errors.OpPrereqError: if the node is drained
724

725
  """
726
  if lu.cfg.GetNodeInfo(node).drained:
727
    raise errors.OpPrereqError("Can't use drained node %s" % node,
728
                               errors.ECODE_STATE)
729

    
730

    
731
def _CheckNodeVmCapable(lu, node):
732
  """Ensure that a given node is vm capable.
733

734
  @param lu: the LU on behalf of which we make the check
735
  @param node: the node to check
736
  @raise errors.OpPrereqError: if the node is not vm capable
737

738
  """
739
  if not lu.cfg.GetNodeInfo(node).vm_capable:
740
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
741
                               errors.ECODE_STATE)
742

    
743

    
744
def _CheckNodeHasOS(lu, node, os_name, force_variant):
745
  """Ensure that a node supports a given OS.
746

747
  @param lu: the LU on behalf of which we make the check
748
  @param node: the node to check
749
  @param os_name: the OS to query about
750
  @param force_variant: whether to ignore variant errors
751
  @raise errors.OpPrereqError: if the node is not supporting the OS
752

753
  """
754
  result = lu.rpc.call_os_get(node, os_name)
755
  result.Raise("OS '%s' not in supported OS list for node %s" %
756
               (os_name, node),
757
               prereq=True, ecode=errors.ECODE_INVAL)
758
  if not force_variant:
759
    _CheckOSVariant(result.payload, os_name)
760

    
761

    
762
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
763
  """Ensure that a node has the given secondary ip.
764

765
  @type lu: L{LogicalUnit}
766
  @param lu: the LU on behalf of which we make the check
767
  @type node: string
768
  @param node: the node to check
769
  @type secondary_ip: string
770
  @param secondary_ip: the ip to check
771
  @type prereq: boolean
772
  @param prereq: whether to throw a prerequisite or an execute error
773
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
774
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
775

776
  """
777
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
778
  result.Raise("Failure checking secondary ip on node %s" % node,
779
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
780
  if not result.payload:
781
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
782
           " please fix and re-run this command" % secondary_ip)
783
    if prereq:
784
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
785
    else:
786
      raise errors.OpExecError(msg)
787

    
788

    
789
def _RequireFileStorage():
790
  """Checks that file storage is enabled.
791

792
  @raise errors.OpPrereqError: when file storage is disabled
793

794
  """
795
  if not constants.ENABLE_FILE_STORAGE:
796
    raise errors.OpPrereqError("File storage disabled at configure time",
797
                               errors.ECODE_INVAL)
798

    
799

    
800
def _CheckDiskTemplate(template):
801
  """Ensure a given disk template is valid.
802

803
  """
804
  if template not in constants.DISK_TEMPLATES:
805
    msg = ("Invalid disk template name '%s', valid templates are: %s" %
806
           (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
807
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
808
  if template == constants.DT_FILE:
809
    _RequireFileStorage()
810
  return True
811

    
812

    
813
def _CheckStorageType(storage_type):
814
  """Ensure a given storage type is valid.
815

816
  """
817
  if storage_type not in constants.VALID_STORAGE_TYPES:
818
    raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
819
                               errors.ECODE_INVAL)
820
  if storage_type == constants.ST_FILE:
821
    _RequireFileStorage()
822
  return True
823

    
824

    
825
def _GetClusterDomainSecret():
826
  """Reads the cluster domain secret.
827

828
  """
829
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
830
                               strict=True)
831

    
832

    
833
def _CheckInstanceDown(lu, instance, reason):
834
  """Ensure that an instance is not running."""
835
  if instance.admin_up:
836
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
837
                               (instance.name, reason), errors.ECODE_STATE)
838

    
839
  pnode = instance.primary_node
840
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
841
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
842
              prereq=True, ecode=errors.ECODE_ENVIRON)
843

    
844
  if instance.name in ins_l.payload:
845
    raise errors.OpPrereqError("Instance %s is running, %s" %
846
                               (instance.name, reason), errors.ECODE_STATE)
847

    
848

    
849
def _ExpandItemName(fn, name, kind):
850
  """Expand an item name.
851

852
  @param fn: the function to use for expansion
853
  @param name: requested item name
854
  @param kind: text description ('Node' or 'Instance')
855
  @return: the resolved (full) name
856
  @raise errors.OpPrereqError: if the item is not found
857

858
  """
859
  full_name = fn(name)
860
  if full_name is None:
861
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
862
                               errors.ECODE_NOENT)
863
  return full_name
864

    
865

    
866
def _ExpandNodeName(cfg, name):
867
  """Wrapper over L{_ExpandItemName} for nodes."""
868
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
869

    
870

    
871
def _ExpandInstanceName(cfg, name):
872
  """Wrapper over L{_ExpandItemName} for instance."""
873
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
874

    
875

    
876
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
877
                          memory, vcpus, nics, disk_template, disks,
878
                          bep, hvp, hypervisor_name):
879
  """Builds instance related env variables for hooks
880

881
  This builds the hook environment from individual variables.
882

883
  @type name: string
884
  @param name: the name of the instance
885
  @type primary_node: string
886
  @param primary_node: the name of the instance's primary node
887
  @type secondary_nodes: list
888
  @param secondary_nodes: list of secondary nodes as strings
889
  @type os_type: string
890
  @param os_type: the name of the instance's OS
891
  @type status: boolean
892
  @param status: the should_run status of the instance
893
  @type memory: string
894
  @param memory: the memory size of the instance
895
  @type vcpus: string
896
  @param vcpus: the count of VCPUs the instance has
897
  @type nics: list
898
  @param nics: list of tuples (ip, mac, mode, link) representing
899
      the NICs the instance has
900
  @type disk_template: string
901
  @param disk_template: the disk template of the instance
902
  @type disks: list
903
  @param disks: the list of (size, mode) pairs
904
  @type bep: dict
905
  @param bep: the backend parameters for the instance
906
  @type hvp: dict
907
  @param hvp: the hypervisor parameters for the instance
908
  @type hypervisor_name: string
909
  @param hypervisor_name: the hypervisor for the instance
910
  @rtype: dict
911
  @return: the hook environment for this instance
912

913
  """
914
  if status:
915
    str_status = "up"
916
  else:
917
    str_status = "down"
918
  env = {
919
    "OP_TARGET": name,
920
    "INSTANCE_NAME": name,
921
    "INSTANCE_PRIMARY": primary_node,
922
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
923
    "INSTANCE_OS_TYPE": os_type,
924
    "INSTANCE_STATUS": str_status,
925
    "INSTANCE_MEMORY": memory,
926
    "INSTANCE_VCPUS": vcpus,
927
    "INSTANCE_DISK_TEMPLATE": disk_template,
928
    "INSTANCE_HYPERVISOR": hypervisor_name,
929
  }
930

    
931
  if nics:
932
    nic_count = len(nics)
933
    for idx, (ip, mac, mode, link) in enumerate(nics):
934
      if ip is None:
935
        ip = ""
936
      env["INSTANCE_NIC%d_IP" % idx] = ip
937
      env["INSTANCE_NIC%d_MAC" % idx] = mac
938
      env["INSTANCE_NIC%d_MODE" % idx] = mode
939
      env["INSTANCE_NIC%d_LINK" % idx] = link
940
      if mode == constants.NIC_MODE_BRIDGED:
941
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
942
  else:
943
    nic_count = 0
944

    
945
  env["INSTANCE_NIC_COUNT"] = nic_count
946

    
947
  if disks:
948
    disk_count = len(disks)
949
    for idx, (size, mode) in enumerate(disks):
950
      env["INSTANCE_DISK%d_SIZE" % idx] = size
951
      env["INSTANCE_DISK%d_MODE" % idx] = mode
952
  else:
953
    disk_count = 0
954

    
955
  env["INSTANCE_DISK_COUNT"] = disk_count
956

    
957
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
958
    for key, value in source.items():
959
      env["INSTANCE_%s_%s" % (kind, key)] = value
960

    
961
  return env
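# For illustration, an instance with one bridged NIC and one disk yields keys
# such as OP_TARGET, INSTANCE_NAME, INSTANCE_PRIMARY, INSTANCE_NIC_COUNT,
# INSTANCE_NIC0_MAC, INSTANCE_NIC0_BRIDGE, INSTANCE_DISK_COUNT,
# INSTANCE_DISK0_SIZE and INSTANCE_BE_*/INSTANCE_HV_* entries; the GANETI_
# prefix is added later by the hooks runner.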
962

    
963

    
964
def _NICListToTuple(lu, nics):
965
  """Build a list of nic information tuples.
966

967
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
968
  value in LUQueryInstanceData.
969

970
  @type lu:  L{LogicalUnit}
971
  @param lu: the logical unit on whose behalf we execute
972
  @type nics: list of L{objects.NIC}
973
  @param nics: list of nics to convert to hooks tuples
974

975
  """
976
  hooks_nics = []
977
  cluster = lu.cfg.GetClusterInfo()
978
  for nic in nics:
979
    ip = nic.ip
980
    mac = nic.mac
981
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
982
    mode = filled_params[constants.NIC_MODE]
983
    link = filled_params[constants.NIC_LINK]
984
    hooks_nics.append((ip, mac, mode, link))
985
  return hooks_nics
986

    
987

    
988
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
989
  """Builds instance related env variables for hooks from an object.
990

991
  @type lu: L{LogicalUnit}
992
  @param lu: the logical unit on whose behalf we execute
993
  @type instance: L{objects.Instance}
994
  @param instance: the instance for which we should build the
995
      environment
996
  @type override: dict
997
  @param override: dictionary with key/values that will override
998
      our values
999
  @rtype: dict
1000
  @return: the hook environment dictionary
1001

1002
  """
1003
  cluster = lu.cfg.GetClusterInfo()
1004
  bep = cluster.FillBE(instance)
1005
  hvp = cluster.FillHV(instance)
1006
  args = {
1007
    'name': instance.name,
1008
    'primary_node': instance.primary_node,
1009
    'secondary_nodes': instance.secondary_nodes,
1010
    'os_type': instance.os,
1011
    'status': instance.admin_up,
1012
    'memory': bep[constants.BE_MEMORY],
1013
    'vcpus': bep[constants.BE_VCPUS],
1014
    'nics': _NICListToTuple(lu, instance.nics),
1015
    'disk_template': instance.disk_template,
1016
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
1017
    'bep': bep,
1018
    'hvp': hvp,
1019
    'hypervisor_name': instance.hypervisor,
1020
  }
1021
  if override:
1022
    args.update(override)
1023
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
1024

    
1025

    
1026
def _AdjustCandidatePool(lu, exceptions):
1027
  """Adjust the candidate pool after node operations.
1028

1029
  """
1030
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1031
  if mod_list:
1032
    lu.LogInfo("Promoted nodes to master candidate role: %s",
1033
               utils.CommaJoin(node.name for node in mod_list))
1034
    for name in mod_list:
1035
      lu.context.ReaddNode(name)
1036
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1037
  if mc_now > mc_max:
1038
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1039
               (mc_now, mc_max))
1040

    
1041

    
1042
def _DecideSelfPromotion(lu, exceptions=None):
1043
  """Decide whether I should promote myself as a master candidate.
1044

1045
  """
1046
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1047
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1048
  # the new node will increase mc_max by one, so:
1049
  mc_should = min(mc_should + 1, cp_size)
1050
  return mc_now < mc_should
1051

    
1052

    
1053
def _CheckNicsBridgesExist(lu, target_nics, target_node):
1054
  """Check that the brigdes needed by a list of nics exist.
1055

1056
  """
1057
  cluster = lu.cfg.GetClusterInfo()
1058
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1059
  brlist = [params[constants.NIC_LINK] for params in paramslist
1060
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1061
  if brlist:
1062
    result = lu.rpc.call_bridges_exist(target_node, brlist)
1063
    result.Raise("Error checking bridges on destination node '%s'" %
1064
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1065

    
1066

    
1067
def _CheckInstanceBridgesExist(lu, instance, node=None):
1068
  """Check that the brigdes needed by an instance exist.
1069

1070
  """
1071
  if node is None:
1072
    node = instance.primary_node
1073
  _CheckNicsBridgesExist(lu, instance.nics, node)
1074

    
1075

    
1076
def _CheckOSVariant(os_obj, name):
1077
  """Check whether an OS name conforms to the os variants specification.
1078

1079
  @type os_obj: L{objects.OS}
1080
  @param os_obj: OS object to check
1081
  @type name: string
1082
  @param name: OS name passed by the user, to check for validity
1083

1084
  """
1085
  if not os_obj.supported_variants:
1086
    return
1087
  variant = objects.OS.GetVariant(name)
1088
  if not variant:
1089
    raise errors.OpPrereqError("OS name must include a variant",
1090
                               errors.ECODE_INVAL)
1091

    
1092
  if variant not in os_obj.supported_variants:
1093
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1094

    
1095

    
1096
def _GetNodeInstancesInner(cfg, fn):
1097
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1098

    
1099

    
1100
def _GetNodeInstances(cfg, node_name):
1101
  """Returns a list of all primary and secondary instances on a node.
1102

1103
  """
1104

    
1105
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1106

    
1107

    
1108
def _GetNodePrimaryInstances(cfg, node_name):
1109
  """Returns primary instances on a node.
1110

1111
  """
1112
  return _GetNodeInstancesInner(cfg,
1113
                                lambda inst: node_name == inst.primary_node)
1114

    
1115

    
1116
def _GetNodeSecondaryInstances(cfg, node_name):
1117
  """Returns secondary instances on a node.
1118

1119
  """
1120
  return _GetNodeInstancesInner(cfg,
1121
                                lambda inst: node_name in inst.secondary_nodes)
1122

    
1123

    
1124
def _GetStorageTypeArgs(cfg, storage_type):
1125
  """Returns the arguments for a storage type.
1126

1127
  """
1128
  # Special case for file storage
1129
  if storage_type == constants.ST_FILE:
1130
    # storage.FileStorage wants a list of storage directories
1131
    return [[cfg.GetFileStorageDir()]]
1132

    
1133
  return []
1134

    
1135

    
1136
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1137
  faulty = []
1138

    
1139
  for dev in instance.disks:
1140
    cfg.SetDiskID(dev, node_name)
1141

    
1142
  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1143
  result.Raise("Failed to get disk status from node %s" % node_name,
1144
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
1145

    
1146
  for idx, bdev_status in enumerate(result.payload):
1147
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1148
      faulty.append(idx)
1149

    
1150
  return faulty
1151

    
1152

    
1153
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1154
  """Check the sanity of iallocator and node arguments and use the
1155
  cluster-wide iallocator if appropriate.
1156

1157
  Check that at most one of (iallocator, node) is specified. If none is
1158
  specified, then the LU's opcode's iallocator slot is filled with the
1159
  cluster-wide default iallocator.
1160

1161
  @type iallocator_slot: string
1162
  @param iallocator_slot: the name of the opcode iallocator slot
1163
  @type node_slot: string
1164
  @param node_slot: the name of the opcode target node slot
1165

1166
  """
1167
  node = getattr(lu.op, node_slot, None)
1168
  iallocator = getattr(lu.op, iallocator_slot, None)
1169

    
1170
  if node is not None and iallocator is not None:
1171
    raise errors.OpPrereqError("Do not specify both, iallocator and node.",
1172
                               errors.ECODE_INVAL)
1173
  elif node is None and iallocator is None:
1174
    default_iallocator = lu.cfg.GetDefaultIAllocator()
1175
    if default_iallocator:
1176
      setattr(lu.op, iallocator_slot, default_iallocator)
1177
    else:
1178
      raise errors.OpPrereqError("No iallocator or node given and no"
1179
                                 " cluster-wide default iallocator found."
1180
                                 " Please specify either an iallocator or a"
1181
                                 " node, or set a cluster-wide default"
1182
                                 " iallocator.")
1183

    
1184

    
1185
class LUPostInitCluster(LogicalUnit):
1186
  """Logical unit for running hooks after cluster initialization.
1187

1188
  """
1189
  HPATH = "cluster-init"
1190
  HTYPE = constants.HTYPE_CLUSTER
1191

    
1192
  def BuildHooksEnv(self):
1193
    """Build hooks env.
1194

1195
    """
1196
    env = {"OP_TARGET": self.cfg.GetClusterName()}
1197
    mn = self.cfg.GetMasterNode()
1198
    return env, [], [mn]
1199

    
1200
  def Exec(self, feedback_fn):
1201
    """Nothing to do.
1202

1203
    """
1204
    return True
1205

    
1206

    
1207
class LUDestroyCluster(LogicalUnit):
1208
  """Logical unit for destroying the cluster.
1209

1210
  """
1211
  HPATH = "cluster-destroy"
1212
  HTYPE = constants.HTYPE_CLUSTER
1213

    
1214
  def BuildHooksEnv(self):
1215
    """Build hooks env.
1216

1217
    """
1218
    env = {"OP_TARGET": self.cfg.GetClusterName()}
1219
    return env, [], []
1220

    
1221
  def CheckPrereq(self):
1222
    """Check prerequisites.
1223

1224
    This checks whether the cluster is empty.
1225

1226
    Any errors are signaled by raising errors.OpPrereqError.
1227

1228
    """
1229
    master = self.cfg.GetMasterNode()
1230

    
1231
    nodelist = self.cfg.GetNodeList()
1232
    if len(nodelist) != 1 or nodelist[0] != master:
1233
      raise errors.OpPrereqError("There are still %d node(s) in"
1234
                                 " this cluster." % (len(nodelist) - 1),
1235
                                 errors.ECODE_INVAL)
1236
    instancelist = self.cfg.GetInstanceList()
1237
    if instancelist:
1238
      raise errors.OpPrereqError("There are still %d instance(s) in"
1239
                                 " this cluster." % len(instancelist),
1240
                                 errors.ECODE_INVAL)
1241

    
1242
  def Exec(self, feedback_fn):
1243
    """Destroys the cluster.
1244

1245
    """
1246
    master = self.cfg.GetMasterNode()
1247

    
1248
    # Run post hooks on master node before it's removed
1249
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
1250
    try:
1251
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
1252
    except:
1253
      # pylint: disable-msg=W0702
1254
      self.LogWarning("Errors occurred running hooks on %s" % master)
1255

    
1256
    result = self.rpc.call_node_stop_master(master, False)
1257
    result.Raise("Could not disable the master role")
1258

    
1259
    return master
1260

    
1261

    
1262
def _VerifyCertificate(filename):
1263
  """Verifies a certificate for LUVerifyCluster.
1264

1265
  @type filename: string
1266
  @param filename: Path to PEM file
1267

1268
  """
1269
  try:
1270
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1271
                                           utils.ReadFile(filename))
1272
  except Exception, err: # pylint: disable-msg=W0703
1273
    return (LUVerifyCluster.ETYPE_ERROR,
1274
            "Failed to load X509 certificate %s: %s" % (filename, err))
1275

    
1276
  (errcode, msg) = \
1277
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1278
                                constants.SSL_CERT_EXPIRATION_ERROR)
1279

    
1280
  if msg:
1281
    fnamemsg = "While verifying %s: %s" % (filename, msg)
1282
  else:
1283
    fnamemsg = None
1284

    
1285
  if errcode is None:
1286
    return (None, fnamemsg)
1287
  elif errcode == utils.CERT_WARNING:
1288
    return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
1289
  elif errcode == utils.CERT_ERROR:
1290
    return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)
1291

    
1292
  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1293

    
1294

    
1295
class LUVerifyCluster(LogicalUnit):
1296
  """Verifies the cluster status.
1297

1298
  """
1299
  HPATH = "cluster-verify"
1300
  HTYPE = constants.HTYPE_CLUSTER
1301
  _OP_PARAMS = [
1302
    ("skip_checks", ht.EmptyList,
1303
     ht.TListOf(ht.TElemOf(constants.VERIFY_OPTIONAL_CHECKS))),
1304
    ("verbose", False, ht.TBool),
1305
    ("error_codes", False, ht.TBool),
1306
    ("debug_simulate_errors", False, ht.TBool),
1307
    ]
1308
  REQ_BGL = False
1309

    
1310
  TCLUSTER = "cluster"
1311
  TNODE = "node"
1312
  TINSTANCE = "instance"
1313

    
1314
  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1315
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1316
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1317
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1318
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1319
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1320
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1321
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1322
  ENODEDRBD = (TNODE, "ENODEDRBD")
1323
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1324
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1325
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
1326
  ENODEHV = (TNODE, "ENODEHV")
1327
  ENODELVM = (TNODE, "ENODELVM")
1328
  ENODEN1 = (TNODE, "ENODEN1")
1329
  ENODENET = (TNODE, "ENODENET")
1330
  ENODEOS = (TNODE, "ENODEOS")
1331
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1332
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1333
  ENODERPC = (TNODE, "ENODERPC")
1334
  ENODESSH = (TNODE, "ENODESSH")
1335
  ENODEVERSION = (TNODE, "ENODEVERSION")
1336
  ENODESETUP = (TNODE, "ENODESETUP")
1337
  ENODETIME = (TNODE, "ENODETIME")
1338

    
1339
  ETYPE_FIELD = "code"
1340
  ETYPE_ERROR = "ERROR"
1341
  ETYPE_WARNING = "WARNING"
1342

    
1343
  _HOOKS_INDENT_RE = re.compile("^", re.M)
1344

    
1345
  class NodeImage(object):
1346
    """A class representing the logical and physical status of a node.
1347

1348
    @type name: string
1349
    @ivar name: the node name to which this object refers
1350
    @ivar volumes: a structure as returned from
1351
        L{ganeti.backend.GetVolumeList} (runtime)
1352
    @ivar instances: a list of running instances (runtime)
1353
    @ivar pinst: list of configured primary instances (config)
1354
    @ivar sinst: list of configured secondary instances (config)
1355
    @ivar sbp: dictionary of {secondary-node: list of instances} of all peers
1356
        of this node (config)
1357
    @ivar mfree: free memory, as reported by hypervisor (runtime)
1358
    @ivar dfree: free disk, as reported by the node (runtime)
1359
    @ivar offline: the offline status (config)
1360
    @type rpc_fail: boolean
1361
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
1362
        not whether the individual keys were correct) (runtime)
1363
    @type lvm_fail: boolean
1364
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1365
    @type hyp_fail: boolean
1366
    @ivar hyp_fail: whether the RPC call didn't return the instance list
1367
    @type ghost: boolean
1368
    @ivar ghost: whether this is a known node or not (config)
1369
    @type os_fail: boolean
1370
    @ivar os_fail: whether the RPC call didn't return valid OS data
1371
    @type oslist: list
1372
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1373
    @type vm_capable: boolean
1374
    @ivar vm_capable: whether the node can host instances
1375

1376
    """
1377
    def __init__(self, offline=False, name=None, vm_capable=True):
1378
      self.name = name
1379
      self.volumes = {}
1380
      self.instances = []
1381
      self.pinst = []
1382
      self.sinst = []
1383
      self.sbp = {}
1384
      self.mfree = 0
1385
      self.dfree = 0
1386
      self.offline = offline
1387
      self.vm_capable = vm_capable
1388
      self.rpc_fail = False
1389
      self.lvm_fail = False
1390
      self.hyp_fail = False
1391
      self.ghost = False
1392
      self.os_fail = False
1393
      self.oslist = {}
1394

    
1395
  def ExpandNames(self):
1396
    self.needed_locks = {
1397
      locking.LEVEL_NODE: locking.ALL_SET,
1398
      locking.LEVEL_INSTANCE: locking.ALL_SET,
1399
    }
1400
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1401

    
1402
  def _Error(self, ecode, item, msg, *args, **kwargs):
1403
    """Format an error message.
1404

1405
    Based on the opcode's error_codes parameter, either format a
1406
    parseable error code, or a simpler error string.
1407

1408
    This must be called only from Exec and functions called from Exec.
1409

1410
    """
1411
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1412
    itype, etxt = ecode
1413
    # first complete the msg
1414
    if args:
1415
      msg = msg % args
1416
    # then format the whole message
1417
    if self.op.error_codes:
1418
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1419
    else:
1420
      if item:
1421
        item = " " + item
1422
      else:
1423
        item = ""
1424
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1425
    # and finally report it via the feedback_fn
1426
    self._feedback_fn("  - %s" % msg)
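    # For illustration, with error_codes enabled a message is emitted as
    # "ERROR:ENODELVM:node:node1.example.com:<msg>"; otherwise it is rendered
    # as "ERROR: node node1.example.com: <msg>".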
1427

    
1428
  def _ErrorIf(self, cond, *args, **kwargs):
1429
    """Log an error message if the passed condition is True.
1430

1431
    """
1432
    cond = bool(cond) or self.op.debug_simulate_errors
1433
    if cond:
1434
      self._Error(*args, **kwargs)
1435
    # do not mark the operation as failed for WARN cases only
1436
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1437
      self.bad = self.bad or cond
1438

    
1439
  def _VerifyNode(self, ninfo, nresult):
1440
    """Perform some basic validation on data returned from a node.
1441

1442
      - check the result data structure is well formed and has all the
1443
        mandatory fields
1444
      - check ganeti version
1445

1446
    @type ninfo: L{objects.Node}
1447
    @param ninfo: the node to check
1448
    @param nresult: the results from the node
1449
    @rtype: boolean
1450
    @return: whether overall this call was successful (and we can expect
1451
         reasonable values in the response)
1452

1453
    """
1454
    node = ninfo.name
1455
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1456

    
1457
    # main result, nresult should be a non-empty dict
1458
    test = not nresult or not isinstance(nresult, dict)
1459
    _ErrorIf(test, self.ENODERPC, node,
1460
                  "unable to verify node: no data returned")
1461
    if test:
1462
      return False
1463

    
1464
    # compares ganeti version
1465
    local_version = constants.PROTOCOL_VERSION
1466
    remote_version = nresult.get("version", None)
1467
    test = not (remote_version and
1468
                isinstance(remote_version, (list, tuple)) and
1469
                len(remote_version) == 2)
1470
    _ErrorIf(test, self.ENODERPC, node,
1471
             "connection to node returned invalid data")
1472
    if test:
1473
      return False
1474

    
1475
    test = local_version != remote_version[0]
1476
    _ErrorIf(test, self.ENODEVERSION, node,
1477
             "incompatible protocol versions: master %s,"
1478
             " node %s", local_version, remote_version[0])
1479
    if test:
1480
      return False
1481

    
1482
    # node seems compatible, we can actually try to look into its results
1483

    
1484
    # full package version
1485
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1486
                  self.ENODEVERSION, node,
1487
                  "software version mismatch: master %s, node %s",
1488
                  constants.RELEASE_VERSION, remote_version[1],
1489
                  code=self.ETYPE_WARNING)
1490

    
1491
    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1492
    if ninfo.vm_capable and isinstance(hyp_result, dict):
1493
      for hv_name, hv_result in hyp_result.iteritems():
1494
        test = hv_result is not None
1495
        _ErrorIf(test, self.ENODEHV, node,
1496
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1497

    
1498
    test = nresult.get(constants.NV_NODESETUP,
1499
                           ["Missing NODESETUP results"])
1500
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1501
             "; ".join(test))
1502

    
1503
    return True
1504

    
1505
  def _VerifyNodeTime(self, ninfo, nresult,
1506
                      nvinfo_starttime, nvinfo_endtime):
1507
    """Check the node time.
1508

1509
    @type ninfo: L{objects.Node}
1510
    @param ninfo: the node to check
1511
    @param nresult: the remote results for the node
1512
    @param nvinfo_starttime: the start time of the RPC call
1513
    @param nvinfo_endtime: the end time of the RPC call
1514

1515
    """
1516
    node = ninfo.name
1517
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1518

    
1519
    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
      return

    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)

  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    # check pv names
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
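      # each pvlist entry unpacks as (_, pv_name, owner_vg); only the PV
      # name and its owning VG are of interest here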
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                 " '%s' of VG '%s'", pvname, owner_vg)

  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])

    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node master IP reachability data")
    if not test:
      if not nresult[constants.NV_MASTERIP]:
        if node == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
        else:
          msg = "cannot reach the master IP"
        _ErrorIf(True, self.ENODENET, node, msg)

  def _VerifyInstance(self, instance, instanceconfig, node_image,
                      diskstatus):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      pri_img = node_image[node_current]
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    for node, n_img in node_image.items():
      if (not node == node_current):
        test = instance in n_img.instances
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
                 "instance should not run on node %s", node)

    diskdata = [(nname, success, status, idx)
                for (nname, disks) in diskstatus.items()
                for idx, (success, status) in enumerate(disks)]

    for nname, success, bdev_status, idx in diskdata:
      _ErrorIf(instanceconfig.admin_up and not success,
               self.EINSTANCEFAULTYDISK, instance,
               "couldn't retrieve status for disk/%s on %s: %s",
               idx, nname, bdev_status)
      _ErrorIf((instanceconfig.admin_up and success and
                bdev_status.ldisk_status == constants.LDS_FAULTY),
               self.EINSTANCEFAULTYDISK, instance,
               "disk/%s on %s is faulty", idx, nname)

  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    @type reserved: L{ganeti.utils.FieldSet}
    @param reserved: a FieldSet of reserved volume names

    """
    for node, n_img in node_image.items():
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # skip non-healthy nodes
        continue
      for volume in n_img.volumes:
        test = ((node not in node_vol_should or
                volume not in node_vol_should[node]) and
                not reserved.Matches(volume))
        self._ErrorIf(test, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

  def _VerifyOrphanInstances(self, instancelist, node_image):
    """Verify the list of running instances.

    This checks what instances are running but unknown to the cluster.

    """
    for node, n_img in node_image.items():
      for o_inst in n_img.instances:
        test = o_inst not in instancelist
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
                      "instance %s on node %s should not exist", o_inst, node)

  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    """
    for node, n_img in node_image.items():
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
      for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate"
                      " failovers should peer node %s fail", prinode)

  def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
                       master_files):
    """Verifies and computes the node required file checksums.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param file_list: required list of files
    @param local_cksum: dictionary of local files and their checksums
    @param master_files: list of files that only masters should have

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_cksum = nresult.get(constants.NV_FILELIST, None)
    test = not isinstance(remote_cksum, dict)
    _ErrorIf(test, self.ENODEFILECHECK, node,
             "node hasn't returned file checksum data")
    if test:
      return

    for file_name in file_list:
      node_is_mc = ninfo.master_candidate
      must_have = (file_name not in master_files) or node_is_mc
      # missing
      test1 = file_name not in remote_cksum
      # invalid checksum
      test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
      # existing and good
      test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
      _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
               "file '%s' missing", file_name)
      _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
               "file '%s' has wrong checksum", file_name)
      # not candidate and this is not a must-have file
      _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist on non master"
               " candidates (and the file is outdated)", file_name)
      # all good, except non-master/non-must have combination
      _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist"
               " on non master candidates", file_name)

  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
                      drbd_map):
    """Verifies the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_helper: the configured DRBD usermode helper
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    if drbd_helper:
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
      test = (helper_result is None)
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
               "no drbd usermode helper returned")
      if helper_result:
        status, payload = helper_result
        test = not status
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "drbd usermode helper check unsuccessful: %s", payload)
        test = status and (payload != drbd_helper)
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "wrong drbd usermode helper: %s", payload)

    # compute the DRBD minors
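    # node_drbd maps each DRBD minor of this node to a
    # (instance_name, should_be_running) pair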
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, self.ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
        # ghost instance should not be running, but otherwise we
        # don't give double warnings (both ghost instance and
        # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name, instance.admin_up)

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, self.ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return

    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, self.ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, self.ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)

  def _UpdateNodeOS(self, ninfo, nresult, nimg):
    """Builds the node OS structures.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_os = nresult.get(constants.NV_OSLIST, None)
    test = (not isinstance(remote_os, list) or
            not compat.all(isinstance(v, list) and len(v) == 7
                           for v in remote_os))

    _ErrorIf(test, self.ENODEOS, node,
             "node hasn't returned valid OS data")

    nimg.os_fail = test

    if test:
      return

    os_dict = {}
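    # os_dict will map an OS name to a list of
    # (path, status, diagnose, variants, parameters, api_versions)
    # tuples, with the last three items stored as sets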

    for (name, os_path, status, diagnose,
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:

      if name not in os_dict:
        os_dict[name] = []

      # parameters is a list of lists instead of list of tuples due to
      # JSON lacking a real tuple type, fix it:
      parameters = [tuple(v) for v in parameters]
      os_dict[name].append((os_path, status, diagnose,
                            set(variants), set(parameters), set(api_ver)))

    nimg.oslist = os_dict

  def _VerifyNodeOS(self, ninfo, nimg, base):
    """Verifies the node OS list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nimg: the node image object
    @param base: the 'template' node we match against (e.g. from the master)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"

    for os_name, os_data in nimg.oslist.items():
      assert os_data, "Empty OS status for OS %s?!" % os_name
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
      _ErrorIf(not f_status, self.ENODEOS, node,
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
               "OS '%s' has multiple entries (first one shadows the rest): %s",
               os_name, utils.CommaJoin([v[0] for v in os_data]))
      # this will be caught in the backend too
      _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
               and not f_var, self.ENODEOS, node,
               "OS %s with API at least %d does not declare any variant",
               os_name, constants.OS_API_V15)
      # comparisons with the 'base' image
      test = os_name not in base.oslist
      _ErrorIf(test, self.ENODEOS, node,
               "Extra OS %s not present on reference node (%s)",
               os_name, base.name)
      if test:
        continue
      assert base.oslist[os_name], "Base node has empty OS status?"
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
      if not b_status:
        # base OS is invalid, skipping
        continue
      for kind, a, b in [("API version", f_api, b_api),
                         ("variants list", f_var, b_var),
                         ("parameters", f_param, b_param)]:
        _ErrorIf(a != b, self.ENODEOS, node,
                 "OS %s %s differs from reference node %s: %s vs. %s",
                 kind, os_name, base.name,
                 utils.CommaJoin(a), utils.CommaJoin(b))

    # check any missing OSes
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
    _ErrorIf(missing, self.ENODEOS, node,
             "OSes present on reference node %s but missing on this node: %s",
             base.name, utils.CommaJoin(missing))

  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
    """Verifies and updates the node volume data.

    This function will update a L{NodeImage}'s internal structures
    with data from the remote call.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    if vg_name is None:
      pass
    elif isinstance(lvdata, basestring):
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
    else:
      nimg.volumes = lvdata
      nimg.lvm_fail = False

  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
    """Verifies and updates the node instance list.

    If the listing was successful, then updates this node's instance
    list. Otherwise, it marks the RPC call as failed for the instance
    list key.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    idata = nresult.get(constants.NV_INSTANCELIST, None)
    test = not isinstance(idata, list)
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
    if test:
      nimg.hyp_fail = True
    else:
      nimg.instances = idata

  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
    """Verifies and computes a node information map.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # try to read free memory (from the hypervisor)
    hv_info = nresult.get(constants.NV_HVINFO, None)
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
    if not test:
      try:
        nimg.mfree = int(hv_info["memory_free"])
      except (ValueError, TypeError):
        _ErrorIf(True, self.ENODERPC, node,
                 "node returned invalid nodeinfo, check hypervisor")

    # FIXME: devise a free space model for file based instances as well
    if vg_name is not None:
      test = (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST])
      _ErrorIf(test, self.ENODELVM, node,
               "node didn't return data for the volume group '%s'"
               " - it is either missing or broken", vg_name)
      if not test:
        try:
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
        except (ValueError, TypeError):
          _ErrorIf(True, self.ENODERPC, node,
                   "node returned invalid LVM info, check LVM status")

  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
    """Gets per-disk status information for all instances.

    @type nodelist: list of strings
    @param nodelist: Node names
    @type node_image: dict of (name, L{objects.Node})
    @param node_image: Node objects
    @type instanceinfo: dict of (name, L{objects.Instance})
    @param instanceinfo: Instance objects

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    node_disks = {}
    node_disks_devonly = {}

    for nname in nodelist:
      disks = [(inst, disk)
               for instlist in [node_image[nname].pinst,
                                node_image[nname].sinst]
               for inst in instlist
               for disk in instanceinfo[inst].disks]

      if not disks:
        # No need to collect data
        continue

      node_disks[nname] = disks

      # Creating copies as SetDiskID below will modify the objects and that can
      # lead to incorrect data returned from nodes
      devonly = [dev.Copy() for (_, dev) in disks]

      for dev in devonly:
        self.cfg.SetDiskID(dev, nname)

      node_disks_devonly[nname] = devonly

    assert len(node_disks) == len(node_disks_devonly)

    # Collect data from all nodes with disks
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
                                                          node_disks_devonly)

    assert len(result) == len(node_disks)

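    # instdisk maps instance name -> node name -> list of per-disk status
    # results, e.g. {"inst1": {"node1": [(success, status), ...]}} (the
    # names here are illustrative only)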
    instdisk = {}

    for (nname, nres) in result.items():
      if nres.offline:
        # Ignore offline node
        continue

      disks = node_disks[nname]

      msg = nres.fail_msg
      _ErrorIf(msg, self.ENODERPC, nname,
               "while getting disk information: %s", nres.fail_msg)
      if msg:
        # No data from this node
        data = len(disks) * [None]
      else:
        data = nres.payload

      for ((inst, _), status) in zip(disks, data):
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)

    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
                      len(nnames) <= len(instanceinfo[inst].all_nodes)
                      for inst, nnames in instdisk.items()
                      for nname, statuses in nnames.items())

    return instdisk

  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks run only in the post phase; any hook failure is
    logged in the verify output and causes the verification to fail.

    """
    all_nodes = self.cfg.GetNodeList()
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
      }
    for node in self.cfg.GetAllNodesInfo().values():
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())

    return env, [], all_nodes

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    self.bad = False
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn
    feedback_fn("* Verifying global settings")
    for msg in self.cfg.VerifyConfig():
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)

    # Check the cluster certificates
    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)

    vg_name = self.cfg.GetVGName()
    drbd_helper = self.cfg.GetDRBDHelper()
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
    cluster = self.cfg.GetClusterInfo()
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
                        for iname in instancelist)
    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = 0 # Count of offline nodes
    n_drained = 0 # Count of nodes being drained
    node_vol_should = {}

    # FIXME: verify OS list
    # do local checksums
    master_files = [constants.CLUSTER_CONF_FILE]
    master_node = self.master_node = self.cfg.GetMasterNode()
    master_ip = self.cfg.GetMasterIP()

    file_names = ssconf.SimpleStore().GetFileList()
    file_names.extend(constants.ALL_CERT_FILES)
    file_names.extend(master_files)
    if cluster.modify_etc_hosts:
      file_names.append(constants.ETC_HOSTS)

    local_checksums = utils.FingerprintFiles(file_names)

    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
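    # node_verify_param is the request sent to every node via the
    # node_verify RPC; each NV_* key selects one family of checks and its
    # value carries the arguments needed for it (e.g. the list of files to
    # checksum for NV_FILELIST)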
    node_verify_param = {
      constants.NV_FILELIST: file_names,
      constants.NV_NODELIST: [node.name for node in nodeinfo
                              if not node.offline],
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
                                  node.secondary_ip) for node in nodeinfo
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
      constants.NV_MASTERIP: (master_node, master_ip),
      constants.NV_OSLIST: None,
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
      }

    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]
      node_verify_param[constants.NV_DRBDLIST] = None

    if drbd_helper:
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper

    # Build our expected cluster state
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
                                                 name=node.name,
                                                 vm_capable=node.vm_capable))
                      for node in nodeinfo)

    for instance in instancelist:
      inst_config = instanceinfo[instance]

      for nname in inst_config.all_nodes:
        if nname not in node_image:
          # ghost node
          gnode = self.NodeImage(name=nname)
          gnode.ghost = True
          node_image[nname] = gnode

      inst_config.MapLVsByNode(node_vol_should)

      pnode = inst_config.primary_node
      node_image[pnode].pinst.append(instance)

      for snode in inst_config.secondary_nodes:
        nimg = node_image[snode]
        nimg.sinst.append(instance)
        if pnode not in nimg.sbp:
          nimg.sbp[pnode] = []
        nimg.sbp[pnode].append(instance)

    # At this point, we have the in-memory data structures complete,
    # except for the runtime information, which we'll gather next

    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()

    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Gathering disk information (%s nodes)" % len(nodelist))
    instdisk = self._CollectDiskInfo(nodelist, node_image, instanceinfo)

    feedback_fn("* Verifying node status")

    refos_img = None

    for node_i in nodeinfo:
      node = node_i.name
      nimg = node_image[node]

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline += 1
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained += 1
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
      if msg:
        nimg.rpc_fail = True
        continue

      nresult = all_nvinfo[node].payload

      nimg.call_ok = self._VerifyNode(node_i, nresult)
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
      self._VerifyNodeNetwork(node_i, nresult)
      self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
                            master_files)

      if nimg.vm_capable:
        self._VerifyNodeLVM(node_i, nresult, vg_name)
        self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
                             all_drbd_map)

        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
        self._UpdateNodeInstances(node_i, nresult, nimg)
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
        self._UpdateNodeOS(node_i, nresult, nimg)
        if not nimg.os_fail:
          if refos_img is None:
            refos_img = nimg
          self._VerifyNodeOS(node_i, nimg, refos_img)

    feedback_fn("* Verifying instance status")
    for instance in instancelist:
      if verbose:
        feedback_fn("* Verifying instance %s" % instance)
      inst_config = instanceinfo[instance]
      self._VerifyInstance(instance, inst_config, node_image,
                           instdisk[instance])
      inst_nodes_offline = []

      pnode = inst_config.primary_node
      pnode_img = node_image[pnode]
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
               self.ENODERPC, pnode, "instance %s, connection to"
               " primary node failed", instance)

      if pnode_img.offline:
        inst_nodes_offline.append(pnode)

      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
      if not inst_config.secondary_nodes:
        i_non_redundant.append(instance)
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
               instance, "instance has multiple secondary nodes: %s",
               utils.CommaJoin(inst_config.secondary_nodes),
               code=self.ETYPE_WARNING)

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        s_img = node_image[snode]
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
                 "instance %s, connection to secondary node failed", instance)

        if s_img.offline:
          inst_nodes_offline.append(snode)

      # warn that the instance lives on offline nodes
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
               "instance lives on offline node(s) %s",
               utils.CommaJoin(inst_nodes_offline))
      # ... or ghost/non-vm_capable nodes
      for node in inst_config.all_nodes:
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
                 "instance lives on ghost node %s", node)
        _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
                 instance, "instance lives on non-vm_capable node %s", node)

    feedback_fn("* Verifying orphan volumes")
    reserved = utils.FieldSet(*cluster.reserved_lvs)
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)

    feedback_fn("* Verifying orphan instances")
    self._VerifyOrphanInstances(instancelist, node_image)

    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
      feedback_fn("* Verifying N+1 Memory redundancy")
      self._VerifyNPlusOneMemory(node_image, instanceinfo)

    feedback_fn("* Other Notes")
    if i_non_redundant:
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))

    if i_non_a_balanced:
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
                  % len(i_non_a_balanced))

    if n_offline:
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)

    if n_drained:
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)

    return not self.bad

  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result.

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, and are only interested in
    # their results
    if phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      feedback_fn("* Hooks Results")
      assert hooks_results, "invalid result from hooks"

      for node_name in hooks_results:
        res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
                      "Communication failure in hooks execution: %s", msg)
        if res.offline or msg:
          # No need to investigate payload if node is offline or gave an error.
          # override manually lu_result here as _ErrorIf only
          # overrides self.bad
          lu_result = 1
          continue
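        # each payload entry is a (script, status, output) triple, with
        # status being one of the HKR_* constants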
        for script, hkr, output in res.payload:
          test = hkr == constants.HKR_FAIL
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
                        "Script %s failed, output:", script)
          if test:
            output = self._HOOKS_INDENT_RE.sub('      ', output)
            feedback_fn("%s" % output)
            lu_result = 0

      return lu_result


class LUVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    result = res_nodes, res_instances, res_missing = {}, [], {}
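    # the three names above alias the members of the returned tuple, so
    # filling them in below directly fills in the result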

    nodes = utils.NiceSort(self.cfg.GetNodeList())
    instances = [self.cfg.GetInstanceInfo(name)
                 for name in self.cfg.GetInstanceList()]

    nv_dict = {}
    for inst in instances:
      inst_lvs = {}
      if (not inst.admin_up or
          inst.disk_template not in constants.DTS_NET_MIRROR):
        continue
      inst.MapLVsByNode(inst_lvs)
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
      for node, vol_list in inst_lvs.iteritems():
        for vol in vol_list:
          nv_dict[(node, vol)] = inst

    if not nv_dict:
      return result

    vg_names = self.rpc.call_vg_list(nodes)
    vg_names.Raise("Cannot get list of VGs")

    for node in nodes:
      # node_volume
      node_res = self.rpc.call_lv_list([node],
                                       vg_names[node].payload.keys())[node]
      if node_res.offline:
        continue
      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg
        continue

      lvs = node_res.payload
      for lv_name, (_, _, lv_online) in lvs.items():
        inst = nv_dict.pop((node, lv_name), None)
        if (not lv_online and inst is not None
            and inst.name not in res_instances):
          res_instances.append(inst.name)

    # any leftover items in nv_dict are missing LVs, let's arrange the
    # data better
    for key, inst in nv_dict.iteritems():
      if inst.name not in res_missing:
        res_missing[inst.name] = []
      res_missing[inst.name].append(key)

    return result


class LURepairDiskSizes(NoHooksLU):
  """Verifies the cluster disk sizes.

  """
  _OP_PARAMS = [("instances", ht.EmptyList, ht.TListOf(ht.TNonEmptyString))]
  REQ_BGL = False

  def ExpandNames(self):
    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = _ExpandInstanceName(self.cfg, name)
        self.wanted_names.append(full_name)
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))

    changed = []
    for node, dskl in per_node_disks.items():
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsizes(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsizes call to node"
                        " %s, ignoring", node)
        continue
      if len(result.data) != len(dskl):
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, result.data):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
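        # the size reported by the node is in bytes; shifting by 20
        # converts it to MiB, the unit disk.size is assumed to use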
        size = size >> 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))
    return changed


class LURenameCluster(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_PARAMS = [("name", ht.NoDefault, ht.TNonEmptyString)]

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }
    mn = self.cfg.GetMasterNode()
    all_nodes = self.cfg.GetNodeList()
    return env, [mn], all_nodes

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    hostname = netutils.GetHostname(name=self.op.name,
                                    family=self.cfg.GetPrimaryIPFamily())

    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)
    if new_ip != old_ip:
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network" %
                                   new_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    clustername = self.op.name
    ip = self.ip

    # shutdown the master IP
    master = self.cfg.GetMasterNode()
    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetOnlineNodeList()
      try:
        node_list.remove(master)
      except ValueError:
        pass
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
    finally:
      result = self.rpc.call_node_start_master(master, False, False)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)

    return clustername


class LUSetClusterParams(LogicalUnit):
  """Change the parameters of the cluster.

  """
  HPATH = "cluster-modify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_PARAMS = [
    ("vg_name", None, ht.TMaybeString),
    ("enabled_hypervisors", None,
     ht.TOr(ht.TAnd(ht.TListOf(ht.TElemOf(constants.HYPER_TYPES)), ht.TTrue),
            ht.TNone)),
    ("hvparams", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict),
                              ht.TNone)),
    ("beparams", None, ht.TOr(ht.TDict, ht.TNone)),
    ("os_hvp", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict),
                            ht.TNone)),
    ("osparams", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict),
                              ht.TNone)),
    ("candidate_pool_size", None, ht.TOr(ht.TStrictPositiveInt, ht.TNone)),
    ("uid_pool", None, ht.NoType),
    ("add_uids", None, ht.NoType),
    ("remove_uids", None, ht.NoType),
    ("maintain_node_health", None, ht.TMaybeBool),
    ("prealloc_wipe_disks", None, ht.TMaybeBool),
    ("nicparams", None, ht.TOr(ht.TDict, ht.TNone)),
    ("ndparams", None, ht.TOr(ht.TDict, ht.TNone)),
    ("drbd_helper", None, ht.TOr(ht.TString, ht.TNone)),
    ("default_iallocator", None, ht.TOr(ht.TString, ht.TNone)),
    ("master_netdev", None, ht.TOr(ht.TString, ht.TNone)),
    ("reserved_lvs", None, ht.TOr(ht.TListOf(ht.TNonEmptyString), ht.TNone)),
    ("hidden_os", None, ht.TOr(ht.TListOf(\
          ht.TAnd(ht.TList,
                ht.TIsLength(2),
                ht.TMap(lambda v: v[0], ht.TElemOf(constants.DDMS_VALUES)))),
          ht.TNone)),
    ("blacklisted_os", None, ht.TOr(ht.TListOf(\
          ht.TAnd(ht.TList,
                ht.TIsLength(2),
                ht.TMap(lambda v: v[0], ht.TElemOf(constants.DDMS_VALUES)))),
          ht.TNone)),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    """Check parameters.

    """
    if self.op.uid_pool:
      uidpool.CheckUidPool(self.op.uid_pool)

    if self.op.add_uids:
      uidpool.CheckUidPool(self.op.add_uids)

    if self.op.remove_uids:
      uidpool.CheckUidPool(self.op.remove_uids)

  def ExpandNames(self):
    # FIXME: in the future maybe other cluster params won't require checking on
    # all nodes to be modified.
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
      }
    mn = self.cfg.GetMasterNode()
    return env, [mn], [mn]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the given params don't conflict and
    if the given volume group is valid.

    """
    if self.op.vg_name is not None and not self.op.vg_name:
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
                                   " instances exist", errors.ECODE_INVAL)

    if self.op.drbd_helper is not None and not self.op.drbd_helper:
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
        raise errors.OpPrereqError("Cannot disable drbd helper while"
                                   " drbd-based instances exist",
                                   errors.ECODE_INVAL)

    node_list = self.acquired_locks[locking.LEVEL_NODE]

    # if vg_name not None, checks given volume group on all nodes
    if self.op.vg_name:
      vglist = self.rpc.call_vg_list(node_list)
      for node in node_list:
        msg = vglist[node].fail_msg
        if msg:
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
          continue
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              self.op.vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus), errors.ECODE_ENVIRON)

    if self.op.drbd_helper:
      # checks given drbd helper on all nodes
      helpers = self.rpc.call_drbd_helper(node_list)
      for node in node_list:
        ninfo = self.cfg.GetNodeInfo(node)
        if ninfo.offline:
          self.LogInfo("Not checking drbd helper on offline node %s", node)
          continue
        msg = helpers[node].fail_msg
        if msg:
          raise errors.OpPrereqError("Error checking drbd helper on node"
                                     " '%s': %s" % (node, msg),
                                     errors.ECODE_ENVIRON)
        node_helper = helpers[node].payload
        if node_helper != self.op.drbd_helper:
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
                                     (node, node_helper), errors.ECODE_ENVIRON)

    self.cluster = cluster = self.cfg.GetClusterInfo()
    # validate params changes
    if self.op.beparams:
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)

    if self.op.nicparams:
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
      nic_errors = []

      # check all instances for consistency
      for instance in self.cfg.GetAllInstancesInfo().values():
        for nic_idx, nic in enumerate(instance.nics):
          params_copy = copy.deepcopy(nic.nicparams)
          params_filled = objects.FillDict(self.new_nicparams, params_copy)

          # check parameter syntax
          try:
            objects.NIC.CheckParameterSyntax(params_filled)
          except errors.ConfigurationError, err:
            nic_errors.append("Instance %s, nic/%d: %s" %
                              (instance.name, nic_idx, err))

          # if we're moving instances to routed, check that they have an ip
          target_mode = params_filled[constants.NIC_MODE]
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
            nic_errors.append("Instance %s, nic/%d: routed NIC with no IP" %
                              (instance.name, nic_idx))
      if nic_errors:
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
                                   "\n".join(nic_errors))

    # hypervisor list/parameters
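    # (objects.FillDict(defaults, overrides) is assumed to return a new dict
    # built from the defaults with the overrides applied on top, leaving its
    # inputs untouched)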
2845
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2846
    if self.op.hvparams:
2847
      for hv_name, hv_dict in self.op.hvparams.items():
2848
        if hv_name not in self.new_hvparams:
2849
          self.new_hvparams[hv_name] = hv_dict
2850
        else:
2851
          self.new_hvparams[hv_name].update(hv_dict)
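      # note: per-hypervisor dicts are merged key by key, so any hypervisor
      # parameter not named in the opcode keeps its current cluster value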

    # os hypervisor parameters
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
    if self.op.os_hvp:
      for os_name, hvs in self.op.os_hvp.items():
        if os_name not in self.new_os_hvp:
          self.new_os_hvp[os_name] = hvs
        else:
          for hv_name, hv_dict in hvs.items():
            if hv_name not in self.new_os_hvp[os_name]:
              self.new_os_hvp[os_name][hv_name] = hv_dict
            else:
              self.new_os_hvp[os_name][hv_name].update(hv_dict)

    # os parameters
    self.new_osp = objects.FillDict(cluster.osparams, {})
    if self.op.osparams:
      for os_name, osp in self.op.osparams.items():
        if os_name not in self.new_osp:
          self.new_osp[os_name] = {}

        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
                                                  use_none=True)

        if not self.new_osp[os_name]:
          # we removed all parameters
          del self.new_osp[os_name]
        else:
          # check the parameter validity (remote check)
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
                         os_name, self.new_osp[os_name])

    # changes to the hypervisor list
    if self.op.enabled_hypervisors is not None:
      self.hv_list = self.op.enabled_hypervisors
      for hv in self.hv_list:
        # if the hypervisor doesn't already exist in the cluster
        # hvparams, we initialize it to empty, and then (in both
        # cases) we make sure to fill the defaults, as we might not
        # have a complete defaults list if the hypervisor wasn't
        # enabled before
        if hv not in new_hvp:
          new_hvp[hv] = {}
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
    else:
      self.hv_list = cluster.enabled_hypervisors

    if self.op.hvparams or self.op.enabled_hypervisors is not None:
      # either the enabled list has changed, or the parameters have, validate
      for hv_name, hv_params in self.new_hvparams.items():
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
            (self.op.enabled_hypervisors and
             hv_name in self.op.enabled_hypervisors)):
          # either this is a new hypervisor, or its parameters have changed
          hv_class = hypervisor.GetHypervisor(hv_name)
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          hv_class.CheckParameterSyntax(hv_params)
          _CheckHVParams(self, node_list, hv_name, hv_params)

    if self.op.os_hvp:
      # no need to check any newly-enabled hypervisors, since the
      # defaults have already been checked in the above code-block
      for os_name, os_hvp in self.new_os_hvp.items():
        for hv_name, hv_params in os_hvp.items():
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          # we need to fill in the new os_hvp on top of the actual hv_p
          cluster_defaults = self.new_hvparams.get(hv_name, {})
          new_osp = objects.FillDict(cluster_defaults, hv_params)
          hv_class = hypervisor.GetHypervisor(hv_name)
          hv_class.CheckParameterSyntax(new_osp)
          _CheckHVParams(self, node_list, hv_name, new_osp)

    if self.op.default_iallocator:
      alloc_script = utils.FindFile(self.op.default_iallocator,
                                    constants.IALLOCATOR_SEARCH_PATH,
                                    os.path.isfile)
      if alloc_script is None:
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
                                   " specified" % self.op.default_iallocator,
                                   errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    """
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
    if self.op.drbd_helper is not None:
      new_helper = self.op.drbd_helper
      if not new_helper:
        new_helper = None
      if new_helper != self.cfg.GetDRBDHelper():
        self.cfg.SetDRBDHelper(new_helper)
      else:
        feedback_fn("Cluster DRBD helper already in desired state,"
                    " not changing")
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
    if self.op.enabled_hypervisors is not None:
      self.cluster.hvparams = self.new_hvparams
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
    if self.op.beparams:
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
    if self.op.nicparams:
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
    if self.op.osparams:
      self.cluster.osparams = self.new_osp
    if self.op.ndparams:
      self.cluster.ndparams = self.new_ndparams

    if self.op.candidate_pool_size is not None:
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
      # we need to update the pool size here, otherwise the save will fail
      _AdjustCandidatePool(self, [])

    if self.op.maintain_node_health is not None:
      self.cluster.maintain_node_health = self.op.maintain_node_health

    if self.op.prealloc_wipe_disks is not None:
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks

    if self.op.add_uids is not None:
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)

    if self.op.remove_uids is not None:
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)

    if self.op.uid_pool is not None:
      self.cluster.uid_pool = self.op.uid_pool

    if self.op.default_iallocator is not None:
      self.cluster.default_iallocator = self.op.default_iallocator

    if self.op.reserved_lvs is not None:
      self.cluster.reserved_lvs = self.op.reserved_lvs

    def helper_os(aname, mods, desc):
      desc += " OS list"
      lst = getattr(self.cluster, aname)
      for key, val in mods:
        if key == constants.DDM_ADD:
          if val in lst:
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
          else:
            lst.append(val)
        elif key == constants.DDM_REMOVE:
          if val in lst:
            lst.remove(val)
          else:
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
        else:
          raise errors.ProgrammerError("Invalid modification '%s'" % key)
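    # each entry in "mods" is an (action, OS name) pair where the action is
    # constants.DDM_ADD or constants.DDM_REMOVE; e.g. the purely illustrative
    # list [(constants.DDM_ADD, "my-os")] would append "my-os" to the list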

    if self.op.hidden_os:
      helper_os("hidden_os", self.op.hidden_os, "hidden")

    if self.op.blacklisted_os:
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")

    if self.op.master_netdev:
      master = self.cfg.GetMasterNode()
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
                  self.cluster.master_netdev)
      result = self.rpc.call_node_stop_master(master, False)
      result.Raise("Could not disable the master ip")
      feedback_fn("Changing master_netdev from %s to %s" %
                  (self.cluster.master_netdev, self.op.master_netdev))
      self.cluster.master_netdev = self.op.master_netdev

    self.cfg.Update(self.cluster, feedback_fn)
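    # all attribute changes made above are persisted here; the master IP is
    # only restarted afterwards, on the new netdev (if one was requested)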

    if self.op.master_netdev:
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
                  self.op.master_netdev)
      result = self.rpc.call_node_start_master(master, False, False)
      if result.fail_msg:
        self.LogWarning("Could not re-enable the master ip on"
                        " the master, please restart manually: %s",
                        result.fail_msg)


def _UploadHelper(lu, nodes, fname):
  """Helper for uploading a file and showing warnings.

  """
  if os.path.exists(fname):
    result = lu.rpc.call_upload_file(nodes, fname)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        msg = ("Copy of file %s to node %s failed: %s" %
               (fname, to_node, msg))
        lu.proc.LogWarning(msg)


def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  # 1. Gather target nodes
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
  dist_nodes = lu.cfg.GetOnlineNodeList()
  nvm_nodes = lu.cfg.GetNonVmCapableNodeList()
  vm_nodes = [name for name in dist_nodes if name not in nvm_nodes]
  if additional_nodes is not None:
    dist_nodes.extend(additional_nodes)
    if additional_vm:
      vm_nodes.extend(additional_nodes)
  if myself.name in dist_nodes:
    dist_nodes.remove(myself.name)
  if myself.name in vm_nodes:
    vm_nodes.remove(myself.name)

  # 2. Gather files to distribute
  dist_files = set([constants.ETC_HOSTS,
                    constants.SSH_KNOWN_HOSTS_FILE,
                    constants.RAPI_CERT_FILE,
                    constants.RAPI_USERS_FILE,
                    constants.CONFD_HMAC_KEY,
                    constants.CLUSTER_DOMAIN_SECRET_FILE,
                   ])

  vm_files = set()
  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
  for hv_name in enabled_hypervisors:
    hv_class = hypervisor.GetHypervisor(hv_name)
    vm_files.update(hv_class.GetAncillaryFiles())
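  # hypervisor-specific ancillary files are only pushed to vm-capable nodes
  # (see the upload loops below)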

  # 3. Perform the files upload
  for fname in dist_files:
    _UploadHelper(lu, dist_nodes, fname)
  for fname in vm_files:
    _UploadHelper(lu, vm_nodes, fname)


class LURedistributeConfig(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)


def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  """
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                      node, disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))
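    # sleep at most 60 seconds between polls; if no device reported a time
    # estimate, max_time is still 0 and the next poll happens immediately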

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded


def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
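      # note that child devices are recursed into with the same on_primary
      # flag but the default is_degraded test (ldisk is not propagated)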
3233

    
3234
  return result
3235

    
3236

    
3237
class LUOutOfBand(NoHooksLU):
3238
  """Logical unit for OOB handling.
3239

3240
  """
3241
  _OP_PARAMS = [
3242
    _PNodeName,
3243
    ("command", None, ht.TElemOf(constants.OOB_COMMANDS)),
3244
    ("timeout", constants.OOB_TIMEOUT, ht.TInt),
3245
    ]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - OOB is supported

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)

    if node is None:
      raise errors.OpPrereqError("Node %s not found" % self.op.node_name)

    self.oob_program = self.cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]

    if not self.oob_program:
      raise errors.OpPrereqError("OOB is not supported for node %s" %
                                 self.op.node_name)

    self.op.node_name = node.name
    self.node = node

  def ExpandNames(self):
    """Gather locks we need.

    """
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def Exec(self, feedback_fn):
    """Execute OOB and return result if we expect any.

    """
    master_node = self.cfg.GetMasterNode()

    logging.info("Executing out-of-band command '%s' using '%s' on %s",
                 self.op.command, self.oob_program, self.op.node_name)
    result = self.rpc.call_run_oob(master_node, self.oob_program,
                                   self.op.command, self.op.node_name,
                                   self.op.timeout)

    result.Raise("An error occurred on execution of OOB helper")

    self._CheckPayload(result)

    if self.op.command == constants.OOB_HEALTH:
      # For health we should log important events
      for item, status in result.payload:
        if status in [constants.OOB_STATUS_WARNING,
                      constants.OOB_STATUS_CRITICAL]:
          logging.warning("On node '%s' item '%s' has status '%s'",
                          self.op.node_name, item, status)

    return result.payload

  def _CheckPayload(self, result):
    """Checks if the payload is valid.

    @param result: RPC result
    @raises errors.OpExecError: If payload is not valid

    """
    errs = []
    if self.op.command == constants.OOB_HEALTH:
      if not isinstance(result.payload, list):
        errs.append("command 'health' is expected to return a list but got %s" %
                    type(result.payload))
      for item, status in result.payload:
        if status not in constants.OOB_STATUSES:
          errs.append("health item '%s' has invalid status '%s'" %
                      (item, status))

    if self.op.command == constants.OOB_POWER_STATUS:
      if not isinstance(result.payload, dict):
        errs.append("power-status is expected to return a dict but got %s" %
                    type(result.payload))

    if self.op.command in [
        constants.OOB_POWER_ON,
        constants.OOB_POWER_OFF,
        constants.OOB_POWER_CYCLE,
        ]:
      if result.payload is not None:
        errs.append("%s is expected to not return payload but got '%s'" %
                    (self.op.command, result.payload))

    if errs:
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
                               utils.CommaJoin(errs))


class LUDiagnoseOS(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  _OP_PARAMS = [
    _POutputFields,
    ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
    ]
  REQ_BGL = False
  _HID = "hidden"
  _BLK = "blacklisted"
  _VLD = "valid"
  _FIELDS_STATIC = utils.FieldSet()
  _FIELDS_DYNAMIC = utils.FieldSet("name", _VLD, "node_status", "variants",
                                   "parameters", "api_versions", _HID, _BLK)

  def CheckArguments(self):
    if self.op.names:
      raise errors.OpPrereqError("Selective OS query not supported",
                                 errors.ECODE_INVAL)

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    # Lock all nodes, in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    self.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and tuples of (path, status, diagnose,
        variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "", [], [])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for (name, path, status, diagnose, variants,
           params, api_versions) in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        # convert params from [name, help] to (name, help)
        params = [tuple(v) for v in params]
        all_os[name][node_name].append((path, status, diagnose,
                                        variants, params, api_versions))
    return all_os

  def Exec(self, feedback_fn):
    """Compute the list of OSes.

    """
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
    node_data = self.rpc.call_os_diagnose(valid_nodes)
    pol = self._DiagnoseByOS(node_data)
    output = []
    cluster = self.cfg.GetClusterInfo()

    for os_name in utils.NiceSort(pol.keys()):
      os_data = pol[os_name]
      row = []
      valid = True
      (variants, params, api_versions) = null_state = (set(), set(), set())
      for idx, osl in enumerate(os_data.values()):
        valid = bool(valid and osl and osl[0][1])
        if not valid:
          (variants, params, api_versions) = null_state
          break
        node_variants, node_params, node_api = osl[0][3:6]
        if idx == 0: # first entry
          variants = set(node_variants)
          params = set(node_params)
          api_versions = set(node_api)
        else: # keep consistency
          variants.intersection_update(node_variants)
          params.intersection_update(node_params)
          api_versions.intersection_update(node_api)
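          # only the variants, parameters and API versions supported on
          # every node end up being reported for this OS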

      is_hid = os_name in cluster.hidden_os
      is_blk = os_name in cluster.blacklisted_os
      if ((self._HID not in self.op.output_fields and is_hid) or
          (self._BLK not in self.op.output_fields and is_blk) or
          (self._VLD not in self.op.output_fields and not valid)):
        continue

      for field in self.op.output_fields:
        if field == "name":
          val = os_name
        elif field == self._VLD:
          val = valid
        elif field == "node_status":
          # this is just a copy of the dict
          val = {}
          for node_name, nos_list in os_data.items():
            val[node_name] = nos_list
        elif field == "variants":
          val = utils.NiceSort(list(variants))
        elif field == "parameters":
          val = list(params)
        elif field == "api_versions":
          val = list(api_versions)
        elif field == self._HID:
          val = is_hid
        elif field == self._BLK:
          val = is_blk
        else:
          raise errors.ParameterError(field)
        row.append(val)
      output.append(row)

    return output


class LURemoveNode(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE
  _OP_PARAMS = [
    _PNodeName,
    ]

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      logging.warning("Node %s which is about to be removed not found"
                      " in the all nodes list", self.op.node_name)
    return env, all_nodes, all_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    instance_list = self.cfg.GetInstanceList()

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node,"
                                 " you need to failover first.",
                                 errors.ECODE_INVAL)

    for instance_name in instance_list:
      instance = self.cfg.GetInstanceInfo(instance_name)
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first." % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_REMOVE,
                                              node.name, None)
      result.Raise("Can't update hosts file with new host data")
      _RedistributeAncillaryFiles(self)


class _NodeQuery(_QueryBase):
  FIELDS = query.NODE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks[locking.LEVEL_NODE] = 1

    if self.names:
      self.wanted = _GetWantedNodes(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.NQ_LIVE in self.requested_data)

    if self.do_locking:
      # if we don't request only static fields, we need to lock the nodes
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    all_info = lu.cfg.GetAllNodesInfo()

    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)

    # Gather data as requested
    if query.NQ_LIVE in self.requested_data:
      node_data = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
                                        lu.cfg.GetHypervisorType())
      live_data = dict((name, nresult.payload)
                       for (name, nresult) in node_data.items()
                       if not nresult.fail_msg and nresult.payload)
    else:
      live_data = None

    if query.NQ_INST in self.requested_data:
      node_to_primary = dict([(name, set()) for name in nodenames])
      node_to_secondary = dict([(name, set()) for name in nodenames])

      inst_data = lu.cfg.GetAllInstancesInfo()

      for inst in inst_data.values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)
    else:
      node_to_primary = None
      node_to_secondary = None

    if query.NQ_GROUP in self.requested_data:
      groups = lu.cfg.GetAllNodeGroupsInfo()
    else:
      groups = {}

    return query.NodeQueryData([all_info[name] for name in nodenames],
                               live_data, lu.cfg.GetMasterNode(),
                               node_to_primary, node_to_secondary, groups)


class LUQueryNodes(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable-msg=W0142
  _OP_PARAMS = [
    _POutputFields,
    ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
    ("use_locking", False, ht.TBool),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    self.nq = _NodeQuery(self.op.names, self.op.output_fields,
                         self.op.use_locking)

  def ExpandNames(self):
    self.nq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.nq.OldStyleQuery(self)


class LUQueryNodeVolumes(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  _OP_PARAMS = [
    _POutputFields,
    ("nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
    ]
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    nodenames = self.acquired_locks[locking.LEVEL_NODE]
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = [self.cfg.GetInstanceInfo(iname) for iname
             in self.cfg.GetInstanceList()]

    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = nresult.payload[:]
      node_vols.sort(key=lambda vol: vol['dev'])

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol['dev']
          elif field == "vg":
            val = vol['vg']
          elif field == "name":
            val = vol['name']
          elif field == "size":
            val = int(float(vol['size']))
          elif field == "instance":
            for inst in ilist:
              if node not in lv_by_node[inst]:
                continue
              if vol['name'] in lv_by_node[inst][node]:
                val = inst.name
                break
            else:
              val = '-'
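              # (for/else: this branch runs only when no instance was found
              # to own the logical volume)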
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output


class LUQueryNodeStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  _OP_PARAMS = [
    _POutputFields,
    ("nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
    ("storage_type", ht.NoDefault, _CheckStorageType),
    ("name", None, ht.TMaybeString),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result


class _InstanceQuery(_QueryBase):
  FIELDS = query.INSTANCE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks[locking.LEVEL_INSTANCE] = 1
    lu.share_locks[locking.LEVEL_NODE] = 1

    if self.names:
      self.wanted = _GetWantedInstances(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.IQ_LIVE in self.requested_data)
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      lu.needed_locks[locking.LEVEL_NODE] = []
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, lu, level):
    if level == locking.LEVEL_NODE and self.do_locking:
      lu._LockInstancesNodes() # pylint: disable-msg=W0212
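      # this fills in the node locks based on the instances selected above
      # (LOCKS_REPLACE was requested in ExpandNames)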

  def _GetQueryData(self, lu):
    """Computes the list of instances and their attributes.

    """
    all_info = lu.cfg.GetAllInstancesInfo()

    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)

    instance_list = [all_info[name] for name in instance_names]
    nodes = frozenset([inst.primary_node for inst in instance_list])
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
    bad_nodes = []
    offline_nodes = []

    # Gather data as requested
    if query.IQ_LIVE in self.requested_data:
      live_data = {}
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          assert result.fail_msg
          offline_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        elif result.payload:
          live_data.update(result.payload)
        # else no instance is alive
    else:
      live_data = {}

    if query.IQ_DISKUSAGE in self.requested_data:
      disk_usage = dict((inst.name,
                         _ComputeDiskSize(inst.disk_template,
                                          [{"size": disk.size}
                                           for disk in inst.disks]))
                        for inst in instance_list)
    else:
      disk_usage = None

    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
                                   disk_usage, offline_nodes, bad_nodes,
                                   live_data)


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  }


def _GetQueryImplementation(name):
  """Returns the implemtnation for a query type.
3920

3921
  @param name: Query type, must be one of L{constants.QR_OP_QUERY}
3922

3923
  """
3924
  try:
3925
    return _QUERY_IMPL[name]
3926
  except KeyError:
3927
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
3928
                               errors.ECODE_INVAL)
3929

    
3930

    
3931
class LUQuery(NoHooksLU):
3932
  """Query for resources/items of a certain kind.
3933

3934
  """
3935
  # pylint: disable-msg=W0142
3936
  _OP_PARAMS = [
3937
    ("what", ht.NoDefault, ht.TElemOf(constants.QR_OP_QUERY)),
3938
    ("fields", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
3939
    ("filter", None, ht.TOr(ht.TNone,
3940
                            ht.TListOf(ht.TOr(ht.TNonEmptyString, ht.TList)))),
3941
    ]
3942
  REQ_BGL = False
3943

    
3944
  def CheckArguments(self):
3945
    qcls = _GetQueryImplementation(self.op.what)
3946
    names = qlang.ReadSimpleFilter("name", self.op.filter)
3947

    
3948
    self.impl = qcls(names, self.op.fields, False)
3949

    
3950
  def ExpandNames(self):
3951
    self.impl.ExpandNames(self)
3952

    
3953
  def DeclareLocks(self, level):
3954
    self.impl.DeclareLocks(self, level)
3955

    
3956
  def Exec(self, feedback_fn):
3957
    return self.impl.NewStyleQuery(self)
3958

    
3959

    
3960
class LUQueryFields(NoHooksLU):
3961
  """Query for resources/items of a certain kind.
3962

3963
  """
3964
  # pylint: disable-msg=W0142
3965
  _OP_PARAMS = [
3966
    ("what", ht.NoDefault, ht.TElemOf(constants.QR_OP_QUERY)),
3967
    ("fields", None, ht.TOr(ht.TNone, ht.TListOf(ht.TNonEmptyString))),
3968
    ]
3969
  REQ_BGL = False
3970

    
3971
  def CheckArguments(self):
3972
    self.qcls = _GetQueryImplementation(self.op.what)
3973

    
3974
  def ExpandNames(self):
3975
    self.needed_locks = {}
3976

    
3977
  def Exec(self, feedback_fn):
3978
    return self.qcls.FieldsQuery(self.op.fields)
3979

    
3980

    
3981
class LUModifyNodeStorage(NoHooksLU):
3982
  """Logical unit for modifying a storage volume on a node.
3983

3984
  """
3985
  _OP_PARAMS = [
3986
    _PNodeName,
3987
    ("storage_type", ht.NoDefault, _CheckStorageType),
3988
    ("name", ht.NoDefault, ht.TNonEmptyString),
3989
    ("changes", ht.NoDefault, ht.TDict),
3990
    ]
3991
  REQ_BGL = False
3992

    
3993
  def CheckArguments(self):
3994
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3995

    
3996
    storage_type = self.op.storage_type
3997

    
3998
    try:
3999
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4000
    except KeyError:
4001
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
4002
                                 " modified" % storage_type,
4003
                                 errors.ECODE_INVAL)
4004

    
4005
    diff = set(self.op.changes.keys()) - modifiable
4006
    if diff:
4007
      raise errors.OpPrereqError("The following fields can not be modified for"
4008
                                 " storage units of type '%s': %r" %
4009
                                 (storage_type, list(diff)),
4010
                                 errors.ECODE_INVAL)
4011

    
4012
  def ExpandNames(self):
4013
    self.needed_locks = {
4014
      locking.LEVEL_NODE: self.op.node_name,
4015
      }
4016

    
4017
  def Exec(self, feedback_fn):
4018
    """Computes the list of nodes and their attributes.
4019

4020
    """
4021
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4022
    result = self.rpc.call_storage_modify(self.op.node_name,
4023
                                          self.op.storage_type, st_args,
4024
                                          self.op.name, self.op.changes)
4025
    result.Raise("Failed to modify storage unit '%s' on %s" %
4026
                 (self.op.name, self.op.node_name))
4027

    
4028

    
4029
class LUAddNode(LogicalUnit):
4030
  """Logical unit for adding node to the cluster.
4031

4032
  """
4033
  HPATH = "node-add"
4034
  HTYPE = constants.HTYPE_NODE
4035
  _OP_PARAMS = [
4036
    _PNodeName,
4037
    ("primary_ip", None, ht.NoType),
4038
    ("secondary_ip", None, ht.TMaybeString),
4039
    ("readd", False, ht.TBool),
4040
    ("group", None, ht.TMaybeString),
4041
    ("master_capable", None, ht.TMaybeBool),
4042
    ("vm_capable", None, ht.TMaybeBool),
4043
    ("ndparams", None, ht.TOr(ht.TDict, ht.TNone)),
4044
    ]
4045
  _NFLAGS = ["master_capable", "vm_capable"]
4046

    
4047
  def CheckArguments(self):
4048
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4049
    # validate/normalize the node name
4050
    self.hostname = netutils.GetHostname(name=self.op.node_name,
4051
                                         family=self.primary_ip_family)
4052
    self.op.node_name = self.hostname.name
4053
    if self.op.readd and self.op.group:
4054
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
4055
                                 " being readded", errors.ECODE_INVAL)
4056

    
4057
  def BuildHooksEnv(self):
4058
    """Build hooks env.
4059

4060
    This will run on all nodes before, and on all nodes + the new node after.
4061

4062
    """
4063
    env = {
4064
      "OP_TARGET": self.op.node_name,
4065
      "NODE_NAME": self.op.node_name,
4066
      "NODE_PIP": self.op.primary_ip,
4067
      "NODE_SIP": self.op.secondary_ip,
4068
      "MASTER_CAPABLE": str(self.op.master_capable),
4069
      "VM_CAPABLE": str(self.op.vm_capable),
4070
      }
4071
    nodes_0 = self.cfg.GetNodeList()
4072
    nodes_1 = nodes_0 + [self.op.node_name, ]
4073
    return env, nodes_0, nodes_1
4074

    
4075
  def CheckPrereq(self):
4076
    """Check prerequisites.
4077

4078
    This checks:
4079
     - the new node is not already in the config
4080
     - it is resolvable
4081
     - its parameters (single/dual homed) matches the cluster
4082

4083
    Any errors are signaled by raising errors.OpPrereqError.
4084

4085
    """
4086
    cfg = self.cfg
4087
    hostname = self.hostname
4088
    node = hostname.name
4089
    primary_ip = self.op.primary_ip = hostname.ip
4090
    if self.op.secondary_ip is None:
4091
      if self.primary_ip_family == netutils.IP6Address.family:
4092
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
4093
                                   " IPv4 address must be given as secondary",
4094
                                   errors.ECODE_INVAL)
4095
      self.op.secondary_ip = primary_ip
4096

    
4097
    secondary_ip = self.op.secondary_ip
4098
    if not netutils.IP4Address.IsValid(secondary_ip):
4099
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4100
                                 " address" % secondary_ip, errors.ECODE_INVAL)
4101

    
4102
    node_list = cfg.GetNodeList()
4103
    if not self.op.readd and node in node_list:
4104
      raise errors.OpPrereqError("Node %s is already in the configuration" %
4105
                                 node, errors.ECODE_EXISTS)
4106
    elif self.op.readd and node not in node_list:
4107
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4108
                                 errors.ECODE_NOENT)
4109

    
4110
    self.changed_primary_ip = False
4111

    
4112
    for existing_node_name in node_list:
4113
      existing_node = cfg.GetNodeInfo(existing_node_name)
4114

    
4115
      if self.op.readd and node == existing_node_name:
4116
        if existing_node.secondary_ip != secondary_ip:
4117
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
4118
                                     " address configuration as before",
4119
                                     errors.ECODE_INVAL)
4120
        if existing_node.primary_ip != primary_ip:
4121
          self.changed_primary_ip = True
4122

    
4123
        continue
4124

    
4125
      if (existing_node.primary_ip == primary_ip or
4126
          existing_node.secondary_ip == primary_ip or
4127
          existing_node.primary_ip == secondary_ip or
4128
          existing_node.secondary_ip == secondary_ip):
4129
        raise errors.OpPrereqError("New node ip address(es) conflict with"
4130
                                   " existing node %s" % existing_node.name,
4131
                                   errors.ECODE_NOTUNIQUE)
4132

    
4133
    # After this 'if' block, None is no longer a valid value for the
4134
    # _capable op attributes
4135
    if self.op.readd:
4136
      old_node = self.cfg.GetNodeInfo(node)
4137
      assert old_node is not None, "Can't retrieve locked node %s" % node
4138
      for attr in self._NFLAGS:
4139
        if getattr(self.op, attr) is None:
4140
          setattr(self.op, attr, getattr(old_node, attr))
4141
    else:
4142
      for attr in self._NFLAGS:
4143
        if getattr(self.op, attr) is None:
4144
          setattr(self.op, attr, True)
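      # brand-new nodes default to being both master_capable and vm_capable
      # unless the opcode explicitly says otherwise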
4145

    
4146
    if self.op.readd and not self.op.vm_capable:
4147
      pri, sec = cfg.GetNodeInstances(node)
4148
      if pri or sec:
4149
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4150
                                   " flag set to false, but it already holds"
4151
                                   " instances" % node,
4152
                                   errors.ECODE_STATE)
4153

    
4154
    # check that the type of the node (single versus dual homed) is the
4155
    # same as for the master
4156
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4157
    master_singlehomed = myself.secondary_ip == myself.primary_ip
4158
    newbie_singlehomed = secondary_ip == primary_ip
4159
    if master_singlehomed != newbie_singlehomed:
4160
      if master_singlehomed:
4161
        raise errors.OpPrereqError("The master has no secondary ip but the"
4162
                                   " new node has one",
4163
                                   errors.ECODE_INVAL)
4164
      else:
4165
        raise errors.OpPrereqError("The master has a secondary ip but the"
4166
                                   " new node doesn't have one",
4167
                                   errors.ECODE_INVAL)
4168

    
4169
    # checks reachability
4170
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4171
      raise errors.OpPrereqError("Node not reachable by ping",
4172
                                 errors.ECODE_ENVIRON)
4173

    
4174
    if not newbie_singlehomed:
4175
      # check reachability from my secondary ip to newbie's secondary ip
4176
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4177
                           source=myself.secondary_ip):
4178
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4179
                                   " based ping to node daemon port",
4180
                                   errors.ECODE_ENVIRON)
4181

    
4182
    if self.op.readd:
4183
      exceptions = [node]
4184
    else:
4185
      exceptions = []
4186

    
4187
    if self.op.master_capable:
4188
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
4189
    else:
4190
      self.master_candidate = False
4191

    
4192
    if self.op.readd:
4193
      self.new_node = old_node
4194
    else:
4195
      node_group = cfg.LookupNodeGroup(self.op.group)
4196
    self.new_node = objects.Node(name=node,
                                 primary_ip=primary_ip,
                                 secondary_ip=secondary_ip,
                                 master_candidate=self.master_candidate,
                                 offline=False, drained=False,
                                 group=node_group)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # copy the master/vm_capable flags
    for attr in self._NFLAGS:
      setattr(new_node, attr, getattr(self.op, attr))

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    if self.op.ndparams:
      new_node.ndparams = self.op.ndparams

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_ADD,
                                              self.hostname.name,
                                              self.hostname.ip)
      result.Raise("Can't update hosts file with new host data")

    if new_node.secondary_ip != new_node.primary_ip:
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
                               False)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: [node],
      # TODO: do a node-net-test as well?
      }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed.")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
                                  additional_vm=self.op.vm_capable)
      self.context.AddNode(new_node, self.proc.GetECId())


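# A note on the verification step in the node-add Exec above: the
# NV_NODELIST entry of each verifier's payload maps peer names that failed
# the ssh/hostname check to an error message; an empty mapping means the
# check passed.  Illustrative sketch (hostname and message are made up):
#   nl_payload = {"node3.example.com": "ssh verification failed"}
#   -> each entry is reported through feedback_fn and the LU aborts with
#      OpExecError.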
class LUSetNodeParams(LogicalUnit):
  """Modifies the parameters of a node.

  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
      to the node role (as _ROLE_*)
  @cvar _R2F: a dictionary from node role to tuples of flags
  @cvar _FLAGS: a list of attribute names corresponding to the flags

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  _OP_PARAMS = [
    _PNodeName,
    ("master_candidate", None, ht.TMaybeBool),
    ("offline", None, ht.TMaybeBool),
    ("drained", None, ht.TMaybeBool),
    ("auto_promote", False, ht.TBool),
    ("master_capable", None, ht.TMaybeBool),
    ("vm_capable", None, ht.TMaybeBool),
    ("secondary_ip", None, ht.TMaybeString),
    ("ndparams", None, ht.TOr(ht.TDict, ht.TNone)),
    _PForce,
    ]
  REQ_BGL = False
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
  _FLAGS = ["master_candidate", "drained", "offline"]

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
                self.op.master_capable, self.op.vm_capable,
                self.op.secondary_ip, self.op.ndparams]
    if all_mods.count(None) == len(all_mods):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we might be demoting from MC
    self.might_demote = (self.op.master_candidate == False or
                         self.op.offline == True or
                         self.op.drained == True or
                         self.op.master_capable == False)

    if self.op.secondary_ip:
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                   " address" % self.op.secondary_ip,
                                   errors.ECODE_INVAL)

    self.lock_all = self.op.auto_promote and self.might_demote
    self.lock_instances = self.op.secondary_ip is not None

  def ExpandNames(self):
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

    if self.lock_instances:
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET

  def DeclareLocks(self, level):
    # If we have locked all instances, before waiting to lock nodes, release
    # all the ones living on nodes unrelated to the current operation.
    if level == locking.LEVEL_NODE and self.lock_instances:
      instances_release = []
      instances_keep = []
      self.affected_instances = []
      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
        for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
          instance = self.context.cfg.GetInstanceInfo(instance_name)
          i_mirrored = instance.disk_template in constants.DTS_NET_MIRROR
          if i_mirrored and self.op.node_name in instance.all_nodes:
            instances_keep.append(instance_name)
            self.affected_instances.append(instance)
          else:
            instances_release.append(instance_name)
        if instances_release:
          self.context.glm.release(locking.LEVEL_INSTANCE, instances_release)
          self.acquired_locks[locking.LEVEL_INSTANCE] = instances_keep

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }
    nl = [self.cfg.GetMasterNode(),
          self.op.node_name]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the requested flag and parameter changes against the
    node's and the cluster's current state.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via master-failover",
                                   errors.ECODE_INVAL)

    if self.op.master_candidate and not node.master_capable:
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
                                 " it a master candidate" % node.name,
                                 errors.ECODE_STATE)

    if self.op.vm_capable == False:
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
      if ipri or isec:
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
                                   " the vm_capable flag" % node.name,
                                   errors.ECODE_STATE)

    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto-promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto_promote to allow promotion",
                                   errors.ECODE_STATE)

    self.old_flags = old_flags = (node.master_candidate,
                                  node.drained, node.offline)
    assert old_flags in self._F2R, "Un-handled old flags  %s" % str(old_flags)
    self.old_role = old_role = self._F2R[old_flags]

    # Check for ineffective changes
    for attr in self._FLAGS:
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
        setattr(self.op, attr, None)

    # Past this point, any flag change to False means a transition
    # away from the respective state, as only real changes are kept

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.op.drained == False or self.op.offline == False or
        (self.op.master_capable and not node.master_capable)):
      if _DecideSelfPromotion(self):
        self.op.master_candidate = True
        self.LogInfo("Auto-promoting node to master candidate")

    # If we're no longer master capable, we'll demote ourselves from MC
    if self.op.master_capable == False and node.master_candidate:
      self.LogInfo("Demoting from master candidate")
      self.op.master_candidate = False

    # Compute new role
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
    if self.op.master_candidate:
      new_role = self._ROLE_CANDIDATE
    elif self.op.drained:
      new_role = self._ROLE_DRAINED
    elif self.op.offline:
      new_role = self._ROLE_OFFLINE
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # only) True flag
      new_role = self._ROLE_REGULAR
    else: # no new flags, nothing, keep old role
      new_role = old_role

    self.new_role = new_role

    if old_role == self._ROLE_OFFLINE and new_role != old_role:
      # Trying to transition out of offline status
      result = self.rpc.call_version([node.name])[node.name]
      if result.fail_msg:
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                   " to report its version: %s" %
                                   (node.name, result.fail_msg),
                                   errors.ECODE_STATE)
      else:
        self.LogWarning("Transitioning node from offline to online state"
                        " without using re-add. Please make sure the node"
                        " is healthy!")

    if self.op.secondary_ip:
      # Ok even without locking, because this can't be changed by any LU
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
      master_singlehomed = master.secondary_ip == master.primary_ip
      if master_singlehomed and self.op.secondary_ip:
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
                                   " homed cluster", errors.ECODE_INVAL)

      if node.offline:
        if self.affected_instances:
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
                                     " node has instances (%s) configured"
                                     " to use it" % self.affected_instances)
      else:
        # On online nodes, check that no instances are running, and that
        # the node has the new ip and we can reach it.
        for instance in self.affected_instances:
          _CheckInstanceDown(self, instance, "cannot change secondary ip")

        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
        if master.name != node.name:
          # check reachability from master secondary ip to new secondary ip
          if not netutils.TcpPing(self.op.secondary_ip,
                                  constants.DEFAULT_NODED_PORT,
                                  source=master.secondary_ip):
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                       " based ping to node daemon port",
                                       errors.ECODE_ENVIRON)

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node
    old_role = self.old_role
    new_role = self.new_role

    result = []

    if self.op.ndparams:
      node.ndparams = self.new_ndparams

    for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))

    if new_role != old_role:
      # Tell the node to demote itself, if no longer MC and not offline
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s", msg)

      new_flags = self._R2F[new_role]
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
        if of != nf:
          result.append((desc, str(nf)))
      (node.master_candidate, node.drained, node.offline) = new_flags

      # we locked all nodes, we adjust the CP before updating this node
      if self.lock_all:
        _AdjustCandidatePool(self, [node.name])

    if self.op.secondary_ip:
      node.secondary_ip = self.op.secondary_ip
      result.append(("secondary_ip", self.op.secondary_ip))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup if the mc
    # flag changed
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)

    return result


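# Illustrative walk-through of the flag/role mapping used by LUSetNodeParams
# above (derived from its _F2R/_R2F tables): a node whose current
# (master_candidate, drained, offline) flags are (True, False, False) has
# old_role == _ROLE_CANDIDATE; requesting offline=True yields
# new_role == _ROLE_OFFLINE, and _R2F[new_role] == (False, False, True)
# is written back to the node's three flags in Exec.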
class LUPowercycleNode(NoHooksLU):
  """Powercycles a node.

  """
  _OP_PARAMS = [
    _PNodeName,
    _PForce,
    ]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload


class LUQueryClusterInfo(NoHooksLU):
  """Query cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}

    # Filter just for enabled hypervisors
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = {}
      for hv_name, hv_params in hv_dict.items():
        if hv_name in cluster.enabled_hypervisors:
          os_hvp[os_name][hv_name] = hv_params

    # Convert ip_family to ip_version
    primary_ip_version = constants.IP4_VERSION
    if cluster.primary_ip_family == netutils.IP6Address.family:
      primary_ip_version = constants.IP6_VERSION

    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.enabled_hypervisors[0],
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "nicparams": cluster.nicparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      "primary_ip_version": primary_ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
      }

    return result


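# Illustrative example of the os_hvp filtering in LUQueryClusterInfo.Exec
# above (OS and hypervisor names are assumptions for illustration only):
#   cluster.os_hvp          == {"debootstrap": {"kvm": {...}, "xen-pvm": {...}}}
#   enabled_hypervisors     == ["kvm"]
#   -> reported os_hvp      == {"debootstrap": {"kvm": {...}}}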
class LUQueryConfigValues(NoHooksLU):
  """Return configuration values.

  """
  _OP_PARAMS = [_POutputFields]
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause", "volume_group_name")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Collect and return the requested configuration values.

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      elif field == "volume_group_name":
        entry = self.cfg.GetVGName()
      else:
        raise errors.ParameterError(field)
      values.append(entry)
    return values


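# Illustrative query against LUQueryConfigValues above (returned values are
# made up; the field names come from _FIELDS_STATIC):
#   output_fields == ["cluster_name", "volume_group_name"]
#   -> ["cluster.example.com", "xenvg"]
# Values are returned in the same order as the requested fields.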
class LUActivateInstanceDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  _OP_PARAMS = [
    _PInstanceName,
    ("ignore_size", False, ht.TBool),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
              _AssembleInstanceDisks(self, self.instance,
                                     ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info


def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: a tuple of (disks_ok, device_info); device_info is a list of
      (host, instance_visible_name, node_visible_name) triples with the
      mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  # With the two-pass mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for inst_disk in disks:
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for inst_disk in disks:
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info


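# Illustrative return value of _AssembleInstanceDisks above for a healthy
# one-disk DRBD instance (node name, iv_name and device path are made up):
#   (True, [("node1.example.com", "disk/0", "/dev/drbd0")])
# The first element is the overall status, the second the per-disk mapping
# collected in the second (primary-only) pass.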
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                           ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")


class LUDeactivateInstanceDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  _OP_PARAMS = [
    _PInstanceName,
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    """
    instance = self.instance
    _SafeShutdownInstanceDisks(self, instance)


def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks that the instance is not running before calling
  _ShutdownInstanceDisks.

  """
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)


def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on

  """
  if disks is None:
    return instance.disks
  else:
    if not set(disks).issubset(instance.disks):
      raise errors.ProgrammerError("Can only act on disks belonging to the"
                                   " target instance")
    return disks


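# Usage sketch for _ExpandCheckDisks above:
#   _ExpandCheckDisks(instance, None)               -> all of instance.disks
#   _ExpandCheckDisks(instance, instance.disks[:1]) -> just the first disk
# Passing disk objects that do not belong to the instance raises
# ProgrammerError.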
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If ignore_primary is true, errors on the primary node are only
  logged and do not cause a failure result; errors on any other node
  always do.

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if not ignore_primary or node != instance.primary_node:
          all_result = False
  return all_result


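# Behaviour sketch for the ignore_primary flag of _ShutdownInstanceDisks
# above: a failed blockdev shutdown is always logged, but it only flips the
# return value to False when it happens on a non-primary node or when
# ignore_primary is False; with ignore_primary=True, primary-node failures
# leave the result untouched.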
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  free_mem = nodeinfo[node].payload.get('memory_free', None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)


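# Illustrative payload handled by _CheckNodeFreeMemory above (numbers are
# made up): the node is expected to report an integer "memory_free" value
# in MiB, e.g. payload == {"memory_free": 4096}; a request of 2048 MiB
# passes, while 8192 MiB raises OpPrereqError with ECODE_NORES.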
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Checks if nodes have enough free disk space in all the VGs.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type req_sizes: C{dict}
  @param req_sizes: the hash of vg and corresponding amount of disk in
      MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  if req_sizes is not None:
    for vg, req_size in req_sizes.iteritems():
      _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)


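# Illustrative req_sizes argument for _CheckNodesFreeDiskPerVG above (the
# volume group name is an assumption): {"xenvg": 1024} requests a check
# for 1024 MiB of free space in VG "xenvg" on every listed node; passing
# None skips the check entirely.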
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
  """Checks if nodes have enough free disk space in the specified VG.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    vg_free = info.payload.get("vg_free", None)
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node"
                                 " %s for vg %s, result was '%s'" %
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s"
                                 " vg %s: required %d MiB, available %d MiB" %
                                 (node, vg, requested, vg_free),
                                 errors.ECODE_NORES)


class LUStartupInstance(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    _PForce,
    _PIgnoreOfflineNodes,
    ("hvparams", ht.EmptyDict, ht.TDict),
    ("beparams", ht.EmptyDict, ht.TDict),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    # extra beparams
    if self.op.beparams:
      # fill the beparams dict
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra hvparams
    if self.op.hvparams:
      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = cluster.FillHV(instance)
      filled_hvp.update(self.op.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)

    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")

      if self.op.hvparams or self.op.beparams:
        self.proc.LogWarning("Overridden parameters are ignored")
    else:
      _CheckNodeOnline(self, instance.primary_node)

      bep = self.cfg.GetClusterInfo().FillBE(instance)

      # check bridges existence
      _CheckInstanceBridgesExist(self, instance)

      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
      remote_info.Raise("Error checking node %s" % instance.primary_node,
                        prereq=True, ecode=errors.ECODE_ENVIRON)
      if not remote_info.payload: # not running already
        _CheckNodeFreeMemory(self, instance.primary_node,
                             "starting instance %s" % instance.name,
                             bep[constants.BE_MEMORY], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    """
    instance = self.instance
    force = self.op.force

    self.cfg.MarkInstanceUp(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as started")
    else:
      node_current = instance.primary_node

      _StartInstanceDisks(self, instance, force)

      result = self.rpc.call_instance_start(node_current, instance,
                                            self.op.hvparams, self.op.beparams)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance: %s" % msg)


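# Illustrative one-off overrides accepted by LUStartupInstance above
# (parameter names/values are assumptions for illustration only):
#   beparams == {"memory": 2048}, hvparams == {"kernel_args": "single"}
# They are validated in CheckArguments/CheckPrereq and passed straight to
# call_instance_start; when the primary node is offline and
# ignore_offline_nodes is set, the overrides are ignored with a warning.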
class LURebootInstance(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("ignore_secondaries", False, ht.TBool),
    ("reboot_type", ht.NoDefault, ht.TElemOf(constants.REBOOT_TYPES)),
    _PShutdownTimeout,
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    node_current = instance.primary_node

    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                       constants.INSTANCE_REBOOT_HARD]:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, node_current)
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             reboot_type,
                                             self.op.shutdown_timeout)
      result.Raise("Could not reboot instance")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance,
                                               self.op.shutdown_timeout)
      result.Raise("Could not shutdown instance for full reboot")
      _ShutdownInstanceDisks(self, instance)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(node_current, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)


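# Reboot handling sketch for LURebootInstance above: soft and hard reboots
# are delegated to the node daemon via call_instance_reboot; any other
# allowed reboot_type (the "full" reboot) is emulated as shutdown, disk
# deactivation/reactivation and a fresh start, and the instance is marked
# up again in either case.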
class LUShutdownInstance(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    _PIgnoreOfflineNodes,
    ("timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT, ht.TPositiveInt),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.op.timeout
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    self.primary_offline = \
      self.cfg.GetNodeInfo(self.instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")
    else:
      _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    node_current = instance.primary_node
    timeout = self.op.timeout

    self.cfg.MarkInstanceDown(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
      msg = result.fail_msg
      if msg:
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)

      _ShutdownInstanceDisks(self, instance)


class LUReinstallInstance(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("os_type", None, ht.TMaybeString),
    ("force_variant", False, ht.TBool),
    ("osparams", None, ht.TOr(ht.TDict, ht.TNone)),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
                     " offline, cannot reinstall")
    for node in instance.secondary_nodes:
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
                       " cannot reinstall")

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot reinstall")

    if self.op.os_type is not None:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
      instance_os = self.op.os_type
    else:
      instance_os = instance.os

    nodelist = list(instance.all_nodes)

    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = None

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      # Write to configuration
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
                                             self.op.debug_level,
                                             osparams=self.os_inst)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)


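# Illustrative osparams override for LUReinstallInstance above (the
# parameter name/value is purely an assumption): passing
# osparams == {"dhcp": "yes"} merges it over the instance's stored OS
# parameters via _GetUpdatedParams and hands the result (self.os_inst) to
# call_instance_os_add.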
class LURecreateInstanceDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("disks", ht.EmptyList, ht.TListOf(ht.TPositiveInt)),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot recreate disks")

    if not self.op.disks:
      self.op.disks = range(len(instance.disks))
    else:
      for idx in self.op.disks:
        if idx >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
                                     errors.ECODE_INVAL)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Recreate the disks.

    """
    to_skip = []
    for idx, _ in enumerate(self.instance.disks):
      if idx not in self.op.disks: # disk idx has not been passed in
        to_skip.append(idx)
        continue

    _CreateDisks(self, self.instance, to_skip=to_skip)


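# Illustrative disk selection in LURecreateInstanceDisks above: for an
# instance with three disks and op.disks == [1], to_skip becomes [0, 2],
# so only disk index 1 is recreated by _CreateDisks; an empty op.disks
# selects all disk indices in CheckPrereq.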
class LURenameInstance(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("new_name", ht.NoDefault, ht.TNonEmptyString),
    ("ip_check", False, ht.TBool),
    ("name_check", True, ht.TBool),
    ]

  def CheckArguments(self):
    """Check arguments.

    """
    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do ip check without a name check",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None
    _CheckNodeOnline(self, instance.primary_node)
    _CheckInstanceDown(self, instance, "cannot rename")
    self.instance = instance

    new_name = self.op.new_name
    if self.op.name_check:
      hostname = netutils.GetHostname(name=new_name)
      new_name = self.op.new_name = hostname.name
      if (self.op.ip_check and
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (hostname.ip, new_name),
                                   errors.ECODE_NOTUNIQUE)

    instance_list = self.cfg.GetInstanceList()
    if new_name in instance_list and new_name != instance.name:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name, errors.ECODE_EXISTS)

  def Exec(self, feedback_fn):
    """Rename the instance.

    """
    inst = self.instance
    old_name = inst.name

    rename_file_storage = False
    if (inst.disk_template == constants.DT_FILE and
        self.op.new_name != inst.name):
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      rename_file_storage = True

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if rename_file_storage:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (inst.primary_node, old_file_storage_dir,
                    new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    try:
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name, self.op.debug_level)
      msg = result.fail_msg
      if msg:
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.proc.LogWarning(msg)
    finally:
      _ShutdownInstanceDisks(self, inst)

    return inst.name


class LURemoveInstance(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("ignore_failures", False, ht.TBool),
    _PShutdownTimeout,
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
    nl = [self.cfg.GetMasterNode()]
    nl_post = list(self.instance.all_nodes) + nl
    return env, nl, nl_post

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    """
    instance = self.instance
    logging.info("Shutting down instance %s on node %s",
                 instance.name, instance.primary_node)

    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_failures:
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, instance.primary_node, msg))

    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)


def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
  """Utility function to remove an instance.

  """
  logging.info("Removing block devices for instance %s", instance.name)

  if not _RemoveDisks(lu, instance):
    if not ignore_failures:
      raise errors.OpExecError("Can't remove instance's disks")
    feedback_fn("Warning: can't remove instance's disks")

  logging.info("Removing instance %s out of cluster config", instance.name)

  lu.cfg.RemoveInstance(instance.name)

  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
    "Instance lock removal conflict"

  # Remove lock for the instance
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name


class LUQueryInstances(NoHooksLU):
  """Logical unit for querying instances.

  """
  # pylint: disable-msg=W0142
  _OP_PARAMS = [
    _POutputFields,
    ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
    ("use_locking", False, ht.TBool),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    self.iq = _InstanceQuery(self.op.names, self.op.output_fields,
                             self.op.use_locking)

  def ExpandNames(self):
    self.iq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.iq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.iq.OldStyleQuery(self)


class LUFailoverInstance(LogicalUnit):
5697
  """Failover an instance.
5698

5699
  """
5700
  HPATH = "instance-failover"
5701
  HTYPE = constants.HTYPE_INSTANCE
5702
  _OP_PARAMS = [
5703
    _PInstanceName,
5704
    ("ignore_consistency", False, ht.TBool),
5705
    _PShutdownTimeout,
5706
    ]
5707
  REQ_BGL = False
5708

    
5709
  def ExpandNames(self):
5710
    self._ExpandAndLockInstance()
5711
    self.needed_locks[locking.LEVEL_NODE] = []
5712
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5713

    
5714
  def DeclareLocks(self, level):
5715
    if level == locking.LEVEL_NODE:
5716
      self._LockInstancesNodes()
5717

    
5718
  def BuildHooksEnv(self):
5719
    """Build hooks env.
5720

5721
    This runs on master, primary and secondary nodes of the instance.
5722

5723
    """
5724
    instance = self.instance
5725
    source_node = instance.primary_node
5726
    target_node = instance.secondary_nodes[0]
5727
    env = {
5728
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5729
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5730
      "OLD_PRIMARY": source_node,
5731
      "OLD_SECONDARY": target_node,
5732
      "NEW_PRIMARY": target_node,
5733
      "NEW_SECONDARY": source_node,
5734
      }
5735
    env.update(_BuildInstanceHookEnvByObject(self, instance))
5736
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5737
    nl_post = list(nl)
5738
    nl_post.append(source_node)
5739
    return env, nl, nl_post
5740

    
5741
  def CheckPrereq(self):
5742
    """Check prerequisites.
5743

5744
    This checks that the instance is in the cluster.
5745

5746
    """
5747
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5748
    assert self.instance is not None, \
5749
      "Cannot retrieve locked instance %s" % self.op.instance_name
5750

    
5751
    bep = self.cfg.GetClusterInfo().FillBE(instance)
5752
    if instance.disk_template not in constants.DTS_NET_MIRROR:
5753
      raise errors.OpPrereqError("Instance's disk layout is not"
5754
                                 " network mirrored, cannot failover.",
5755
                                 errors.ECODE_STATE)
5756

    
5757
    secondary_nodes = instance.secondary_nodes
5758
    if not secondary_nodes:
5759
      raise errors.ProgrammerError("no secondary node but using "
5760
                                   "a mirrored disk template")
5761

    
5762
    target_node = secondary_nodes[0]
5763
    _CheckNodeOnline(self, target_node)
5764
    _CheckNodeNotDrained(self, target_node)
5765
    if instance.admin_up:
5766
      # check memory requirements on the secondary node
5767
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5768
                           instance.name, bep[constants.BE_MEMORY],
5769
                           instance.hypervisor)
5770
    else:
5771
      self.LogInfo("Not checking memory on the secondary node as"
5772
                   " instance will not be started")
5773

    
5774
    # check bridge existence
5775
    _CheckInstanceBridgesExist(self, instance, node=target_node)
5776

    
5777
  def Exec(self, feedback_fn):
5778
    """Failover an instance.
5779

5780
    The failover is done by shutting it down on its present node and
5781
    starting it on the secondary.
5782

5783
    """
5784
    instance = self.instance
5785
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)
5786

    
5787
    source_node = instance.primary_node
5788
    target_node = instance.secondary_nodes[0]
5789

    
5790
    if instance.admin_up:
5791
      feedback_fn("* checking disk consistency between source and target")
5792
      for dev in instance.disks:
5793
        # for drbd, these are drbd over lvm
5794
        if not _CheckDiskConsistency(self, dev, target_node, False):
5795
          if not self.op.ignore_consistency:
5796
            raise errors.OpExecError("Disk %s is degraded on target node,"
5797
                                     " aborting failover." % dev.iv_name)
5798
    else:
5799
      feedback_fn("* not checking disk consistency as instance is not running")
5800

    
5801
    feedback_fn("* shutting down instance on source node")
5802
    logging.info("Shutting down instance %s on node %s",
5803
                 instance.name, source_node)
5804

    
5805
    result = self.rpc.call_instance_shutdown(source_node, instance,
5806
                                             self.op.shutdown_timeout)
5807
    msg = result.fail_msg
5808
    if msg:
5809
      if self.op.ignore_consistency or primary_node.offline:
5810
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
5811
                             " Proceeding anyway. Please make sure node"
5812
                             " %s is down. Error details: %s",
5813
                             instance.name, source_node, source_node, msg)
5814
      else:
5815
        raise errors.OpExecError("Could not shutdown instance %s on"
5816
                                 " node %s: %s" %
5817
                                 (instance.name, source_node, msg))
5818

    
5819
    feedback_fn("* deactivating the instance's disks on source node")
5820
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5821
      raise errors.OpExecError("Can't shut down the instance's disks.")
5822

    
5823
    instance.primary_node = target_node
5824
    # distribute new instance config to the other nodes
5825
    self.cfg.Update(instance, feedback_fn)
5826

    
5827
    # Only start the instance if it's marked as up
5828
    if instance.admin_up:
5829
      feedback_fn("* activating the instance's disks on target node")
5830
      logging.info("Starting instance %s on node %s",
5831
                   instance.name, target_node)
5832

    
5833
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
5834
                                           ignore_secondaries=True)
5835
      if not disks_ok:
5836
        _ShutdownInstanceDisks(self, instance)
5837
        raise errors.OpExecError("Can't activate the instance's disks")
5838

    
5839
      feedback_fn("* starting the instance on the target node")
5840
      result = self.rpc.call_instance_start(target_node, instance, None, None)
5841
      msg = result.fail_msg
5842
      if msg:
5843
        _ShutdownInstanceDisks(self, instance)
5844
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5845
                                 (instance.name, target_node, msg))
5846

    
5847

    
5848
class LUMigrateInstance(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    _PMigrationMode,
    _PMigrationLive,
    ("cleanup", False, ht.TBool),
    ]

  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       self.op.cleanup)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]
    env = _BuildInstanceHookEnvByObject(self, instance)
    env["MIGRATE_LIVE"] = self._migrater.live
    env["MIGRATE_CLEANUP"] = self.op.cleanup
    env.update({
        "OLD_PRIMARY": source_node,
        "OLD_SECONDARY": target_node,
        "NEW_PRIMARY": target_node,
        "NEW_SECONDARY": source_node,
        })
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    nl_post = list(nl)
    nl_post.append(source_node)
    return env, nl, nl_post


class LUMoveInstance(LogicalUnit):
5905
  """Move an instance by data-copying.
5906

5907
  """
5908
  HPATH = "instance-move"
5909
  HTYPE = constants.HTYPE_INSTANCE
5910
  _OP_PARAMS = [
5911
    _PInstanceName,
5912
    ("target_node", ht.NoDefault, ht.TNonEmptyString),
5913
    _PShutdownTimeout,
5914
    ]
5915
  REQ_BGL = False
5916

    
5917
  def ExpandNames(self):
5918
    self._ExpandAndLockInstance()
5919
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5920
    self.op.target_node = target_node
5921
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
5922
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5923

    
5924
  def DeclareLocks(self, level):
5925
    if level == locking.LEVEL_NODE:
5926
      self._LockInstancesNodes(primary_only=True)
5927

    
5928
  def BuildHooksEnv(self):
5929
    """Build hooks env.
5930

5931
    This runs on master, primary and secondary nodes of the instance.
5932

5933
    """
5934
    env = {
5935
      "TARGET_NODE": self.op.target_node,
5936
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5937
      }
5938
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5939
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5940
                                       self.op.target_node]
5941
    return env, nl, nl
5942

    
5943
  def CheckPrereq(self):
5944
    """Check prerequisites.
5945

5946
    This checks that the instance is in the cluster.
5947

5948
    """
5949
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5950
    assert self.instance is not None, \
5951
      "Cannot retrieve locked instance %s" % self.op.instance_name
5952

    
5953
    node = self.cfg.GetNodeInfo(self.op.target_node)
5954
    assert node is not None, \
5955
      "Cannot retrieve locked node %s" % self.op.target_node
5956

    
5957
    self.target_node = target_node = node.name
5958

    
5959
    if target_node == instance.primary_node:
5960
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
5961
                                 (instance.name, target_node),
5962
                                 errors.ECODE_STATE)
5963

    
5964
    bep = self.cfg.GetClusterInfo().FillBE(instance)
5965

    
5966
    for idx, dsk in enumerate(instance.disks):
5967
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5968
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5969
                                   " cannot copy" % idx, errors.ECODE_STATE)
5970

    
5971
    _CheckNodeOnline(self, target_node)
5972
    _CheckNodeNotDrained(self, target_node)
5973
    _CheckNodeVmCapable(self, target_node)
5974

    
5975
    if instance.admin_up:
5976
      # check memory requirements on the target node
5977
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5978
                           instance.name, bep[constants.BE_MEMORY],
5979
                           instance.hypervisor)
5980
    else:
5981
      self.LogInfo("Not checking memory on the secondary node as"
5982
                   " instance will not be started")
5983

    
5984
    # check bridge existence
5985
    _CheckInstanceBridgesExist(self, instance, node=target_node)
5986

    
5987
  def Exec(self, feedback_fn):
5988
    """Move an instance.
5989

5990
    The move is done by shutting it down on its present node, copying
5991
    the data over (slow) and starting it on the new node.
5992

5993
    """
5994
    instance = self.instance
5995

    
5996
    source_node = instance.primary_node
5997
    target_node = self.target_node
5998

    
5999
    self.LogInfo("Shutting down instance %s on source node %s",
6000
                 instance.name, source_node)
6001

    
6002
    result = self.rpc.call_instance_shutdown(source_node, instance,
6003
                                             self.op.shutdown_timeout)
6004
    msg = result.fail_msg
6005
    if msg:
6006
      if self.op.ignore_consistency:
6007
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
6008
                             " Proceeding anyway. Please make sure node"
6009
                             " %s is down. Error details: %s",
6010
                             instance.name, source_node, source_node, msg)
6011
      else:
6012
        raise errors.OpExecError("Could not shutdown instance %s on"
6013
                                 " node %s: %s" %
6014
                                 (instance.name, source_node, msg))
6015

    
6016
    # create the target disks
6017
    try:
6018
      _CreateDisks(self, instance, target_node=target_node)
6019
    except errors.OpExecError:
6020
      self.LogWarning("Device creation failed, reverting...")
6021
      try:
6022
        _RemoveDisks(self, instance, target_node=target_node)
6023
      finally:
6024
        self.cfg.ReleaseDRBDMinors(instance.name)
6025
        raise
6026

    
6027
    cluster_name = self.cfg.GetClusterInfo().cluster_name
6028

    
6029
    errs = []
6030
    # activate, get path, copy the data over
6031
    for idx, disk in enumerate(instance.disks):
6032
      self.LogInfo("Copying data for disk %d", idx)
6033
      result = self.rpc.call_blockdev_assemble(target_node, disk,
6034
                                               instance.name, True)
6035
      if result.fail_msg:
6036
        self.LogWarning("Can't assemble newly created disk %d: %s",
6037
                        idx, result.fail_msg)
6038
        errs.append(result.fail_msg)
6039
        break
6040
      dev_path = result.payload
6041
      result = self.rpc.call_blockdev_export(source_node, disk,
6042
                                             target_node, dev_path,
6043
                                             cluster_name)
6044
      if result.fail_msg:
6045
        self.LogWarning("Can't copy data over for disk %d: %s",
6046
                        idx, result.fail_msg)
6047
        errs.append(result.fail_msg)
6048
        break
6049

    
6050
    if errs:
6051
      self.LogWarning("Some disks failed to copy, aborting")
6052
      try:
6053
        _RemoveDisks(self, instance, target_node=target_node)
6054
      finally:
6055
        self.cfg.ReleaseDRBDMinors(instance.name)
6056
        raise errors.OpExecError("Errors during disk copy: %s" %
6057
                                 (",".join(errs),))
6058

    
6059
    instance.primary_node = target_node
6060
    self.cfg.Update(instance, feedback_fn)
6061

    
6062
    self.LogInfo("Removing the disks on the original node")
6063
    _RemoveDisks(self, instance, target_node=source_node)
6064

    
6065
    # Only start the instance if it's marked as up
6066
    if instance.admin_up:
6067
      self.LogInfo("Starting instance %s on node %s",
6068
                   instance.name, target_node)
6069

    
6070
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
6071
                                           ignore_secondaries=True)
6072
      if not disks_ok:
6073
        _ShutdownInstanceDisks(self, instance)
6074
        raise errors.OpExecError("Can't activate the instance's disks")
6075

    
6076
      result = self.rpc.call_instance_start(target_node, instance, None, None)
6077
      msg = result.fail_msg
6078
      if msg:
6079
        _ShutdownInstanceDisks(self, instance)
6080
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6081
                                 (instance.name, target_node, msg))
6082

    
6083

    
6084
class LUMigrateNode(LogicalUnit):
  """Migrate all instances from a node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  _OP_PARAMS = [
    _PNodeName,
    _PMigrationMode,
    _PMigrationLive,
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    # Create tasklets for migrating instances for all instances on this node
    names = []
    tasklets = []

    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
      logging.debug("Migrating instance %s", inst.name)
      names.append(inst.name)

      tasklets.append(TLMigrateInstance(self, inst.name, False))

    self.tasklets = tasklets

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = names

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "NODE_NAME": self.op.node_name,
      }

    nl = [self.cfg.GetMasterNode()]

    return (env, nl, nl)


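# Note: LUMigrateNode defines no Exec() of its own; the LU machinery runs the
# TLMigrateInstance tasklets declared in ExpandNames (one per primary instance
# of the node), so the class below carries the actual migration logic.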
class TLMigrateInstance(Tasklet):
6141
  """Tasklet class for instance migration.
6142

6143
  @type live: boolean
6144
  @ivar live: whether the migration will be done live or non-live;
6145
      this variable is initialized only after CheckPrereq has run
6146

6147
  """
6148
  def __init__(self, lu, instance_name, cleanup):
6149
    """Initializes this class.
6150

6151
    """
6152
    Tasklet.__init__(self, lu)
6153

    
6154
    # Parameters
6155
    self.instance_name = instance_name
6156
    self.cleanup = cleanup
6157
    self.live = False # will be overridden later
6158

    
6159
  def CheckPrereq(self):
6160
    """Check prerequisites.
6161

6162
    This checks that the instance is in the cluster.
6163

6164
    """
6165
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6166
    instance = self.cfg.GetInstanceInfo(instance_name)
6167
    assert instance is not None
6168

    
6169
    if instance.disk_template != constants.DT_DRBD8:
6170
      raise errors.OpPrereqError("Instance's disk layout is not"
6171
                                 " drbd8, cannot migrate.", errors.ECODE_STATE)
6172

    
6173
    secondary_nodes = instance.secondary_nodes
6174
    if not secondary_nodes:
6175
      raise errors.ConfigurationError("No secondary node but using"
6176
                                      " drbd8 disk template")
6177

    
6178
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
6179

    
6180
    target_node = secondary_nodes[0]
6181
    # check memory requirements on the secondary node
6182
    _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
6183
                         instance.name, i_be[constants.BE_MEMORY],
6184
                         instance.hypervisor)
6185

    
6186
    # check bridge existence
6187
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
6188

    
6189
    if not self.cleanup:
6190
      _CheckNodeNotDrained(self.lu, target_node)
6191
      result = self.rpc.call_instance_migratable(instance.primary_node,
6192
                                                 instance)
6193
      result.Raise("Can't migrate, please use failover",
6194
                   prereq=True, ecode=errors.ECODE_STATE)
6195

    
6196
    self.instance = instance
6197

    
6198
    if self.lu.op.live is not None and self.lu.op.mode is not None:
6199
      raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
6200
                                 " parameters are accepted",
6201
                                 errors.ECODE_INVAL)
6202
    if self.lu.op.live is not None:
6203
      if self.lu.op.live:
6204
        self.lu.op.mode = constants.HT_MIGRATION_LIVE
6205
      else:
6206
        self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
6207
      # reset the 'live' parameter to None so that repeated
6208
      # invocations of CheckPrereq do not raise an exception
6209
      self.lu.op.live = None
6210
    elif self.lu.op.mode is None:
6211
      # read the default value from the hypervisor
6212
      i_hv = self.cfg.GetClusterInfo().FillHV(instance, skip_globals=False)
6213
      self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
6214

    
6215
    self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
6216

    
6217
  def _WaitUntilSync(self):
6218
    """Poll with custom rpc for disk sync.
6219

6220
    This uses our own step-based rpc call.
6221

6222
    """
6223
    self.feedback_fn("* wait until resync is done")
6224
    all_done = False
6225
    while not all_done:
6226
      all_done = True
6227
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
6228
                                            self.nodes_ip,
6229
                                            self.instance.disks)
6230
      min_percent = 100
6231
      for node, nres in result.items():
6232
        nres.Raise("Cannot resync disks on node %s" % node)
6233
        node_done, node_percent = nres.payload
6234
        all_done = all_done and node_done
6235
        if node_percent is not None:
6236
          min_percent = min(min_percent, node_percent)
6237
      if not all_done:
6238
        if min_percent < 100:
6239
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
6240
        time.sleep(2)
6241

    
6242
  def _EnsureSecondary(self, node):
6243
    """Demote a node to secondary.
6244

6245
    """
6246
    self.feedback_fn("* switching node %s to secondary mode" % node)
6247

    
6248
    for dev in self.instance.disks:
6249
      self.cfg.SetDiskID(dev, node)
6250

    
6251
    result = self.rpc.call_blockdev_close(node, self.instance.name,
6252
                                          self.instance.disks)
6253
    result.Raise("Cannot change disk to secondary on node %s" % node)
6254

    
6255
  def _GoStandalone(self):
6256
    """Disconnect from the network.
6257

6258
    """
6259
    self.feedback_fn("* changing into standalone mode")
6260
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
6261
                                               self.instance.disks)
6262
    for node, nres in result.items():
6263
      nres.Raise("Cannot disconnect disks node %s" % node)
6264

    
6265
  def _GoReconnect(self, multimaster):
6266
    """Reconnect to the network.
6267

6268
    """
6269
    if multimaster:
6270
      msg = "dual-master"
6271
    else:
6272
      msg = "single-master"
6273
    self.feedback_fn("* changing disks into %s mode" % msg)
6274
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
6275
                                           self.instance.disks,
6276
                                           self.instance.name, multimaster)
6277
    for node, nres in result.items():
6278
      nres.Raise("Cannot change disks config on node %s" % node)
6279

    
6280
  def _ExecCleanup(self):
6281
    """Try to cleanup after a failed migration.
6282

6283
    The cleanup is done by:
6284
      - check that the instance is running only on one node
6285
        (and update the config if needed)
6286
      - change disks on its secondary node to secondary
6287
      - wait until disks are fully synchronized
6288
      - disconnect from the network
6289
      - change disks into single-master mode
6290
      - wait again until disks are fully synchronized
6291

6292
    """
6293
    instance = self.instance
6294
    target_node = self.target_node
6295
    source_node = self.source_node
6296

    
6297
    # check running on only one node
6298
    self.feedback_fn("* checking where the instance actually runs"
6299
                     " (if this hangs, the hypervisor might be in"
6300
                     " a bad state)")
6301
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6302
    for node, result in ins_l.items():
6303
      result.Raise("Can't contact node %s" % node)
6304

    
6305
    runningon_source = instance.name in ins_l[source_node].payload
6306
    runningon_target = instance.name in ins_l[target_node].payload
6307

    
6308
    if runningon_source and runningon_target:
6309
      raise errors.OpExecError("Instance seems to be running on two nodes,"
6310
                               " or the hypervisor is confused. You will have"
6311
                               " to ensure manually that it runs only on one"
6312
                               " and restart this operation.")
6313

    
6314
    if not (runningon_source or runningon_target):
6315
      raise errors.OpExecError("Instance does not seem to be running at all."
6316
                               " In this case, it's safer to repair by"
6317
                               " running 'gnt-instance stop' to ensure disk"
6318
                               " shutdown, and then restarting it.")
6319

    
6320
    if runningon_target:
6321
      # the migration has actually succeeded, we need to update the config
6322
      self.feedback_fn("* instance running on secondary node (%s),"
6323
                       " updating config" % target_node)
6324
      instance.primary_node = target_node
6325
      self.cfg.Update(instance, self.feedback_fn)
6326
      demoted_node = source_node
6327
    else:
6328
      self.feedback_fn("* instance confirmed to be running on its"
6329
                       " primary node (%s)" % source_node)
6330
      demoted_node = target_node
6331

    
6332
    self._EnsureSecondary(demoted_node)
6333
    try:
6334
      self._WaitUntilSync()
6335
    except errors.OpExecError:
6336
      # we ignore errors here, since if the device is standalone, it
6337
      # won't be able to sync
6338
      pass
6339
    self._GoStandalone()
6340
    self._GoReconnect(False)
6341
    self._WaitUntilSync()
6342

    
6343
    self.feedback_fn("* done")
6344

    
6345
  def _RevertDiskStatus(self):
6346
    """Try to revert the disk status after a failed migration.
6347

6348
    """
6349
    target_node = self.target_node
6350
    try:
6351
      self._EnsureSecondary(target_node)
6352
      self._GoStandalone()
6353
      self._GoReconnect(False)
6354
      self._WaitUntilSync()
6355
    except errors.OpExecError, err:
6356
      self.lu.LogWarning("Migration failed and I can't reconnect the"
6357
                         " drives: error '%s'\n"
6358
                         "Please look and recover the instance status" %
6359
                         str(err))
6360

    
6361
  def _AbortMigration(self):
6362
    """Call the hypervisor code to abort a started migration.
6363

6364
    """
6365
    instance = self.instance
6366
    target_node = self.target_node
6367
    migration_info = self.migration_info
6368

    
6369
    abort_result = self.rpc.call_finalize_migration(target_node,
6370
                                                    instance,
6371
                                                    migration_info,
6372
                                                    False)
6373
    abort_msg = abort_result.fail_msg
6374
    if abort_msg:
6375
      logging.error("Aborting migration failed on target node %s: %s",
6376
                    target_node, abort_msg)
6377
      # Don't raise an exception here, as we still have to try to revert the
6378
      # disk status, even if this step failed.
6379

    
6380
  def _ExecMigration(self):
6381
    """Migrate an instance.
6382

6383
    The migrate is done by:
6384
      - change the disks into dual-master mode
6385
      - wait until disks are fully synchronized again
6386
      - migrate the instance
6387
      - change disks on the new secondary node (the old primary) to secondary
6388
      - wait until disks are fully synchronized
6389
      - change disks into single-master mode
6390

6391
    """
6392
    instance = self.instance
6393
    target_node = self.target_node
6394
    source_node = self.source_node
6395

    
6396
    self.feedback_fn("* checking disk consistency between source and target")
6397
    for dev in instance.disks:
6398
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6399
        raise errors.OpExecError("Disk %s is degraded or not fully"
6400
                                 " synchronized on target node,"
6401
                                 " aborting migrate." % dev.iv_name)
6402

    
6403
    # First get the migration information from the remote node
6404
    result = self.rpc.call_migration_info(source_node, instance)
6405
    msg = result.fail_msg
6406
    if msg:
6407
      log_err = ("Failed fetching source migration information from %s: %s" %
6408
                 (source_node, msg))
6409
      logging.error(log_err)
6410
      raise errors.OpExecError(log_err)
6411

    
6412
    self.migration_info = migration_info = result.payload
6413

    
6414
    # Then switch the disks to master/master mode
6415
    self._EnsureSecondary(target_node)
6416
    self._GoStandalone()
6417
    self._GoReconnect(True)
6418
    self._WaitUntilSync()
6419

    
6420
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
6421
    result = self.rpc.call_accept_instance(target_node,
6422
                                           instance,
6423
                                           migration_info,
6424
                                           self.nodes_ip[target_node])
6425

    
6426
    msg = result.fail_msg
6427
    if msg:
6428
      logging.error("Instance pre-migration failed, trying to revert"
6429
                    " disk status: %s", msg)
6430
      self.feedback_fn("Pre-migration failed, aborting")
6431
      self._AbortMigration()
6432
      self._RevertDiskStatus()
6433
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6434
                               (instance.name, msg))
6435

    
6436
    self.feedback_fn("* migrating instance to %s" % target_node)
6437
    time.sleep(10)
6438
    result = self.rpc.call_instance_migrate(source_node, instance,
6439
                                            self.nodes_ip[target_node],
6440
                                            self.live)
6441
    msg = result.fail_msg
6442
    if msg:
6443
      logging.error("Instance migration failed, trying to revert"
6444
                    " disk status: %s", msg)
6445
      self.feedback_fn("Migration failed, aborting")
6446
      self._AbortMigration()
6447
      self._RevertDiskStatus()
6448
      raise errors.OpExecError("Could not migrate instance %s: %s" %
6449
                               (instance.name, msg))
6450
    time.sleep(10)
6451

    
6452
    instance.primary_node = target_node
6453
    # distribute new instance config to the other nodes
6454
    self.cfg.Update(instance, self.feedback_fn)
6455

    
6456
    result = self.rpc.call_finalize_migration(target_node,
6457
                                              instance,
6458
                                              migration_info,
6459
                                              True)
6460
    msg = result.fail_msg
6461
    if msg:
6462
      logging.error("Instance migration succeeded, but finalization failed:"
6463
                    " %s", msg)
6464
      raise errors.OpExecError("Could not finalize instance migration: %s" %
6465
                               msg)
6466

    
6467
    self._EnsureSecondary(source_node)
6468
    self._WaitUntilSync()
6469
    self._GoStandalone()
6470
    self._GoReconnect(False)
6471
    self._WaitUntilSync()
6472

    
6473
    self.feedback_fn("* done")
6474

    
6475
  def Exec(self, feedback_fn):
6476
    """Perform the migration.
6477

6478
    """
6479
    feedback_fn("Migrating instance %s" % self.instance.name)
6480

    
6481
    self.feedback_fn = feedback_fn
6482

    
6483
    self.source_node = self.instance.primary_node
6484
    self.target_node = self.instance.secondary_nodes[0]
6485
    self.all_nodes = [self.source_node, self.target_node]
6486
    self.nodes_ip = {
6487
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6488
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6489
      }
6490

    
6491
    if self.cleanup:
6492
      return self._ExecCleanup()
6493
    else:
6494
      return self._ExecMigration()
6495

    
6496

    
6497
def _CreateBlockDev(lu, node, instance, device, force_create,
6498
                    info, force_open):
6499
  """Create a tree of block devices on a given node.
6500

6501
  If this device type has to be created on secondaries, create it and
6502
  all its children.
6503

6504
  If not, just recurse to children keeping the same 'force' value.
6505

6506
  @param lu: the lu on whose behalf we execute
6507
  @param node: the node on which to create the device
6508
  @type instance: L{objects.Instance}
6509
  @param instance: the instance which owns the device
6510
  @type device: L{objects.Disk}
6511
  @param device: the device to create
6512
  @type force_create: boolean
6513
  @param force_create: whether to force creation of this device; this
6514
      will be changed to True whenever we find a device which has
6515
      CreateOnSecondary() attribute
6516
  @param info: the extra 'metadata' we should attach to the device
6517
      (this will be represented as a LVM tag)
6518
  @type force_open: boolean
6519
  @param force_open: this parameter will be passed to the
6520
      L{backend.BlockdevCreate} function where it specifies
6521
      whether we run on primary or not, and it affects both
6522
      the child assembly and the device own Open() execution
6523

6524
  """
6525
  if device.CreateOnSecondary():
6526
    force_create = True
6527

    
6528
  if device.children:
6529
    for child in device.children:
6530
      _CreateBlockDev(lu, node, instance, child, force_create,
6531
                      info, force_open)
6532

    
6533
  if not force_create:
6534
    return
6535

    
6536
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
6537

    
6538

    
6539
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6540
  """Create a single block device on a given node.
6541

6542
  This will not recurse over children of the device, so they must be
6543
  created in advance.
6544

6545
  @param lu: the lu on whose behalf we execute
6546
  @param node: the node on which to create the device
6547
  @type instance: L{objects.Instance}
6548
  @param instance: the instance which owns the device
6549
  @type device: L{objects.Disk}
6550
  @param device: the device to create
6551
  @param info: the extra 'metadata' we should attach to the device
6552
      (this will be represented as a LVM tag)
6553
  @type force_open: boolean
6554
  @param force_open: this parameter will be passed to the
6555
      L{backend.BlockdevCreate} function where it specifies
6556
      whether we run on primary or not, and it affects both
6557
      the child assembly and the device own Open() execution
6558

6559
  """
6560
  lu.cfg.SetDiskID(device, node)
6561
  result = lu.rpc.call_blockdev_create(node, device, device.size,
6562
                                       instance.name, force_open, info)
6563
  result.Raise("Can't create block device %s on"
6564
               " node %s for instance %s" % (device, node, instance.name))
6565
  if device.physical_id is None:
6566
    device.physical_id = result.payload
6567

    
6568

    
6569
def _GenerateUniqueNames(lu, exts):
6570
  """Generate a suitable LV name.
6571

6572
  This will generate a logical volume name for the given instance.
6573

6574
  """
6575
  results = []
6576
  for val in exts:
6577
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6578
    results.append("%s%s" % (new_id, val))
6579
  return results
6580

    
6581

    
6582
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgname, names, iv_name,
6583
                         p_minor, s_minor):
6584
  """Generate a drbd8 device complete with its children.
6585

6586
  """
6587
  port = lu.cfg.AllocatePort()
6588
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6589
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
6590
                          logical_id=(vgname, names[0]))
6591
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6592
                          logical_id=(vgname, names[1]))
6593
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6594
                          logical_id=(primary, secondary, port,
6595
                                      p_minor, s_minor,
6596
                                      shared_secret),
6597
                          children=[dev_data, dev_meta],
6598
                          iv_name=iv_name)
6599
  return drbd_dev
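
# Note: each DRBD8 disk produced above is backed by two logical volumes on
# both nodes (the data LV of the requested size plus a 128 MiB metadata LV),
# tied together through the (primary, secondary, port, minors, secret)
# logical_id of the DRBD8 device.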
6600

    
6601

    
6602
def _GenerateDiskTemplate(lu, template_name,
6603
                          instance_name, primary_node,
6604
                          secondary_nodes, disk_info,
6605
                          file_storage_dir, file_driver,
6606
                          base_index, feedback_fn):
6607
  """Generate the entire disk layout for a given template type.
6608

6609
  """
6610
  #TODO: compute space requirements
6611

    
6612
  vgname = lu.cfg.GetVGName()
6613
  disk_count = len(disk_info)
6614
  disks = []
6615
  if template_name == constants.DT_DISKLESS:
6616
    pass
6617
  elif template_name == constants.DT_PLAIN:
6618
    if len(secondary_nodes) != 0:
6619
      raise errors.ProgrammerError("Wrong template configuration")
6620

    
6621
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6622
                                      for i in range(disk_count)])
6623
    for idx, disk in enumerate(disk_info):
6624
      disk_index = idx + base_index
6625
      vg = disk.get("vg", vgname)
6626
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
6627
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
6628
                              logical_id=(vg, names[idx]),
6629
                              iv_name="disk/%d" % disk_index,
6630
                              mode=disk["mode"])
6631
      disks.append(disk_dev)
6632
  elif template_name == constants.DT_DRBD8:
6633
    if len(secondary_nodes) != 1:
6634
      raise errors.ProgrammerError("Wrong template configuration")
6635
    remote_node = secondary_nodes[0]
6636
    minors = lu.cfg.AllocateDRBDMinor(
6637
      [primary_node, remote_node] * len(disk_info), instance_name)
6638

    
6639
    names = []
6640
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6641
                                               for i in range(disk_count)]):
6642
      names.append(lv_prefix + "_data")
6643
      names.append(lv_prefix + "_meta")
6644
    for idx, disk in enumerate(disk_info):
6645
      disk_index = idx + base_index
6646
      vg = disk.get("vg", vgname)
6647
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6648
                                      disk["size"], vg, names[idx*2:idx*2+2],
6649
                                      "disk/%d" % disk_index,
6650
                                      minors[idx*2], minors[idx*2+1])
6651
      disk_dev.mode = disk["mode"]
6652
      disks.append(disk_dev)
6653
  elif template_name == constants.DT_FILE:
6654
    if len(secondary_nodes) != 0:
6655
      raise errors.ProgrammerError("Wrong template configuration")
6656

    
6657
    _RequireFileStorage()
6658

    
6659
    for idx, disk in enumerate(disk_info):
6660
      disk_index = idx + base_index
6661
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6662
                              iv_name="disk/%d" % disk_index,
6663
                              logical_id=(file_driver,
6664
                                          "%s/disk%d" % (file_storage_dir,
6665
                                                         disk_index)),
6666
                              mode=disk["mode"])
6667
      disks.append(disk_dev)
6668
  else:
6669
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
6670
  return disks


def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name


def _CalcEta(time_taken, written, total_size):
  """Calculates the ETA based on size written and total size.

  @param time_taken: The time taken so far
  @param written: amount written so far
  @param total_size: The total size of data to be written
  @return: The remaining time in seconds

  """
  avg_time = time_taken / float(written)
  return (total_size - written) * avg_time
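
# Illustrative use of _CalcEta (assumed numbers): if 256 MiB out of 1024 MiB
# were written in 32 seconds, the average is 32 / 256.0 = 0.125 s/MiB, so
#   _CalcEta(32, 256, 1024) == (1024 - 256) * 0.125  # -> 96.0 seconds left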


def _WipeDisks(lu, instance):
6694
  """Wipes instance disks.
6695

6696
  @type lu: L{LogicalUnit}
6697
  @param lu: the logical unit on whose behalf we execute
6698
  @type instance: L{objects.Instance}
6699
  @param instance: the instance whose disks we should wipe
6700
  @return: the success of the wipe
6701

6702
  """
6703
  node = instance.primary_node
6704
  for idx, device in enumerate(instance.disks):
6705
    lu.LogInfo("* Wiping disk %d", idx)
6706
    logging.info("Wiping disk %d for instance %s", idx, instance.name)
6707

    
6708
    # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
6709
    # MAX_WIPE_CHUNK at max
6710
    wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
6711
                          constants.MIN_WIPE_CHUNK_PERCENT)
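    # Illustrative sizing (assuming constants.MAX_WIPE_CHUNK = 1024 MiB and
    # constants.MIN_WIPE_CHUNK_PERCENT = 10): a 51200 MiB disk would give
    # min(1024, 51200 / 100.0 * 10) = 1024 MiB per wipe request.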
6712

    
6713
    offset = 0
6714
    size = device.size
6715
    last_output = 0
6716
    start_time = time.time()
6717

    
6718
    while offset < size:
6719
      wipe_size = min(wipe_chunk_size, size - offset)
6720
      result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
6721
      result.Raise("Could not wipe disk %d at offset %d for size %d" %
6722
                   (idx, offset, wipe_size))
6723
      now = time.time()
6724
      offset += wipe_size
6725
      if now - last_output >= 60:
6726
        eta = _CalcEta(now - start_time, offset, size)
6727
        lu.LogInfo(" - done: %.1f%% ETA: %s" %
6728
                   (offset / float(size) * 100, utils.FormatSeconds(eta)))
6729
        last_output = now
6730

    
6731

    
6732
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6733
  """Create all disks for an instance.
6734

6735
  This abstracts away some work from AddInstance.
6736

6737
  @type lu: L{LogicalUnit}
6738
  @param lu: the logical unit on whose behalf we execute
6739
  @type instance: L{objects.Instance}
6740
  @param instance: the instance whose disks we should create
6741
  @type to_skip: list
6742
  @param to_skip: list of indices to skip
6743
  @type target_node: string
6744
  @param target_node: if passed, overrides the target node for creation
6745
  @rtype: boolean
6746
  @return: the success of the creation
6747

6748
  """
6749
  info = _GetInstanceInfoText(instance)
6750
  if target_node is None:
6751
    pnode = instance.primary_node
6752
    all_nodes = instance.all_nodes
6753
  else:
6754
    pnode = target_node
6755
    all_nodes = [pnode]
6756

    
6757
  if instance.disk_template == constants.DT_FILE:
6758
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6759
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
6760

    
6761
    result.Raise("Failed to create directory '%s' on"
6762
                 " node %s" % (file_storage_dir, pnode))
6763

    
6764
  # Note: this needs to be kept in sync with adding of disks in
6765
  # LUSetInstanceParams
6766
  for idx, device in enumerate(instance.disks):
6767
    if to_skip and idx in to_skip:
6768
      continue
6769
    logging.info("Creating volume %s for instance %s",
6770
                 device.iv_name, instance.name)
6771
    #HARDCODE
6772
    for node in all_nodes:
6773
      f_create = node == pnode
6774
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
6775

    
6776

    
6777
def _RemoveDisks(lu, instance, target_node=None):
6778
  """Remove all disks for an instance.
6779

6780
  This abstracts away some work from `AddInstance()` and
6781
  `RemoveInstance()`. Note that in case some of the devices couldn't
6782
  be removed, the removal will continue with the other ones (compare
6783
  with `_CreateDisks()`).
6784

6785
  @type lu: L{LogicalUnit}
6786
  @param lu: the logical unit on whose behalf we execute
6787
  @type instance: L{objects.Instance}
6788
  @param instance: the instance whose disks we should remove
6789
  @type target_node: string
6790
  @param target_node: used to override the node on which to remove the disks
6791
  @rtype: boolean
6792
  @return: the success of the removal
6793

6794
  """
6795
  logging.info("Removing block devices for instance %s", instance.name)
6796

    
6797
  all_result = True
6798
  for device in instance.disks:
6799
    if target_node:
6800
      edata = [(target_node, device)]
6801
    else:
6802
      edata = device.ComputeNodeTree(instance.primary_node)
6803
    for node, disk in edata:
6804
      lu.cfg.SetDiskID(disk, node)
6805
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6806
      if msg:
6807
        lu.LogWarning("Could not remove block device %s on node %s,"
6808
                      " continuing anyway: %s", device.iv_name, node, msg)
6809
        all_result = False
6810

    
6811
  if instance.disk_template == constants.DT_FILE:
6812
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6813
    if target_node:
6814
      tgt = target_node
6815
    else:
6816
      tgt = instance.primary_node
6817
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6818
    if result.fail_msg:
6819
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6820
                    file_storage_dir, instance.primary_node, result.fail_msg)
6821
      all_result = False
6822

    
6823
  return all_result


def _ComputeDiskSizePerVG(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  def _compute(disks, payload):
    """Universal algorithm

    """
    vgs = {}
    for disk in disks:
      vgs[disk["vg"]] = vgs.get(disk["vg"], 0) + disk["size"] + payload

    return vgs

  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: _compute(disks, 0),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: _compute(disks, 128),
    constants.DT_FILE: None,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]


def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d["size"] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
    constants.DT_FILE: None,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
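
# Example (illustrative): for two DRBD8 disks of 1024 and 2048 MiB,
#   _ComputeDiskSize(constants.DT_DRBD8, [{"size": 1024}, {"size": 2048}])
# yields 1024 + 128 + 2048 + 128 = 3328 MiB of required volume group space,
# while the diskless and file templates need no volume group space (None).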


def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
                                                  hvname,
                                                  hvparams)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)


def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS we should use
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  result = lu.rpc.call_os_validate(required, nodenames, osname,
                                   [constants.OS_VALIDATE_PARAMETERS],
                                   osparams)
  for node, nres in result.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    nres.Raise("OS Parameters validation failed on node %s" % node)
    if not nres.payload:
      lu.LogInfo("OS %s not found on node %s, validation skipped",
                 osname, node)


class LUCreateInstance(LogicalUnit):
6933
  """Create an instance.
6934

6935
  """
6936
  HPATH = "instance-add"
6937
  HTYPE = constants.HTYPE_INSTANCE
6938
  _OP_PARAMS = [
6939
    _PInstanceName,
6940
    ("mode", ht.NoDefault, ht.TElemOf(constants.INSTANCE_CREATE_MODES)),
6941
    ("start", True, ht.TBool),
6942
    ("wait_for_sync", True, ht.TBool),
6943
    ("ip_check", True, ht.TBool),
6944
    ("name_check", True, ht.TBool),
6945
    ("disks", ht.NoDefault, ht.TListOf(ht.TDict)),
6946
    ("nics", ht.NoDefault, ht.TListOf(ht.TDict)),
6947
    ("hvparams", ht.EmptyDict, ht.TDict),
6948
    ("beparams", ht.EmptyDict, ht.TDict),
6949
    ("osparams", ht.EmptyDict, ht.TDict),
6950
    ("no_install", None, ht.TMaybeBool),
6951
    ("os_type", None, ht.TMaybeString),
6952
    ("force_variant", False, ht.TBool),
6953
    ("source_handshake", None, ht.TOr(ht.TList, ht.TNone)),
6954
    ("source_x509_ca", None, ht.TMaybeString),
6955
    ("source_instance_name", None, ht.TMaybeString),
6956
    ("source_shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT,
6957
     ht.TPositiveInt),
6958
    ("src_node", None, ht.TMaybeString),
6959
    ("src_path", None, ht.TMaybeString),
6960
    ("pnode", None, ht.TMaybeString),
6961
    ("snode", None, ht.TMaybeString),
6962
    ("iallocator", None, ht.TMaybeString),
6963
    ("hypervisor", None, ht.TMaybeString),
6964
    ("disk_template", ht.NoDefault, _CheckDiskTemplate),
6965
    ("identify_defaults", False, ht.TBool),
6966
    ("file_driver", None, ht.TOr(ht.TNone, ht.TElemOf(constants.FILE_DRIVER))),
6967
    ("file_storage_dir", None, ht.TMaybeString),
6968
    ]
6969
  REQ_BGL = False
6970

    
6971
  def CheckArguments(self):
    """Check arguments.

    """
    # do not require name_check to ease forward/backward compatibility
    # for tools
    if self.op.no_install and self.op.start:
      self.LogInfo("No-installation mode selected, disabling startup")
      self.op.start = False
    # validate/normalize the instance name
    self.op.instance_name = \
      netutils.Hostname.GetNormalizedName(self.op.instance_name)

    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do ip check without a name check",
                                 errors.ECODE_INVAL)

    # check nics' parameter names
    for nic in self.op.nics:
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)

    # check disks. parameter names and consistent adopt/no-adopt strategy
    has_adopt = has_no_adopt = False
    for disk in self.op.disks:
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
      if "adopt" in disk:
        has_adopt = True
      else:
        has_no_adopt = True
    if has_adopt and has_no_adopt:
      raise errors.OpPrereqError("Either all disks are adopted or none is",
                                 errors.ECODE_INVAL)
    if has_adopt:
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
        raise errors.OpPrereqError("Disk adoption is not supported for the"
                                   " '%s' disk template" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)
      if self.op.iallocator is not None:
        raise errors.OpPrereqError("Disk adoption not allowed with an"
                                   " iallocator script", errors.ECODE_INVAL)
      if self.op.mode == constants.INSTANCE_IMPORT:
        raise errors.OpPrereqError("Disk adoption not allowed for"
                                   " instance import", errors.ECODE_INVAL)

    self.adopt_disks = has_adopt

    # instance name verification
    if self.op.name_check:
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
      self.op.instance_name = self.hostname1.name
      # used in CheckPrereq for ip ping check
      self.check_ip = self.hostname1.ip
    else:
      self.check_ip = None

    # file storage checks
    if (self.op.file_driver and
        not self.op.file_driver in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver, errors.ECODE_INVAL)

    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
      raise errors.OpPrereqError("File storage directory path not absolute",
                                 errors.ECODE_INVAL)

    ### Node/iallocator related checks
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")

    if self.op.pnode is not None:
      if self.op.disk_template in constants.DTS_NET_MIRROR:
        if self.op.snode is None:
          raise errors.OpPrereqError("The networked disk templates need"
                                     " a mirror node", errors.ECODE_INVAL)
      elif self.op.snode:
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
                        " template")
        self.op.snode = None

    self._cds = _GetClusterDomainSecret()

    if self.op.mode == constants.INSTANCE_IMPORT:
      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      # works again!
      self.op.force_variant = True

      if self.op.no_install:
        self.LogInfo("No-installation mode has no effect during import")

    elif self.op.mode == constants.INSTANCE_CREATE:
      if self.op.os_type is None:
        raise errors.OpPrereqError("No guest OS specified",
                                   errors.ECODE_INVAL)
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
                                   " installation" % self.op.os_type,
                                   errors.ECODE_STATE)
      if self.op.disk_template is None:
        raise errors.OpPrereqError("No disk template specified",
                                   errors.ECODE_INVAL)

    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      # Check handshake to ensure both clusters have the same domain secret
      src_handshake = self.op.source_handshake
      if not src_handshake:
        raise errors.OpPrereqError("Missing source handshake",
                                   errors.ECODE_INVAL)

      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
                                                           src_handshake)
      if errmsg:
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
                                   errors.ECODE_INVAL)

      # Load and check source CA
      self.source_x509_ca_pem = self.op.source_x509_ca
      if not self.source_x509_ca_pem:
        raise errors.OpPrereqError("Missing source X509 CA",
                                   errors.ECODE_INVAL)

      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
                                                    self._cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
                                   errors.ECODE_INVAL)

      self.source_x509_ca = cert

      src_instance_name = self.op.source_instance_name
      if not src_instance_name:
        raise errors.OpPrereqError("Missing source instance name",
                                   errors.ECODE_INVAL)

      self.source_instance_name = \
          netutils.GetHostname(name=src_instance_name).name

    else:
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
                                 self.op.mode, errors.ECODE_INVAL)

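  # Illustrative note (example values, not taken from the opcode definition):
  # a disks list such as [{"size": 1024, "adopt": "existing-lv"}] selects
  # adoption for every disk, while [{"size": 1024}] selects normal creation;
  # the checks above reject any mix of the two forms, as well as adoption
  # combined with an iallocator or with instance import.
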
  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    """
    self.needed_locks = {}

    instance_name = self.op.instance_name
    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name, errors.ECODE_EXISTS)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    if self.op.iallocator:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path

      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from an absolute"
                                     " path requires a source node option.",
                                     errors.ECODE_INVAL)
      else:
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          self.op.src_path = src_path = \
            utils.PathJoin(constants.EXPORT_DIR, src_path)

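  # Editorial note on the locking above: when an iallocator is used the node
  # level is locked with ALL_SET because the target nodes are not known yet;
  # the same happens for an import whose source node was not specified, since
  # any node could hold the export.
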
  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    nics = [n.ToDict() for n in self.nics]
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     tags=[],
                     os=self.op.os_type,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     mem_size=self.be_full[constants.BE_MEMORY],
                     disks=self.disks,
                     nics=nics,
                     hypervisor=self.op.hypervisor,
                     )

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.result),
                                  ial.required_nodes), errors.ECODE_FAULT)
    self.op.pnode = ial.result[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.result))
    if ial.required_nodes == 2:
      self.op.snode = ial.result[1]

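  # Illustrative sketch of the allocator contract as enforced above (host
  # names are made up): for a mirrored template the allocator must return
  # exactly two names, e.g. ial.result == ["node1.example.com",
  # "node2.example.com"], used as primary and secondary respectively;
  # non-mirrored templates return a single name.
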
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      memory=self.be_full[constants.BE_MEMORY],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d["size"], d["mode"]) for d in self.disks],
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor_name=self.op.hypervisor,
    ))

    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
          self.secondaries)
    return env, nl, nl

  def _ReadExportInfo(self):
    """Reads the export information from disk.

    It will override the opcode source node and path with the actual
    information, if these two were not specified before.

    @return: the export information

    """
    assert self.op.mode == constants.INSTANCE_IMPORT

    src_node = self.op.src_node
    src_path = self.op.src_path

    if src_node is None:
      locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
      exp_list = self.rpc.call_export_list(locked_nodes)
      found = False
      for node in exp_list:
        if exp_list[node].fail_msg:
          continue
        if src_path in exp_list[node].payload:
          found = True
          self.op.src_node = src_node = node
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
                                                       src_path)
          break
      if not found:
        raise errors.OpPrereqError("No export found for relative path %s" %
                                   src_path, errors.ECODE_INVAL)

    _CheckNodeOnline(self, src_node)
    result = self.rpc.call_export_info(src_node, src_path)
    result.Raise("No export or invalid export found in dir %s" % src_path)

    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
    if not export_info.has_section(constants.INISECT_EXP):
      raise errors.ProgrammerError("Corrupted export config",
                                   errors.ECODE_ENVIRON)

    ei_version = export_info.get(constants.INISECT_EXP, "version")
    if (int(ei_version) != constants.EXPORT_VERSION):
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                 (ei_version, constants.EXPORT_VERSION),
                                 errors.ECODE_ENVIRON)
    return export_info

  def _ReadExportParams(self, einfo):
    """Use export parameters as defaults.

    In case the opcode doesn't specify (as in override) some instance
    parameters, then try to use them from the export information, if
    that declares them.

    """
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")

    if self.op.disk_template is None:
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
        self.op.disk_template = einfo.get(constants.INISECT_INS,
                                          "disk_template")
      else:
        raise errors.OpPrereqError("No disk template specified and the export"
                                   " is missing the disk_template information",
                                   errors.ECODE_INVAL)

    if not self.op.disks:
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
        disks = []
        # TODO: import the disk iv_name too
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
          disks.append({"size": disk_sz})
        self.op.disks = disks
      else:
        raise errors.OpPrereqError("No disk info specified and the export"
                                   " is missing the disk information",
                                   errors.ECODE_INVAL)

    if (not self.op.nics and
        einfo.has_option(constants.INISECT_INS, "nic_count")):
      nics = []
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
        ndict = {}
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
          ndict[name] = v
        nics.append(ndict)
      self.op.nics = nics

    if (self.op.hypervisor is None and
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
    if einfo.has_section(constants.INISECT_HYP):
      # use the export parameters but do not override the ones
      # specified by the user
      for name, value in einfo.items(constants.INISECT_HYP):
        if name not in self.op.hvparams:
          self.op.hvparams[name] = value

    if einfo.has_section(constants.INISECT_BEP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_BEP):
        if name not in self.op.beparams:
          self.op.beparams[name] = value
    else:
      # try to read the parameters old style, from the main section
      for name in constants.BES_PARAMETERS:
        if (name not in self.op.beparams and
            einfo.has_option(constants.INISECT_INS, name)):
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)

    if einfo.has_section(constants.INISECT_OSP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_OSP):
        if name not in self.op.osparams:
          self.op.osparams[name] = value

  def _RevertToDefaults(self, cluster):
    """Revert the instance parameters to the default values.

    """
    # hvparams
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
    for name in self.op.hvparams.keys():
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
        del self.op.hvparams[name]
    # beparams
    be_defs = cluster.SimpleFillBE({})
    for name in self.op.beparams.keys():
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
        del self.op.beparams[name]
    # nic params
    nic_defs = cluster.SimpleFillNIC({})
    for nic in self.op.nics:
      for name in constants.NICS_PARAMETERS:
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
          del nic[name]
    # osparams
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
    for name in self.op.osparams.keys():
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
        del self.op.osparams[name]

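  # Example of the effect of _RevertToDefaults (illustrative values): with
  # identify_defaults set, an opcode carrying beparams={"memory": 128} while
  # the cluster default for memory is also 128 has that entry dropped, so the
  # new instance keeps tracking the cluster default instead of pinning it.
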
  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.mode == constants.INSTANCE_IMPORT:
      export_info = self._ReadExportInfo()
      self._ReadExportParams(export_info)

    _CheckDiskTemplate(self.op.disk_template)

    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances", errors.ECODE_STATE)

    if self.op.hypervisor is None:
      self.op.hypervisor = self.cfg.GetHypervisorType()

    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 " cluster (%s)" % (self.op.hypervisor,
                                  ",".join(enabled_hvs)),
                                 errors.ECODE_STATE)

    # check hypervisor parameter syntax (locally)
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
                                      self.op.hvparams)
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)
    self.hv_full = filled_hvp
    # check that we don't specify global parameters on an instance
    _CheckGlobalHvParams(self.op.hvparams)

    # fill and remember the beparams dict
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
    self.be_full = cluster.SimpleFillBE(self.op.beparams)

    # build os parameters
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)

    # now that hvp/bep are in final format, let's reset to defaults,
    # if told to do so
    if self.op.identify_defaults:
      self._RevertToDefaults(cluster)

    # NIC buildup
    self.nics = []
    for idx, nic in enumerate(self.op.nics):
      nic_mode_req = nic.get("mode", None)
      nic_mode = nic_mode_req
      if nic_mode is None:
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]

      # in routed mode, for the first nic, the default ip is 'auto'
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
        default_ip_mode = constants.VALUE_AUTO
      else:
        default_ip_mode = constants.VALUE_NONE

      # ip validity checks
      ip = nic.get("ip", default_ip_mode)
      if ip is None or ip.lower() == constants.VALUE_NONE:
        nic_ip = None
      elif ip.lower() == constants.VALUE_AUTO:
        if not self.op.name_check:
          raise errors.OpPrereqError("IP address set to auto but name checks"
                                     " have been skipped",
                                     errors.ECODE_INVAL)
        nic_ip = self.hostname1.ip
      else:
        if not netutils.IPAddress.IsValid(ip):
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
                                     errors.ECODE_INVAL)
        nic_ip = ip

      # TODO: check the ip address for uniqueness
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
                                   errors.ECODE_INVAL)

      # MAC address verification
      mac = nic.get("mac", constants.VALUE_AUTO)
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        mac = utils.NormalizeAndValidateMac(mac)

        try:
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("MAC address %s already in use"
                                     " in cluster" % mac,
                                     errors.ECODE_NOTUNIQUE)

      # bridge verification
      bridge = nic.get("bridge", None)
      link = nic.get("link", None)
      if bridge and link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time", errors.ECODE_INVAL)
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
                                   errors.ECODE_INVAL)
      elif bridge:
        link = bridge

      nicparams = {}
      if nic_mode_req:
        nicparams[constants.NIC_MODE] = nic_mode_req
      if link:
        nicparams[constants.NIC_LINK] = link

      check_params = cluster.SimpleFillNIC(nicparams)
      objects.NIC.CheckParameterSyntax(check_params)
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))

    # disk checks/pre-build
    self.disks = []
    for disk in self.op.disks:
      mode = disk.get("mode", constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                   mode, errors.ECODE_INVAL)
      size = disk.get("size", None)
      if size is None:
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
      try:
        size = int(size)
      except (TypeError, ValueError):
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
                                   errors.ECODE_INVAL)
      vg = disk.get("vg", self.cfg.GetVGName())
      new_disk = {"size": size, "mode": mode, "vg": vg}
      if "adopt" in disk:
        new_disk["adopt"] = disk["adopt"]
      self.disks.append(new_disk)

    if self.op.mode == constants.INSTANCE_IMPORT:

      # Check that the new instance doesn't have less disks than the export
      instance_disks = len(self.disks)
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
      if instance_disks < export_disks:
        raise errors.OpPrereqError("Not enough disks to import."
                                   " (instance: %d, export: %d)" %
                                   (instance_disks, export_disks),
                                   errors.ECODE_INVAL)

      disk_images = []
      for idx in range(export_disks):
        option = 'disk%d_dump' % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = utils.PathJoin(self.op.src_path, export_name)
          disk_images.append(image)
        else:
          disk_images.append(False)

      self.src_images = disk_images

      old_name = export_info.get(constants.INISECT_INS, 'name')
      try:
        exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
                                   " an integer: %s" % str(err),
                                   errors.ECODE_STATE)
      if self.op.instance_name == old_name:
        for idx, nic in enumerate(self.nics):
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
            nic_mac_ini = 'nic%d_mac' % idx
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)

    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT

    # ip ping checks (we use the same ip that was resolved in ExpandNames)
    if self.op.ip_check:
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (self.check_ip, self.op.instance_name),
                                   errors.ECODE_NOTUNIQUE)

    #### mac address generation
    # By generating here the mac address both the allocator and the hooks get
    # the real final mac address rather than the 'auto' or 'generate' value.
    # There is a race condition between the generation and the instance object
    # creation, which means that we know the mac is valid now, but we're not
    # sure it will be when we actually add the instance. If things go bad
    # adding the instance will abort because of a duplicate mac, and the
    # creation job will fail.
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())

    #### allocator run

    if self.op.iallocator is not None:
      self._RunAllocator()

    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    if pnode.offline:
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if pnode.drained:
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if not pnode.vm_capable:
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
                                 " '%s'" % pnode.name, errors.ECODE_STATE)

    self.secondaries = []

    # mirror node verification
    if self.op.disk_template in constants.DTS_NET_MIRROR:
      if self.op.snode == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be the"
                                   " primary node.", errors.ECODE_INVAL)
      _CheckNodeOnline(self, self.op.snode)
      _CheckNodeNotDrained(self, self.op.snode)
      _CheckNodeVmCapable(self, self.op.snode)
      self.secondaries.append(self.op.snode)

    nodenames = [pnode.name] + self.secondaries

    if not self.adopt_disks:
      # Check lv size requirements, if not adopting
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)

    else: # instead, we must check the adoption data
      all_lvs = set([i["vg"] + "/" + i["adopt"] for i in self.disks])
      if len(all_lvs) != len(self.disks):
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
                                   errors.ECODE_INVAL)
      for lv_name in all_lvs:
        try:
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
          # to ReserveLV uses the same syntax
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("LV named %s used by another instance" %
                                     lv_name, errors.ECODE_NOTUNIQUE)

      vg_names = self.rpc.call_vg_list([pnode.name])
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)

      node_lvs = self.rpc.call_lv_list([pnode.name],
                                       vg_names[pnode.name].payload.keys()
                                      )[pnode.name]
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
      node_lvs = node_lvs.payload

      delta = all_lvs.difference(node_lvs.keys())
      if delta:
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
      if online_lvs:
        raise errors.OpPrereqError("Online logical volumes found, cannot"
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
                                   errors.ECODE_STATE)
      # update the size of disk based on what is found
      for dsk in self.disks:
        dsk["size"] = int(float(node_lvs[dsk["vg"] + "/" + dsk["adopt"]][0]))

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
    # check OS parameters (remotely)
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    # memory check on primary node
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MEMORY],
                           self.op.hypervisor)

    self.dry_run_result = list(nodenames)

  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    if constants.ENABLE_FILE_STORAGE:
      # this is needed because os.path.join does not accept None arguments
      if self.op.file_storage_dir is None:
        string_file_storage_dir = ""
      else:
        string_file_storage_dir = self.op.file_storage_dir

      # build the full file storage dir path
      file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
                                        string_file_storage_dir, instance)
    else:
      file_storage_dir = ""

    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  file_storage_dir,
                                  self.op.file_driver,
                                  0,
                                  feedback_fn)

    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_up=False,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            osparams=self.op.osparams,
                            )

    if self.adopt_disks:
      # rename LVs to the newly-generated names; we need to construct
      # 'fake' LV disks with the old data, plus the new unique_id
      tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
      rename_to = []
      for t_dsk, a_dsk in zip(tmp_disks, self.disks):
        rename_to.append(t_dsk.logical_id)
        t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
        self.cfg.SetDiskID(t_dsk, pnode_name)
      result = self.rpc.call_blockdev_rename(pnode_name,
                                             zip(tmp_disks, rename_to))
      result.Raise("Failed to rename adopted LVs")
    else:
      feedback_fn("* creating instance disks...")
      try:
        _CreateDisks(self, iobj)
      except errors.OpExecError:
        self.LogWarning("Device creation failed, reverting...")
        try:
          _RemoveDisks(self, iobj)
        finally:
          self.cfg.ReleaseDRBDMinors(instance)
          raise

      if self.cfg.GetClusterInfo().prealloc_wipe_disks:
        feedback_fn("* wiping instance disks...")
        try:
          _WipeDisks(self, iobj)
        except errors.OpExecError:
          self.LogWarning("Device wiping failed, reverting...")
          try:
            _RemoveDisks(self, iobj)
          finally:
            self.cfg.ReleaseDRBDMinors(instance)
            raise

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj, self.proc.GetECId())

    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]
    # Unlock all the nodes
    if self.op.mode == constants.INSTANCE_IMPORT:
      nodes_keep = [self.op.src_node]
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
                       if node != self.op.src_node]
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
    else:
      self.context.glm.release(locking.LEVEL_NODE)
      del self.acquired_locks[locking.LEVEL_NODE]

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      time.sleep(15)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
      if self.op.mode == constants.INSTANCE_CREATE:
        if not self.op.no_install:
          feedback_fn("* running the instance OS create scripts...")
          # FIXME: pass debug option from opcode to backend
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
                                                 self.op.debug_level)
          result.Raise("Could not add os for instance %s"
                       " on node %s" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")

        transfers = []

        for idx, image in enumerate(self.src_images):
          if not image:
            continue

          # FIXME: pass debug option from opcode to backend
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
                                             constants.IEIO_FILE, (image, ),
                                             constants.IEIO_SCRIPT,
                                             (iobj.disks[idx], idx),
                                             None)
          transfers.append(dt)

        import_result = \
          masterd.instance.TransferInstanceData(self, feedback_fn,
                                                self.op.src_node, pnode_name,
                                                self.pnode.secondary_ip,
                                                iobj, transfers)
        if not compat.all(import_result):
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
        feedback_fn("* preparing remote import...")
        # The source cluster will stop the instance before attempting to make a
        # connection. In some cases stopping an instance can take a long time,
        # hence the shutdown timeout is added to the connection timeout.
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
                           self.op.source_shutdown_timeout)
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

        assert iobj.primary_node == self.pnode.name
        disk_results = \
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
                                        self.source_x509_ca,
                                        self._cds, timeouts)
        if not compat.all(disk_results):
          # TODO: Should the instance still be started, even if some disks
          # failed to import (valid for local imports, too)?
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

        # Run rename script on newly imported instance
        assert iobj.name == instance
        feedback_fn("Running rename script for %s" % instance)
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
                                                   self.source_instance_name,
                                                   self.op.debug_level)
        if result.fail_msg:
          self.LogWarning("Failed to run rename script for %s on node"
                          " %s: %s" % (instance, pnode_name, result.fail_msg))

      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    if self.op.start:
      iobj.admin_up = True
      self.cfg.Update(iobj, feedback_fn)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)


class LUConnectConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  _OP_PARAMS = [
    _PInstanceName
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      if instance.admin_up:
        state = "ERROR_down"
      else:
        state = "ADMIN_down"
      raise errors.OpExecError("Instance %s is not running (state %s)" %
                               (instance.name, state))

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    hyper = hypervisor.GetHypervisor(instance.hypervisor)
    cluster = self.cfg.GetClusterInfo()
    # beparams and hvparams are passed separately, to avoid editing the
    # instance and then saving the defaults in the instance itself.
    hvparams = cluster.FillHV(instance)
    beparams = cluster.FillBE(instance)
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)

    # build ssh cmdline
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)


class LUReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("mode", ht.NoDefault, ht.TElemOf(constants.REPLACE_MODES)),
    ("disks", ht.EmptyList, ht.TListOf(ht.TPositiveInt)),
    ("remote_node", None, ht.TMaybeString),
    ("iallocator", None, ht.TMaybeString),
    ("early_release", False, ht.TBool),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.iallocator is not None:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    elif self.op.remote_node is not None:
      remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      self.op.remote_node = remote_node

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks, False, self.op.early_release)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    # If we're not already locking all nodes in the set we have to declare the
    # instance's primary/secondary nodes.
    if (level == locking.LEVEL_NODE and
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    nl = [
      self.cfg.GetMasterNode(),
      instance.primary_node,
      ]
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)
    return env, nl, nl


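# Editorial note: LUReplaceDisks itself only validates arguments and computes
# the locks; the actual replacement work is delegated to the TLReplaceDisks
# tasklet below, which the LU registers via self.tasklets in ExpandNames.
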
class TLReplaceDisks(Tasklet):
  """Replaces disks for an instance.

  Note: Locking is not within the scope of this class.

  """
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
               disks, delay_iallocator, early_release):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.mode = mode
    self.iallocator_name = iallocator_name
    self.remote_node = remote_node
    self.disks = disks
    self.delay_iallocator = delay_iallocator
    self.early_release = early_release

    # Runtime data
    self.instance = None
    self.new_node = None
    self.target_node = None
    self.other_node = None
    self.remote_node_info = None
    self.node_secondary_ip = None

  @staticmethod
  def CheckArguments(mode, remote_node, iallocator):
    """Helper function for users of this class.

    """
    # check for valid parameter combination
    if mode == constants.REPLACE_DISK_CHG:
      if remote_node is None and iallocator is None:
        raise errors.OpPrereqError("When changing the secondary either an"
                                   " iallocator script must be used or the"
                                   " new node given", errors.ECODE_INVAL)

      if remote_node is not None and iallocator is not None:
        raise errors.OpPrereqError("Give either the iallocator or the new"
                                   " secondary, not both", errors.ECODE_INVAL)

    elif remote_node is not None or iallocator is not None:
      # Not replacing the secondary
      raise errors.OpPrereqError("The iallocator and new node options can"
                                 " only be used when changing the"
                                 " secondary node", errors.ECODE_INVAL)

  @staticmethod
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
    """Compute a new secondary node using an IAllocator.

    """
    ial = IAllocator(lu.cfg, lu.rpc,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     name=instance_name,
                     relocate_from=relocate_from)

    ial.Run(iallocator_name)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
                                 " %s" % (iallocator_name, ial.info),
                                 errors.ECODE_NORES)

    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (iallocator_name,
                                  len(ial.result), ial.required_nodes),
                                 errors.ECODE_FAULT)

    remote_node_name = ial.result[0]

    lu.LogInfo("Selected new secondary for instance '%s': %s",
               instance_name, remote_node_name)

    return remote_node_name

  def _FindFaultyDisks(self, node_name):
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
                                    node_name, True)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.instance_name

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
                                 " instances", errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("The instance has a strange layout,"
                                 " expected one secondary but found %d" %
                                 len(instance.secondary_nodes),
                                 errors.ECODE_FAULT)

    if not self.delay_iallocator:
      self._CheckPrereq2()

  def _CheckPrereq2(self):
    """Check prerequisites, second part.

    This function should always be part of CheckPrereq. It was separated and is
    now called from Exec because during node evacuation iallocator was only
    called with an unmodified cluster model, not taking planned changes into
    account.

    """
    instance = self.instance
    secondary_node = instance.secondary_nodes[0]

    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is not None:
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node
    else:
      self.remote_node_info = None

    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance.", errors.ECODE_INVAL)

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance.",
                                 errors.ECODE_INVAL)

    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 errors.ECODE_INVAL)

    if self.mode == constants.REPLACE_DISK_AUTO:
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

      if faulty_primary:
        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)
        _CheckNodeVmCapable(self.lu, remote_node)

        old_node_info = self.cfg.GetNodeInfo(secondary_node)
        assert old_node_info is not None
        if old_node_info.offline and not self.early_release:
          # doesn't make sense to delay the release
          self.early_release = True
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
                          " early-release mode", secondary_node)

      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)

      # If not specified all disks should be replaced
      if not self.disks:
        self.disks = range(len(self.instance.disks))

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    # Check whether disks are valid
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    node_2nd_ip = {}

    for node_name in [self.target_node, self.other_node, self.new_node]:
      if node_name is not None:
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip

    self.node_secondary_ip = node_2nd_ip

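  # Summary of the mode handling above (derived from this method, not from an
  # external spec): REPLACE_DISK_PRI rebuilds LVs on the primary node,
  # REPLACE_DISK_SEC on the current secondary, REPLACE_DISK_CHG moves the
  # secondary to new_node, and REPLACE_DISK_AUTO targets whichever side
  # _FindFaultyDisks reports as faulty (doing nothing if neither is).
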
  def Exec(self, feedback_fn):
    """Execute disk replacement.

    This dispatches the disk replacement to the appropriate handler.

    """
    if self.delay_iallocator:
      self._CheckPrereq2()

    if not self.disks:
      feedback_fn("No disks need replacement")
      return

    feedback_fn("Replacing disk(s) %s for %s" %
                (utils.CommaJoin(self.disks), self.instance.name))

    activate_disks = (not self.instance.admin_up)

    # Activate the instance disks if we're replacing them on a down instance
    if activate_disks:
      _StartInstanceDisks(self.lu, self.instance, True)

    try:
      # Should we replace the secondary node?
      if self.new_node is not None:
        fn = self._ExecDrbd8Secondary
      else:
        fn = self._ExecDrbd8DiskOnly

      return fn(feedback_fn)

    finally:
      # Deactivate the instance disks if we're replacing them on a
      # down instance
      if activate_disks:
        _SafeShutdownInstanceDisks(self.lu, self.instance)

  def _CheckVolumeGroup(self, nodes):
    self.lu.LogInfo("Checking volume groups")

    vgname = self.cfg.GetVGName()

    # Make sure volume group exists on all involved nodes
    results = self.rpc.call_vg_list(nodes)
    if not results:
      raise errors.OpExecError("Can't list volume groups on the nodes")

    for node in nodes:
      res = results[node]
      res.Raise("Error checking node %s" % node)
      if vgname not in res.payload:
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
                                 (vgname, node))

  def _CheckDisksExistence(self, nodes):
    # Check disk existence
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        msg = result.fail_msg
        if msg or not result.payload:
          if not msg:
            msg = "disk not found"
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
                                   (idx, node, msg))

  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
                      (idx, node_name))

      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
                                   ldisk=ldisk):
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
                                 " replace disks for instance %s" %
                                 (node_name, self.instance.name))

  def _CreateNewStorage(self, node_name):
    vgname = self.cfg.GetVGName()
    iv_names = {}

    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))

      self.cfg.SetDiskID(dev, node_name)

      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
      names = _GenerateUniqueNames(self.lu, lv_names)

      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
                             logical_id=(vgname, names[0]))
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                             logical_id=(vgname, names[1]))

      new_lvs = [lv_data, lv_meta]
      old_lvs = dev.children
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)

      # we pass force_create=True to force the LVM creation
      for new_lv in new_lvs:
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    return iv_names

  def _CheckDevices(self, node_name, iv_names):
    for name, (dev, _, _) in iv_names.iteritems():
      self.cfg.SetDiskID(dev, node_name)

      result = self.rpc.call_blockdev_find(node_name, dev)

      msg = result.fail_msg
      if msg or not result.payload:
        if not msg:
          msg = "disk not found"
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
                                 (name, msg))

      if result.payload.is_degraded:
        raise errors.OpExecError("DRBD device %s is degraded!" % name)

  def _RemoveOldStorage(self, node_name, iv_names):
    for name, (_, old_lvs, _) in iv_names.iteritems():
      self.lu.LogInfo("Remove logical volumes for %s" % name)

      for lv in old_lvs:
        self.cfg.SetDiskID(lv, node_name)

        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
        if msg:
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
                             hint="remove unused LVs manually")

  def _ReleaseNodeLock(self, node_name):
    """Releases the lock for a given node."""
    self.lu.context.glm.release(locking.LEVEL_NODE, node_name)

  def _ExecDrbd8DiskOnly(self, feedback_fn):
    """Replace a disk on the primary or secondary for DRBD 8.

    The algorithm for replace is quite complicated:

      1. for each disk to be replaced:

        1. create new LVs on the target node with unique names
        1. detach old LVs from the drbd device
8397
        1. rename old LVs to name_replaced.<time_t>
8398
        1. rename new LVs to old LVs
8399
        1. attach the new LVs (with the old names now) to the drbd device
8400

8401
      1. wait for sync across all devices
8402

8403
      1. for each modified disk:
8404

8405
        1. remove old LVs (which have the name name_replaced.<time_t>)
8406

8407
    Failures are not very well handled.
8408

8409
    """
8410
    steps_total = 6
8411

    
8412
    # Step: check device activation
8413
    self.lu.LogStep(1, steps_total, "Check device existence")
8414
    self._CheckDisksExistence([self.other_node, self.target_node])
8415
    self._CheckVolumeGroup([self.target_node, self.other_node])
8416

    
8417
    # Step: check other node consistency
8418
    self.lu.LogStep(2, steps_total, "Check peer consistency")
8419
    self._CheckDisksConsistency(self.other_node,
8420
                                self.other_node == self.instance.primary_node,
8421
                                False)
8422

    
8423
    # Step: create new storage
8424
    self.lu.LogStep(3, steps_total, "Allocate new storage")
8425
    iv_names = self._CreateNewStorage(self.target_node)
8426

    
8427
    # Step: for each lv, detach+rename*2+attach
8428
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8429
    for dev, old_lvs, new_lvs in iv_names.itervalues():
8430
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
8431

    
8432
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
8433
                                                     old_lvs)
8434
      result.Raise("Can't detach drbd from local storage on node"
8435
                   " %s for device %s" % (self.target_node, dev.iv_name))
8436
      #dev.children = []
8437
      #cfg.Update(instance)
8438

    
8439
      # ok, we created the new LVs, so now we know we have the needed
8440
      # storage; as such, we proceed on the target node to rename
8441
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
8442
      # using the assumption that logical_id == physical_id (which in
8443
      # turn is the unique_id on that node)
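      # (for an LV-backed child both IDs are the (vg_name, lv_name) pair,
      # which is why a plain LV rename keeps them consistent)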
8444

    
8445
      # FIXME(iustin): use a better name for the replaced LVs
8446
      temp_suffix = int(time.time())
8447
      ren_fn = lambda d, suff: (d.physical_id[0],
8448
                                d.physical_id[1] + "_replaced-%s" % suff)
8449

    
8450
      # Build the rename list based on what LVs exist on the node
8451
      rename_old_to_new = []
8452
      for to_ren in old_lvs:
8453
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
8454
        if not result.fail_msg and result.payload:
8455
          # device exists
8456
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
8457

    
8458
      self.lu.LogInfo("Renaming the old LVs on the target node")
8459
      result = self.rpc.call_blockdev_rename(self.target_node,
8460
                                             rename_old_to_new)
8461
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
8462

    
8463
      # Now we rename the new LVs to the old LVs
8464
      self.lu.LogInfo("Renaming the new LVs on the target node")
8465
      rename_new_to_old = [(new, old.physical_id)
8466
                           for old, new in zip(old_lvs, new_lvs)]
8467
      result = self.rpc.call_blockdev_rename(self.target_node,
8468
                                             rename_new_to_old)
8469
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
8470

    
8471
      for old, new in zip(old_lvs, new_lvs):
8472
        new.logical_id = old.logical_id
8473
        self.cfg.SetDiskID(new, self.target_node)
8474

    
8475
      for disk in old_lvs:
8476
        disk.logical_id = ren_fn(disk, temp_suffix)
8477
        self.cfg.SetDiskID(disk, self.target_node)
8478

    
8479
      # Now that the new lvs have the old name, we can add them to the device
8480
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
8481
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
8482
                                                  new_lvs)
8483
      msg = result.fail_msg
8484
      if msg:
8485
        for new_lv in new_lvs:
8486
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
8487
                                               new_lv).fail_msg
8488
          if msg2:
8489
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
8490
                               hint=("cleanup manually the unused logical"
8491
                                     "volumes"))
8492
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
8493

    
8494
      dev.children = new_lvs
8495

    
8496
      self.cfg.Update(self.instance, feedback_fn)
8497

    
8498
    cstep = 5
8499
    if self.early_release:
8500
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
8501
      cstep += 1
8502
      self._RemoveOldStorage(self.target_node, iv_names)
8503
      # WARNING: we release both node locks here, do not do other RPCs
8504
      # than WaitForSync to the primary node
8505
      self._ReleaseNodeLock([self.target_node, self.other_node])
8506

    
8507
    # Wait for sync
8508
    # This can fail as the old devices are degraded and _WaitForSync
8509
    # does a combined result over all disks, so we don't check its return value
8510
    self.lu.LogStep(cstep, steps_total, "Sync devices")
8511
    cstep += 1
8512
    _WaitForSync(self.lu, self.instance)
8513

    
8514
    # Check all devices manually
8515
    self._CheckDevices(self.instance.primary_node, iv_names)
8516

    
8517
    # Step: remove old storage
8518
    if not self.early_release:
8519
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
8520
      cstep += 1
8521
      self._RemoveOldStorage(self.target_node, iv_names)
8522

    
8523
  def _ExecDrbd8Secondary(self, feedback_fn):
8524
    """Replace the secondary node for DRBD 8.
8525

8526
    The algorithm for replace is quite complicated:
8527
      - for all disks of the instance:
8528
        - create new LVs on the new node with same names
8529
        - shutdown the drbd device on the old secondary
8530
        - disconnect the drbd network on the primary
8531
        - create the drbd device on the new secondary
8532
        - network attach the drbd on the primary, using an artifice:
8533
          the drbd code for Attach() will connect to the network if it
8534
          finds a device which is connected to the good local disks but
8535
          not network enabled
8536
      - wait for sync across all devices
8537
      - remove all disks from the old secondary
8538

8539
    Failures are not very well handled.
8540

8541
    """
8542
    steps_total = 6
8543

    
8544
    # Step: check device activation
8545
    self.lu.LogStep(1, steps_total, "Check device existence")
8546
    self._CheckDisksExistence([self.instance.primary_node])
8547
    self._CheckVolumeGroup([self.instance.primary_node])
8548

    
8549
    # Step: check other node consistency
8550
    self.lu.LogStep(2, steps_total, "Check peer consistency")
8551
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
8552

    
8553
    # Step: create new storage
8554
    self.lu.LogStep(3, steps_total, "Allocate new storage")
8555
    for idx, dev in enumerate(self.instance.disks):
8556
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
8557
                      (self.new_node, idx))
8558
      # we pass force_create=True to force LVM creation
8559
      for new_lv in dev.children:
8560
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
8561
                        _GetInstanceInfoText(self.instance), False)
8562

    
8563
    # Step 4: drbd minors and drbd setup changes
8564
    # after this, we must manually remove the drbd minors on both the
8565
    # error and the success paths
8566
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8567
    minors = self.cfg.AllocateDRBDMinor([self.new_node
                                         for _ in self.instance.disks],
8569
                                        self.instance.name)
8570
    logging.debug("Allocated minors %r", minors)
8571

    
8572
    iv_names = {}
8573
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
8574
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
8575
                      (self.new_node, idx))
8576
      # create new devices on new_node; note that we create two IDs:
8577
      # one without port, so the drbd will be activated without
8578
      # networking information on the new node at this stage, and one
8579
      # with network, for the latter activation in step 4
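      # (a DRBD8 logical_id is the 6-tuple (node_a, node_b, port, minor_a,
      # minor_b, secret); new_alone_id below simply uses None for the port
      # so that the device starts without networking)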
8580
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
8581
      if self.instance.primary_node == o_node1:
8582
        p_minor = o_minor1
8583
      else:
8584
        assert self.instance.primary_node == o_node2, "Three-node instance?"
8585
        p_minor = o_minor2
8586

    
8587
      new_alone_id = (self.instance.primary_node, self.new_node, None,
8588
                      p_minor, new_minor, o_secret)
8589
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
8590
                    p_minor, new_minor, o_secret)
8591

    
8592
      iv_names[idx] = (dev, dev.children, new_net_id)
8593
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
8594
                    new_net_id)
8595
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
8596
                              logical_id=new_alone_id,
8597
                              children=dev.children,
8598
                              size=dev.size)
8599
      try:
8600
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
8601
                              _GetInstanceInfoText(self.instance), False)
8602
      except errors.GenericError:
8603
        self.cfg.ReleaseDRBDMinors(self.instance.name)
8604
        raise
8605

    
8606
    # We have new devices, shutdown the drbd on the old secondary
8607
    for idx, dev in enumerate(self.instance.disks):
8608
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
8609
      self.cfg.SetDiskID(dev, self.target_node)
8610
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
8611
      if msg:
8612
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
8613
                           "node: %s" % (idx, msg),
8614
                           hint=("Please cleanup this device manually as"
8615
                                 " soon as possible"))
8616

    
8617
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
8618
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
8619
                                               self.node_secondary_ip,
8620
                                               self.instance.disks)\
8621
                                              [self.instance.primary_node]
8622

    
8623
    msg = result.fail_msg
8624
    if msg:
8625
      # detaches didn't succeed (unlikely)
8626
      self.cfg.ReleaseDRBDMinors(self.instance.name)
8627
      raise errors.OpExecError("Can't detach the disks from the network on"
8628
                               " old node: %s" % (msg,))
8629

    
8630
    # if we managed to detach at least one, we update all the disks of
8631
    # the instance to point to the new secondary
8632
    self.lu.LogInfo("Updating instance configuration")
8633
    for dev, _, new_logical_id in iv_names.itervalues():
8634
      dev.logical_id = new_logical_id
8635
      self.cfg.SetDiskID(dev, self.instance.primary_node)
8636

    
8637
    self.cfg.Update(self.instance, feedback_fn)
8638

    
8639
    # and now perform the drbd attach
8640
    self.lu.LogInfo("Attaching primary drbds to new secondary"
8641
                    " (standalone => connected)")
8642
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8643
                                            self.new_node],
8644
                                           self.node_secondary_ip,
8645
                                           self.instance.disks,
8646
                                           self.instance.name,
8647
                                           False)
8648
    for to_node, to_result in result.items():
8649
      msg = to_result.fail_msg
8650
      if msg:
8651
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8652
                           to_node, msg,
8653
                           hint=("please do a gnt-instance info to see the"
8654
                                 " status of disks"))
8655
    cstep = 5
8656
    if self.early_release:
8657
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
8658
      cstep += 1
8659
      self._RemoveOldStorage(self.target_node, iv_names)
8660
      # WARNING: we release all node locks here, do not do other RPCs
8661
      # than WaitForSync to the primary node
8662
      self._ReleaseNodeLock([self.instance.primary_node,
8663
                             self.target_node,
8664
                             self.new_node])
8665

    
8666
    # Wait for sync
8667
    # This can fail as the old devices are degraded and _WaitForSync
8668
    # does a combined result over all disks, so we don't check its return value
8669
    self.lu.LogStep(cstep, steps_total, "Sync devices")
8670
    cstep += 1
8671
    _WaitForSync(self.lu, self.instance)
8672

    
8673
    # Check all devices manually
8674
    self._CheckDevices(self.instance.primary_node, iv_names)
8675

    
8676
    # Step: remove old storage
8677
    if not self.early_release:
8678
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
8679
      self._RemoveOldStorage(self.target_node, iv_names)


class LURepairNodeStorage(NoHooksLU):
8683
  """Repairs the volume group on a node.
8684

8685
  """
8686
  _OP_PARAMS = [
8687
    _PNodeName,
8688
    ("storage_type", ht.NoDefault, _CheckStorageType),
8689
    ("name", ht.NoDefault, ht.TNonEmptyString),
8690
    ("ignore_consistency", False, ht.TBool),
8691
    ]
8692
  REQ_BGL = False
8693

    
8694
  def CheckArguments(self):
8695
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8696

    
8697
    storage_type = self.op.storage_type
8698

    
8699
    if (constants.SO_FIX_CONSISTENCY not in
8700
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8701
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
8702
                                 " repaired" % storage_type,
8703
                                 errors.ECODE_INVAL)
8704

    
8705
  def ExpandNames(self):
8706
    self.needed_locks = {
8707
      locking.LEVEL_NODE: [self.op.node_name],
8708
      }
8709

    
8710
  def _CheckFaultyDisks(self, instance, node_name):
8711
    """Ensure faulty disks abort the opcode or at least warn."""
8712
    try:
8713
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8714
                                  node_name, True):
8715
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8716
                                   " node '%s'" % (instance.name, node_name),
8717
                                   errors.ECODE_STATE)
8718
    except errors.OpPrereqError, err:
8719
      if self.op.ignore_consistency:
8720
        self.proc.LogWarning(str(err.args[0]))
8721
      else:
8722
        raise
8723

    
8724
  def CheckPrereq(self):
8725
    """Check prerequisites.
8726

8727
    """
8728
    # Check whether any instance on this node has faulty disks
8729
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8730
      if not inst.admin_up:
8731
        continue
8732
      check_nodes = set(inst.all_nodes)
8733
      check_nodes.discard(self.op.node_name)
8734
      for inst_node_name in check_nodes:
8735
        self._CheckFaultyDisks(inst, inst_node_name)
8736

    
8737
  def Exec(self, feedback_fn):
8738
    feedback_fn("Repairing storage unit '%s' on %s ..." %
8739
                (self.op.name, self.op.node_name))
8740

    
8741
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8742
    result = self.rpc.call_storage_execute(self.op.node_name,
8743
                                           self.op.storage_type, st_args,
8744
                                           self.op.name,
8745
                                           constants.SO_FIX_CONSISTENCY)
8746
    result.Raise("Failed to repair storage unit '%s' on %s" %
8747
                 (self.op.name, self.op.node_name))
8748

    
8749

    
8750
class LUNodeEvacuationStrategy(NoHooksLU):
8751
  """Computes the node evacuation strategy.
8752

8753
  """
8754
  _OP_PARAMS = [
8755
    ("nodes", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
8756
    ("remote_node", None, ht.TMaybeString),
8757
    ("iallocator", None, ht.TMaybeString),
8758
    ]
8759
  REQ_BGL = False
8760

    
8761
  def CheckArguments(self):
8762
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
8763

    
8764
  def ExpandNames(self):
8765
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
8766
    self.needed_locks = locks = {}
8767
    if self.op.remote_node is None:
8768
      locks[locking.LEVEL_NODE] = locking.ALL_SET
8769
    else:
8770
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8771
      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
8772

    
8773
  def Exec(self, feedback_fn):
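    # Returns a list of [instance_name, new_secondary_node] pairs when an
    # explicit remote node is given; otherwise the iallocator's "mevac"
    # result is passed through unchanged.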
8774
    if self.op.remote_node is not None:
8775
      instances = []
8776
      for node in self.op.nodes:
8777
        instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
8778
      result = []
8779
      for i in instances:
8780
        if i.primary_node == self.op.remote_node:
8781
          raise errors.OpPrereqError("Node %s is the primary node of"
8782
                                     " instance %s, cannot use it as"
8783
                                     " secondary" %
8784
                                     (self.op.remote_node, i.name),
8785
                                     errors.ECODE_INVAL)
8786
        result.append([i.name, self.op.remote_node])
8787
    else:
8788
      ial = IAllocator(self.cfg, self.rpc,
8789
                       mode=constants.IALLOCATOR_MODE_MEVAC,
8790
                       evac_nodes=self.op.nodes)
8791
      ial.Run(self.op.iallocator, validate=True)
8792
      if not ial.success:
8793
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
8794
                                 errors.ECODE_NORES)
8795
      result = ial.result
8796
    return result
8797

    
8798

    
8799
class LUGrowDisk(LogicalUnit):
8800
  """Grow a disk of an instance.
8801

8802
  """
8803
  HPATH = "disk-grow"
8804
  HTYPE = constants.HTYPE_INSTANCE
8805
  _OP_PARAMS = [
8806
    _PInstanceName,
8807
    ("disk", ht.NoDefault, ht.TInt),
8808
    ("amount", ht.NoDefault, ht.TInt),
8809
    ("wait_for_sync", True, ht.TBool),
8810
    ]
8811
  REQ_BGL = False
8812

    
8813
  def ExpandNames(self):
8814
    self._ExpandAndLockInstance()
8815
    self.needed_locks[locking.LEVEL_NODE] = []
8816
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8817

    
8818
  def DeclareLocks(self, level):
8819
    if level == locking.LEVEL_NODE:
8820
      self._LockInstancesNodes()
8821

    
8822
  def BuildHooksEnv(self):
8823
    """Build hooks env.
8824

8825
    This runs on the master, the primary and all the secondaries.
8826

8827
    """
8828
    env = {
8829
      "DISK": self.op.disk,
8830
      "AMOUNT": self.op.amount,
8831
      }
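    # (e.g. DISK=2, AMOUNT=1024 when growing disk 2 by 1024 MiB; example
    # values only)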
8832
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8833
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8834
    return env, nl, nl
8835

    
8836
  def CheckPrereq(self):
8837
    """Check prerequisites.
8838

8839
    This checks that the instance is in the cluster.
8840

8841
    """
8842
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8843
    assert instance is not None, \
8844
      "Cannot retrieve locked instance %s" % self.op.instance_name
8845
    nodenames = list(instance.all_nodes)
8846
    for node in nodenames:
8847
      _CheckNodeOnline(self, node)
8848

    
8849
    self.instance = instance
8850

    
8851
    if instance.disk_template not in constants.DTS_GROWABLE:
8852
      raise errors.OpPrereqError("Instance's disk layout does not support"
8853
                                 " growing.", errors.ECODE_INVAL)
8854

    
8855
    self.disk = instance.FindDisk(self.op.disk)
8856

    
8857
    if instance.disk_template != constants.DT_FILE:
8858
      # TODO: check the free disk space for file, when that feature
8859
      # will be supported
8860
      _CheckNodesFreeDiskPerVG(self, nodenames,
8861
                               {self.disk.physical_id[0]: self.op.amount})
8862

    
8863
  def Exec(self, feedback_fn):
8864
    """Execute disk grow.
8865

8866
    """
8867
    instance = self.instance
8868
    disk = self.disk
8869

    
8870
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8871
    if not disks_ok:
8872
      raise errors.OpExecError("Cannot activate block device to grow")
8873

    
8874
    for node in instance.all_nodes:
8875
      self.cfg.SetDiskID(disk, node)
8876
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8877
      result.Raise("Grow request failed to node %s" % node)
8878

    
8879
      # TODO: Rewrite code to work properly
8880
      # DRBD goes into sync mode for a short amount of time after executing the
8881
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8882
      # calling "resize" in sync mode fails. Sleeping for a short amount of
8883
      # time is a work-around.
8884
      time.sleep(5)
8885

    
8886
    disk.RecordGrow(self.op.amount)
8887
    self.cfg.Update(instance, feedback_fn)
8888
    if self.op.wait_for_sync:
8889
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
8890
      if disk_abort:
8891
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8892
                             " status.\nPlease check the instance.")
8893
      if not instance.admin_up:
8894
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8895
    elif not instance.admin_up:
8896
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
8897
                           " not supposed to be running because no wait for"
8898
                           " sync mode was requested.")
8899

    
8900

    
8901
class LUQueryInstanceData(NoHooksLU):
8902
  """Query runtime instance data.
8903

8904
  """
8905
  _OP_PARAMS = [
8906
    ("instances", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
8907
    ("static", False, ht.TBool),
8908
    ]
8909
  REQ_BGL = False
8910

    
8911
  def ExpandNames(self):
8912
    self.needed_locks = {}
8913
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
8914

    
8915
    if self.op.instances:
8916
      self.wanted_names = []
8917
      for name in self.op.instances:
8918
        full_name = _ExpandInstanceName(self.cfg, name)
8919
        self.wanted_names.append(full_name)
8920
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8921
    else:
8922
      self.wanted_names = None
8923
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8924

    
8925
    self.needed_locks[locking.LEVEL_NODE] = []
8926
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8927

    
8928
  def DeclareLocks(self, level):
8929
    if level == locking.LEVEL_NODE:
8930
      self._LockInstancesNodes()
8931

    
8932
  def CheckPrereq(self):
8933
    """Check prerequisites.
8934

8935
    This only checks the optional instance list against the existing names.
8936

8937
    """
8938
    if self.wanted_names is None:
8939
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8940

    
8941
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
8942
                             in self.wanted_names]
8943

    
8944
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
8945
    """Returns the status of a block device
8946

8947
    """
8948
    if self.op.static or not node:
8949
      return None
8950

    
8951
    self.cfg.SetDiskID(dev, node)
8952

    
8953
    result = self.rpc.call_blockdev_find(node, dev)
8954
    if result.offline:
8955
      return None
8956

    
8957
    result.Raise("Can't compute disk status for %s" % instance_name)
8958

    
8959
    status = result.payload
8960
    if status is None:
8961
      return None
8962

    
8963
    return (status.dev_path, status.major, status.minor,
8964
            status.sync_percent, status.estimated_time,
8965
            status.is_degraded, status.ldisk_status)
8966

    
8967
  def _ComputeDiskStatus(self, instance, snode, dev):
8968
    """Compute block device status.
8969

8970
    """
8971
    if dev.dev_type in constants.LDS_DRBD:
8972
      # we change the snode then (otherwise we use the one passed in)
8973
      if dev.logical_id[0] == instance.primary_node:
8974
        snode = dev.logical_id[1]
8975
      else:
8976
        snode = dev.logical_id[0]
8977

    
8978
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8979
                                              instance.name, dev)
8980
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8981

    
8982
    if dev.children:
8983
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
8984
                      for child in dev.children]
8985
    else:
8986
      dev_children = []
8987

    
8988
    data = {
8989
      "iv_name": dev.iv_name,
8990
      "dev_type": dev.dev_type,
8991
      "logical_id": dev.logical_id,
8992
      "physical_id": dev.physical_id,
8993
      "pstatus": dev_pstatus,
8994
      "sstatus": dev_sstatus,
8995
      "children": dev_children,
8996
      "mode": dev.mode,
8997
      "size": dev.size,
8998
      }
8999

    
9000
    return data
9001

    
9002
  def Exec(self, feedback_fn):
9003
    """Gather and return data"""
9004
    result = {}
9005

    
9006
    cluster = self.cfg.GetClusterInfo()
9007

    
9008
    for instance in self.wanted_instances:
9009
      if not self.op.static:
9010
        remote_info = self.rpc.call_instance_info(instance.primary_node,
9011
                                                  instance.name,
9012
                                                  instance.hypervisor)
9013
        remote_info.Raise("Error checking node %s" % instance.primary_node)
9014
        remote_info = remote_info.payload
9015
        if remote_info and "state" in remote_info:
9016
          remote_state = "up"
9017
        else:
9018
          remote_state = "down"
9019
      else:
9020
        remote_state = None
9021
      if instance.admin_up:
9022
        config_state = "up"
9023
      else:
9024
        config_state = "down"
9025

    
9026
      disks = [self._ComputeDiskStatus(instance, None, device)
9027
               for device in instance.disks]
9028

    
9029
      idict = {
9030
        "name": instance.name,
9031
        "config_state": config_state,
9032
        "run_state": remote_state,
9033
        "pnode": instance.primary_node,
9034
        "snodes": instance.secondary_nodes,
9035
        "os": instance.os,
9036
        # this happens to be the same format used for hooks
9037
        "nics": _NICListToTuple(self, instance.nics),
9038
        "disk_template": instance.disk_template,
9039
        "disks": disks,
9040
        "hypervisor": instance.hypervisor,
9041
        "network_port": instance.network_port,
9042
        "hv_instance": instance.hvparams,
9043
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
9044
        "be_instance": instance.beparams,
9045
        "be_actual": cluster.FillBE(instance),
9046
        "os_instance": instance.osparams,
9047
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
9048
        "serial_no": instance.serial_no,
9049
        "mtime": instance.mtime,
9050
        "ctime": instance.ctime,
9051
        "uuid": instance.uuid,
9052
        }
9053

    
9054
      result[instance.name] = idict
9055

    
9056
    return result
9057

    
9058

    
9059
class LUSetInstanceParams(LogicalUnit):
9060
  """Modifies an instances's parameters.
9061

9062
  """
9063
  HPATH = "instance-modify"
9064
  HTYPE = constants.HTYPE_INSTANCE
9065
  _OP_PARAMS = [
9066
    _PInstanceName,
9067
    ("nics", ht.EmptyList, ht.TList),
9068
    ("disks", ht.EmptyList, ht.TList),
9069
    ("beparams", ht.EmptyDict, ht.TDict),
9070
    ("hvparams", ht.EmptyDict, ht.TDict),
9071
    ("disk_template", None, ht.TMaybeString),
9072
    ("remote_node", None, ht.TMaybeString),
9073
    ("os_name", None, ht.TMaybeString),
9074
    ("force_variant", False, ht.TBool),
9075
    ("osparams", None, ht.TOr(ht.TDict, ht.TNone)),
9076
    _PForce,
9077
    ]
9078
  REQ_BGL = False
9079

    
9080
  def CheckArguments(self):
9081
    if not (self.op.nics or self.op.disks or self.op.disk_template or
9082
            self.op.hvparams or self.op.beparams or self.op.os_name):
9083
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
9084

    
9085
    if self.op.hvparams:
9086
      _CheckGlobalHvParams(self.op.hvparams)
9087

    
9088
    # Disk validation
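    # Each entry of self.op.disks is a (disk_op, disk_dict) pair, where
    # disk_op is constants.DDM_ADD, constants.DDM_REMOVE or the index of an
    # existing disk, e.g. (constants.DDM_ADD, {"size": 1024, "mode": "rw"})
    # or (0, {"mode": "ro"}) -- example values only.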
9089
    disk_addremove = 0
9090
    for disk_op, disk_dict in self.op.disks:
9091
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
9092
      if disk_op == constants.DDM_REMOVE:
9093
        disk_addremove += 1
9094
        continue
9095
      elif disk_op == constants.DDM_ADD:
9096
        disk_addremove += 1
9097
      else:
9098
        if not isinstance(disk_op, int):
9099
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
9100
        if not isinstance(disk_dict, dict):
9101
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
9102
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9103

    
9104
      if disk_op == constants.DDM_ADD:
9105
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
9106
        if mode not in constants.DISK_ACCESS_SET:
9107
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
9108
                                     errors.ECODE_INVAL)
9109
        size = disk_dict.get('size', None)
9110
        if size is None:
9111
          raise errors.OpPrereqError("Required disk parameter size missing",
9112
                                     errors.ECODE_INVAL)
9113
        try:
9114
          size = int(size)
9115
        except (TypeError, ValueError), err:
9116
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
9117
                                     str(err), errors.ECODE_INVAL)
9118
        disk_dict['size'] = size
9119
      else:
9120
        # modification of disk
9121
        if 'size' in disk_dict:
9122
          raise errors.OpPrereqError("Disk size change not possible, use"
9123
                                     " grow-disk", errors.ECODE_INVAL)
9124

    
9125
    if disk_addremove > 1:
9126
      raise errors.OpPrereqError("Only one disk add or remove operation"
9127
                                 " supported at a time", errors.ECODE_INVAL)
9128

    
9129
    if self.op.disks and self.op.disk_template is not None:
9130
      raise errors.OpPrereqError("Disk template conversion and other disk"
9131
                                 " changes not supported at the same time",
9132
                                 errors.ECODE_INVAL)
9133

    
9134
    if self.op.disk_template:
9135
      _CheckDiskTemplate(self.op.disk_template)
9136
      if (self.op.disk_template in constants.DTS_NET_MIRROR and
9137
          self.op.remote_node is None):
9138
        raise errors.OpPrereqError("Changing the disk template to a mirrored"
9139
                                   " one requires specifying a secondary node",
9140
                                   errors.ECODE_INVAL)
9141

    
9142
    # NIC validation
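    # self.op.nics uses the same (op, dict) convention as the disks above,
    # e.g. (constants.DDM_ADD, {"mac": constants.VALUE_AUTO}) or
    # (0, {"link": "br100"}) -- example values only.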
9143
    nic_addremove = 0
9144
    for nic_op, nic_dict in self.op.nics:
9145
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
9146
      if nic_op == constants.DDM_REMOVE:
9147
        nic_addremove += 1
9148
        continue
9149
      elif nic_op == constants.DDM_ADD:
9150
        nic_addremove += 1
9151
      else:
9152
        if not isinstance(nic_op, int):
9153
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
9154
        if not isinstance(nic_dict, dict):
9155
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
9156
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9157

    
9158
      # nic_dict should be a dict
9159
      nic_ip = nic_dict.get('ip', None)
9160
      if nic_ip is not None:
9161
        if nic_ip.lower() == constants.VALUE_NONE:
9162
          nic_dict['ip'] = None
9163
        else:
9164
          if not netutils.IPAddress.IsValid(nic_ip):
9165
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
9166
                                       errors.ECODE_INVAL)
9167

    
9168
      nic_bridge = nic_dict.get('bridge', None)
9169
      nic_link = nic_dict.get('link', None)
9170
      if nic_bridge and nic_link:
9171
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
9172
                                   " at the same time", errors.ECODE_INVAL)
9173
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
9174
        nic_dict['bridge'] = None
9175
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
9176
        nic_dict['link'] = None
9177

    
9178
      if nic_op == constants.DDM_ADD:
9179
        nic_mac = nic_dict.get('mac', None)
9180
        if nic_mac is None:
9181
          nic_dict['mac'] = constants.VALUE_AUTO
9182

    
9183
      if 'mac' in nic_dict:
9184
        nic_mac = nic_dict['mac']
9185
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9186
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
9187

    
9188
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
9189
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
9190
                                     " modifying an existing nic",
9191
                                     errors.ECODE_INVAL)
9192

    
9193
    if nic_addremove > 1:
9194
      raise errors.OpPrereqError("Only one NIC add or remove operation"
9195
                                 " supported at a time", errors.ECODE_INVAL)
9196

    
9197
  def ExpandNames(self):
9198
    self._ExpandAndLockInstance()
9199
    self.needed_locks[locking.LEVEL_NODE] = []
9200
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9201

    
9202
  def DeclareLocks(self, level):
9203
    if level == locking.LEVEL_NODE:
9204
      self._LockInstancesNodes()
9205
      if self.op.disk_template and self.op.remote_node:
9206
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9207
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
9208

    
9209
  def BuildHooksEnv(self):
9210
    """Build hooks env.
9211

9212
    This runs on the master, primary and secondaries.
9213

9214
    """
9215
    args = dict()
9216
    if constants.BE_MEMORY in self.be_new:
9217
      args['memory'] = self.be_new[constants.BE_MEMORY]
9218
    if constants.BE_VCPUS in self.be_new:
9219
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
9220
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
9221
    # information at all.
9222
    if self.op.nics:
9223
      args['nics'] = []
9224
      nic_override = dict(self.op.nics)
9225
      for idx, nic in enumerate(self.instance.nics):
9226
        if idx in nic_override:
9227
          this_nic_override = nic_override[idx]
9228
        else:
9229
          this_nic_override = {}
9230
        if 'ip' in this_nic_override:
9231
          ip = this_nic_override['ip']
9232
        else:
9233
          ip = nic.ip
9234
        if 'mac' in this_nic_override:
9235
          mac = this_nic_override['mac']
9236
        else:
9237
          mac = nic.mac
9238
        if idx in self.nic_pnew:
9239
          nicparams = self.nic_pnew[idx]
9240
        else:
9241
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
9242
        mode = nicparams[constants.NIC_MODE]
9243
        link = nicparams[constants.NIC_LINK]
9244
        args['nics'].append((ip, mac, mode, link))
9245
      if constants.DDM_ADD in nic_override:
9246
        ip = nic_override[constants.DDM_ADD].get('ip', None)
9247
        mac = nic_override[constants.DDM_ADD]['mac']
9248
        nicparams = self.nic_pnew[constants.DDM_ADD]
9249
        mode = nicparams[constants.NIC_MODE]
9250
        link = nicparams[constants.NIC_LINK]
9251
        args['nics'].append((ip, mac, mode, link))
9252
      elif constants.DDM_REMOVE in nic_override:
9253
        del args['nics'][-1]
9254

    
9255
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
9256
    if self.op.disk_template:
9257
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
9258
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9259
    return env, nl, nl
9260

    
9261
  def CheckPrereq(self):
9262
    """Check prerequisites.
9263

9264
    This only checks the instance list against the existing names.
9265

9266
    """
9267
    # checking the new params on the primary/secondary nodes
9268

    
9269
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9270
    cluster = self.cluster = self.cfg.GetClusterInfo()
9271
    assert self.instance is not None, \
9272
      "Cannot retrieve locked instance %s" % self.op.instance_name
9273
    pnode = instance.primary_node
9274
    nodelist = list(instance.all_nodes)
9275

    
9276
    # OS change
9277
    if self.op.os_name and not self.op.force:
9278
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
9279
                      self.op.force_variant)
9280
      instance_os = self.op.os_name
9281
    else:
9282
      instance_os = instance.os
9283

    
9284
    if self.op.disk_template:
9285
      if instance.disk_template == self.op.disk_template:
9286
        raise errors.OpPrereqError("Instance already has disk template %s" %
9287
                                   instance.disk_template, errors.ECODE_INVAL)
9288

    
9289
      if (instance.disk_template,
9290
          self.op.disk_template) not in self._DISK_CONVERSIONS:
9291
        raise errors.OpPrereqError("Unsupported disk template conversion from"
9292
                                   " %s to %s" % (instance.disk_template,
9293
                                                  self.op.disk_template),
9294
                                   errors.ECODE_INVAL)
9295
      _CheckInstanceDown(self, instance, "cannot change disk template")
9296
      if self.op.disk_template in constants.DTS_NET_MIRROR:
9297
        if self.op.remote_node == pnode:
9298
          raise errors.OpPrereqError("Given new secondary node %s is the same"
9299
                                     " as the primary node of the instance" %
9300
                                     self.op.remote_node, errors.ECODE_STATE)
9301
        _CheckNodeOnline(self, self.op.remote_node)
9302
        _CheckNodeNotDrained(self, self.op.remote_node)
9303
        # FIXME: here we assume that the old instance type is DT_PLAIN
9304
        assert instance.disk_template == constants.DT_PLAIN
9305
        disks = [{"size": d.size, "vg": d.logical_id[0]}
9306
                 for d in instance.disks]
9307
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
9308
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
9309

    
9310
    # hvparams processing
9311
    if self.op.hvparams:
9312
      hv_type = instance.hypervisor
9313
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
9314
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
9315
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
9316

    
9317
      # local check
9318
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
9319
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
9320
      self.hv_new = hv_new # the new actual values
9321
      self.hv_inst = i_hvdict # the new dict (without defaults)
9322
    else:
9323
      self.hv_new = self.hv_inst = {}
9324

    
9325
    # beparams processing
9326
    if self.op.beparams:
9327
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
9328
                                   use_none=True)
9329
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
9330
      be_new = cluster.SimpleFillBE(i_bedict)
9331
      self.be_new = be_new # the new actual values
9332
      self.be_inst = i_bedict # the new dict (without defaults)
9333
    else:
9334
      self.be_new = self.be_inst = {}
9335

    
9336
    # osparams processing
9337
    if self.op.osparams:
9338
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
9339
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
9340
      self.os_inst = i_osdict # the new dict (without defaults)
9341
    else:
9342
      self.os_inst = {}
9343

    
9344
    self.warn = []
9345

    
9346
    if constants.BE_MEMORY in self.op.beparams and not self.op.force:
9347
      mem_check_list = [pnode]
9348
      if be_new[constants.BE_AUTO_BALANCE]:
9349
        # either we changed auto_balance to yes or it was from before
9350
        mem_check_list.extend(instance.secondary_nodes)
9351
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
9352
                                                  instance.hypervisor)
9353
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
9354
                                         instance.hypervisor)
9355
      pninfo = nodeinfo[pnode]
9356
      msg = pninfo.fail_msg
9357
      if msg:
9358
        # Assume the primary node is unreachable and go ahead
9359
        self.warn.append("Can't get info from primary node %s: %s" %
9360
                         (pnode, msg))
9361
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
9362
        self.warn.append("Node data from primary node %s doesn't contain"
9363
                         " free memory information" % pnode)
9364
      elif instance_info.fail_msg:
9365
        self.warn.append("Can't get instance runtime information: %s" %
9366
                        instance_info.fail_msg)
9367
      else:
9368
        if instance_info.payload:
9369
          current_mem = int(instance_info.payload['memory'])
9370
        else:
9371
          # Assume instance not running
9372
          # (there is a slight race condition here, but it's not very probable,
9373
          # and we have no other way to check)
9374
          current_mem = 0
9375
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
9376
                    pninfo.payload['memory_free'])
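        # e.g. growing the instance from 512 to 2048 MB of memory with only
        # 1024 MB free on the primary leaves 2048 - 512 - 1024 = 512 MB
        # missing (illustrative numbers)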
9377
        if miss_mem > 0:
9378
          raise errors.OpPrereqError("This change will prevent the instance"
9379
                                     " from starting, due to %d MB of memory"
9380
                                     " missing on its primary node" % miss_mem,
9381
                                     errors.ECODE_NORES)
9382

    
9383
      if be_new[constants.BE_AUTO_BALANCE]:
9384
        for node, nres in nodeinfo.items():
9385
          if node not in instance.secondary_nodes:
9386
            continue
9387
          msg = nres.fail_msg
9388
          if msg:
9389
            self.warn.append("Can't get info from secondary node %s: %s" %
9390
                             (node, msg))
9391
          elif not isinstance(nres.payload.get('memory_free', None), int):
9392
            self.warn.append("Secondary node %s didn't return free"
9393
                             " memory information" % node)
9394
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
9395
            self.warn.append("Not enough memory to failover instance to"
9396
                             " secondary node %s" % node)
9397

    
9398
    # NIC processing
9399
    self.nic_pnew = {}
9400
    self.nic_pinst = {}
9401
    for nic_op, nic_dict in self.op.nics:
9402
      if nic_op == constants.DDM_REMOVE:
9403
        if not instance.nics:
9404
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
9405
                                     errors.ECODE_INVAL)
9406
        continue
9407
      if nic_op != constants.DDM_ADD:
9408
        # an existing nic
9409
        if not instance.nics:
9410
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
9411
                                     " no NICs" % nic_op,
9412
                                     errors.ECODE_INVAL)
9413
        if nic_op < 0 or nic_op >= len(instance.nics):
9414
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
9415
                                     " are 0 to %d" %
9416
                                     (nic_op, len(instance.nics) - 1),
9417
                                     errors.ECODE_INVAL)
9418
        old_nic_params = instance.nics[nic_op].nicparams
9419
        old_nic_ip = instance.nics[nic_op].ip
9420
      else:
9421
        old_nic_params = {}
9422
        old_nic_ip = None
9423

    
9424
      update_params_dict = dict([(key, nic_dict[key])
9425
                                 for key in constants.NICS_PARAMETERS
9426
                                 if key in nic_dict])
9427

    
9428
      if 'bridge' in nic_dict:
9429
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
9430

    
9431
      new_nic_params = _GetUpdatedParams(old_nic_params,
9432
                                         update_params_dict)
9433
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
9434
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
9435
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
9436
      self.nic_pinst[nic_op] = new_nic_params
9437
      self.nic_pnew[nic_op] = new_filled_nic_params
9438
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
9439

    
9440
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
9441
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
9442
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
9443
        if msg:
9444
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
9445
          if self.op.force:
9446
            self.warn.append(msg)
9447
          else:
9448
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
9449
      if new_nic_mode == constants.NIC_MODE_ROUTED:
9450
        if 'ip' in nic_dict:
9451
          nic_ip = nic_dict['ip']
9452
        else:
9453
          nic_ip = old_nic_ip
9454
        if nic_ip is None:
9455
          raise errors.OpPrereqError('Cannot set the nic ip to None'
9456
                                     ' on a routed nic', errors.ECODE_INVAL)
9457
      if 'mac' in nic_dict:
9458
        nic_mac = nic_dict['mac']
9459
        if nic_mac is None:
9460
          raise errors.OpPrereqError('Cannot set the nic mac to None',
9461
                                     errors.ECODE_INVAL)
9462
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9463
          # otherwise generate the mac
9464
          nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
9465
        else:
9466
          # or validate/reserve the current one
9467
          try:
9468
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
9469
          except errors.ReservationError:
9470
            raise errors.OpPrereqError("MAC address %s already in use"
9471
                                       " in cluster" % nic_mac,
9472
                                       errors.ECODE_NOTUNIQUE)
9473

    
9474
    # DISK processing
9475
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
9476
      raise errors.OpPrereqError("Disk operations not supported for"
9477
                                 " diskless instances",
9478
                                 errors.ECODE_INVAL)
9479
    for disk_op, _ in self.op.disks:
9480
      if disk_op == constants.DDM_REMOVE:
9481
        if len(instance.disks) == 1:
9482
          raise errors.OpPrereqError("Cannot remove the last disk of"
9483
                                     " an instance", errors.ECODE_INVAL)
9484
        _CheckInstanceDown(self, instance, "cannot remove disks")
9485

    
9486
      if (disk_op == constants.DDM_ADD and
9487
          len(instance.disks) >= constants.MAX_DISKS):
9488
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
9489
                                   " add more" % constants.MAX_DISKS,
9490
                                   errors.ECODE_STATE)
9491
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
9492
        # an existing disk
9493
        if disk_op < 0 or disk_op >= len(instance.disks):
9494
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
9495
                                     " are 0 to %d" %
9496
                                     (disk_op, len(instance.disks) - 1),
9497
                                     errors.ECODE_INVAL)
9498

    
9499
    return
9500

    
9501
  def _ConvertPlainToDrbd(self, feedback_fn):
9502
    """Converts an instance from plain to drbd.
9503

9504
    """
9505
    feedback_fn("Converting template to drbd")
9506
    instance = self.instance
9507
    pnode = instance.primary_node
9508
    snode = self.op.remote_node
9509

    
9510
    # create a fake disk info for _GenerateDiskTemplate
9511
    disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
9512
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
9513
                                      instance.name, pnode, [snode],
9514
                                      disk_info, None, None, 0, feedback_fn)
9515
    info = _GetInstanceInfoText(instance)
9516
    feedback_fn("Creating aditional volumes...")
9517
    # first, create the missing data and meta devices
9518
    for disk in new_disks:
9519
      # unfortunately this is... not too nice
9520
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
9521
                            info, True)
9522
      for child in disk.children:
9523
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
9524
    # at this stage, all new LVs have been created, we can rename the
9525
    # old ones
9526
    feedback_fn("Renaming original volumes...")
9527
    rename_list = [(o, n.children[0].logical_id)
9528
                   for (o, n) in zip(instance.disks, new_disks)]
9529
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
9530
    result.Raise("Failed to rename original LVs")
9531

    
9532
    feedback_fn("Initializing DRBD devices...")
9533
    # all child devices are in place, we can now create the DRBD devices
9534
    for disk in new_disks:
9535
      for node in [pnode, snode]:
9536
        f_create = node == pnode
9537
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
9538

    
9539
    # at this point, the instance has been modified
9540
    instance.disk_template = constants.DT_DRBD8
9541
    instance.disks = new_disks
9542
    self.cfg.Update(instance, feedback_fn)
9543

    
9544
    # disks are created, waiting for sync
9545
    disk_abort = not _WaitForSync(self, instance)
9546
    if disk_abort:
9547
      raise errors.OpExecError("There are some degraded disks for"
9548
                               " this instance, please cleanup manually")
9549

    
9550
  def _ConvertDrbdToPlain(self, feedback_fn):
9551
    """Converts an instance from drbd to plain.
9552

9553
    """
9554
    instance = self.instance
9555
    assert len(instance.secondary_nodes) == 1
9556
    pnode = instance.primary_node
9557
    snode = instance.secondary_nodes[0]
9558
    feedback_fn("Converting template to plain")
9559

    
9560
    old_disks = instance.disks
9561
    new_disks = [d.children[0] for d in old_disks]
9562

    
9563
    # copy over size and mode
9564
    for parent, child in zip(old_disks, new_disks):
9565
      child.size = parent.size
9566
      child.mode = parent.mode
9567

    
9568
    # update instance structure
9569
    instance.disks = new_disks
9570
    instance.disk_template = constants.DT_PLAIN
9571
    self.cfg.Update(instance, feedback_fn)
9572

    
9573
    feedback_fn("Removing volumes on the secondary node...")
9574
    for disk in old_disks:
9575
      self.cfg.SetDiskID(disk, snode)
9576
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
9577
      if msg:
9578
        self.LogWarning("Could not remove block device %s on node %s,"
9579
                        " continuing anyway: %s", disk.iv_name, snode, msg)
9580

    
9581
    feedback_fn("Removing unneeded volumes on the primary node...")
9582
    for idx, disk in enumerate(old_disks):
9583
      meta = disk.children[1]
9584
      self.cfg.SetDiskID(meta, pnode)
9585
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
9586
      if msg:
9587
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
9588
                        " continuing anyway: %s", idx, pnode, msg)
9589

    
9590
  def Exec(self, feedback_fn):
9591
    """Modifies an instance.
9592

9593
    All parameters take effect only at the next restart of the instance.
9594

9595
    """
9596
    # Process here the warnings from CheckPrereq, as we don't have a
9597
    # feedback_fn there.
9598
    for warn in self.warn:
9599
      feedback_fn("WARNING: %s" % warn)
9600

    
9601
    result = []
9602
    instance = self.instance
9603
    # disk changes
9604
    for disk_op, disk_dict in self.op.disks:
9605
      if disk_op == constants.DDM_REMOVE:
9606
        # remove the last disk
9607
        device = instance.disks.pop()
9608
        device_idx = len(instance.disks)
9609
        for node, disk in device.ComputeNodeTree(instance.primary_node):
9610
          self.cfg.SetDiskID(disk, node)
9611
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
9612
          if msg:
9613
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
9614
                            " continuing anyway", device_idx, node, msg)
9615
        result.append(("disk/%d" % device_idx, "remove"))
9616
      elif disk_op == constants.DDM_ADD:
9617
        # add a new disk
9618
        if instance.disk_template == constants.DT_FILE:
9619
          file_driver, file_path = instance.disks[0].logical_id
9620
          file_path = os.path.dirname(file_path)
9621
        else:
9622
          file_driver = file_path = None
9623
        disk_idx_base = len(instance.disks)
9624
        new_disk = _GenerateDiskTemplate(self,
9625
                                         instance.disk_template,
9626
                                         instance.name, instance.primary_node,
9627
                                         instance.secondary_nodes,
9628
                                         [disk_dict],
9629
                                         file_path,
9630
                                         file_driver,
9631
                                         disk_idx_base, feedback_fn)[0]
9632
        instance.disks.append(new_disk)
9633
        info = _GetInstanceInfoText(instance)
9634

    
9635
        logging.info("Creating volume %s for instance %s",
9636
                     new_disk.iv_name, instance.name)
9637
        # Note: this needs to be kept in sync with _CreateDisks
9638
        #HARDCODE
9639
        for node in instance.all_nodes:
9640
          f_create = node == instance.primary_node
9641
          try:
9642
            _CreateBlockDev(self, node, instance, new_disk,
9643
                            f_create, info, f_create)
9644
          except errors.OpExecError, err:
9645
            self.LogWarning("Failed to create volume %s (%s) on"
9646
                            " node %s: %s",
9647
                            new_disk.iv_name, new_disk, node, err)
9648
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
9649
                       (new_disk.size, new_disk.mode)))
9650
      else:
9651
        # change a given disk
9652
        instance.disks[disk_op].mode = disk_dict['mode']
9653
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
9654

    
9655
    if self.op.disk_template:
9656
      r_shut = _ShutdownInstanceDisks(self, instance)
9657
      if not r_shut:
9658
        raise errors.OpExecError("Cannot shutdow instance disks, unable to"
9659
                                 " proceed with disk template conversion")
9660
      mode = (instance.disk_template, self.op.disk_template)
9661
      try:
9662
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
9663
      except:
9664
        self.cfg.ReleaseDRBDMinors(instance.name)
9665
        raise
9666
      result.append(("disk_template", self.op.disk_template))
9667

    
9668
    # NIC changes
9669
    for nic_op, nic_dict in self.op.nics:
9670
      if nic_op == constants.DDM_REMOVE:
9671
        # remove the last nic
9672
        del instance.nics[-1]
9673
        result.append(("nic.%d" % len(instance.nics), "remove"))
9674
      elif nic_op == constants.DDM_ADD:
9675
        # mac and bridge should be set by now
9676
        mac = nic_dict['mac']
9677
        ip = nic_dict.get('ip', None)
9678
        nicparams = self.nic_pinst[constants.DDM_ADD]
9679
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9680
        instance.nics.append(new_nic)
9681
        result.append(("nic.%d" % (len(instance.nics) - 1),
9682
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
9683
                       (new_nic.mac, new_nic.ip,
9684
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
9685
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
9686
                       )))
9687
      else:
9688
        for key in 'mac', 'ip':
9689
          if key in nic_dict:
9690
            setattr(instance.nics[nic_op], key, nic_dict[key])
9691
        if nic_op in self.nic_pinst:
9692
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9693
        for key, val in nic_dict.iteritems():
9694
          result.append(("nic.%s/%d" % (key, nic_op), val))
9695

    
9696
    # hvparams changes
9697
    if self.op.hvparams:
9698
      instance.hvparams = self.hv_inst
9699
      for key, val in self.op.hvparams.iteritems():
9700
        result.append(("hv/%s" % key, val))
9701

    
9702
    # beparams changes
9703
    if self.op.beparams:
9704
      instance.beparams = self.be_inst
9705
      for key, val in self.op.beparams.iteritems():
9706
        result.append(("be/%s" % key, val))
9707

    
9708
    # OS change
9709
    if self.op.os_name:
9710
      instance.os = self.op.os_name
9711

    
9712
    # osparams changes
9713
    if self.op.osparams:
9714
      instance.osparams = self.os_inst
9715
      for key, val in self.op.osparams.iteritems():
9716
        result.append(("os/%s" % key, val))
9717

    
9718
    self.cfg.Update(instance, feedback_fn)
9719

    
9720
    return result
9721

    
9722
  _DISK_CONVERSIONS = {
9723
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
9724
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
9725
    }
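  # Maps (current template, requested template) to the conversion helper;
  # Exec() looks up self._DISK_CONVERSIONS[(instance.disk_template,
  # self.op.disk_template)] and calls the result with (self, feedback_fn).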
9726

    
9727

    
9728
class LUQueryExports(NoHooksLU):
9729
  """Query the exports list
9730

9731
  """
9732
  _OP_PARAMS = [
9733
    ("nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
9734
    ("use_locking", False, ht.TBool),
9735
    ]
9736
  REQ_BGL = False
9737

    
9738
  def ExpandNames(self):
9739
    self.needed_locks = {}
9740
    self.share_locks[locking.LEVEL_NODE] = 1
9741
    if not self.op.nodes:
9742
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9743
    else:
9744
      self.needed_locks[locking.LEVEL_NODE] = \
9745
        _GetWantedNodes(self, self.op.nodes)
9746

    
9747
  def Exec(self, feedback_fn):
9748
    """Compute the list of all the exported system images.
9749

9750
    @rtype: dict
9751
    @return: a dictionary with the structure node->(export-list)
9752
        where export-list is a list of the instances exported on
9753
        that node.
9754

9755
    """
9756
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
9757
    rpcresult = self.rpc.call_export_list(self.nodes)
9758
    result = {}
9759
    for node in rpcresult:
9760
      if rpcresult[node].fail_msg:
9761
        result[node] = False
9762
      else:
9763
        result[node] = rpcresult[node].payload
9764

    
9765
    return result
9766

    
9767

    
9768
class LUPrepareExport(NoHooksLU):
9769
  """Prepares an instance for an export and returns useful information.
9770

9771
  """
9772
  _OP_PARAMS = [
9773
    _PInstanceName,
9774
    ("mode", ht.NoDefault, ht.TElemOf(constants.EXPORT_MODES)),
9775
    ]
9776
  REQ_BGL = False
9777

    
9778
  def ExpandNames(self):
9779
    self._ExpandAndLockInstance()
9780

    
9781
  def CheckPrereq(self):
9782
    """Check prerequisites.
9783

9784
    """
9785
    instance_name = self.op.instance_name
9786

    
9787
    self.instance = self.cfg.GetInstanceInfo(instance_name)
9788
    assert self.instance is not None, \
9789
          "Cannot retrieve locked instance %s" % self.op.instance_name
9790
    _CheckNodeOnline(self, self.instance.primary_node)
9791

    
9792
    self._cds = _GetClusterDomainSecret()
9793

    
9794
  def Exec(self, feedback_fn):
9795
    """Prepares an instance for an export.
9796

9797
    """
9798
    instance = self.instance
9799

    
9800
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
9801
      salt = utils.GenerateSecret(8)
9802

    
9803
      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
9804
      result = self.rpc.call_x509_cert_create(instance.primary_node,
9805
                                              constants.RIE_CERT_VALIDITY)
9806
      result.Raise("Can't create X509 key and certificate on %s" % result.node)
9807

    
9808
      (name, cert_pem) = result.payload
9809

    
9810
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
9811
                                             cert_pem)
9812

    
9813
      return {
9814
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
9815
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
9816
                          salt),
9817
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
9818
        }
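      # Note: "x509_key_name" is a (name, HMAC, salt) tuple signed with the
      # cluster domain secret; LUExportInstance in remote mode verifies such
      # a tuple in its CheckPrereq before using the key.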
9819

    
9820
    return None
9821

    
9822

    
9823
class LUExportInstance(LogicalUnit):
9824
  """Export an instance to an image in the cluster.
9825

9826
  """
9827
  HPATH = "instance-export"
9828
  HTYPE = constants.HTYPE_INSTANCE
9829
  _OP_PARAMS = [
9830
    _PInstanceName,
9831
    ("target_node", ht.NoDefault, ht.TOr(ht.TNonEmptyString, ht.TList)),
9832
    ("shutdown", True, ht.TBool),
9833
    _PShutdownTimeout,
9834
    ("remove_instance", False, ht.TBool),
9835
    ("ignore_remove_failures", False, ht.TBool),
9836
    ("mode", constants.EXPORT_MODE_LOCAL, ht.TElemOf(constants.EXPORT_MODES)),
9837
    ("x509_key_name", None, ht.TOr(ht.TList, ht.TNone)),
9838
    ("destination_x509_ca", None, ht.TMaybeString),
9839
    ]
9840
  REQ_BGL = False
9841

    
9842
  def CheckArguments(self):
9843
    """Check the arguments.
9844

9845
    """
9846
    self.x509_key_name = self.op.x509_key_name
9847
    self.dest_x509_ca_pem = self.op.destination_x509_ca
9848

    
9849
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
9850
      if not self.x509_key_name:
9851
        raise errors.OpPrereqError("Missing X509 key name for encryption",
9852
                                   errors.ECODE_INVAL)
9853

    
9854
      if not self.dest_x509_ca_pem:
9855
        raise errors.OpPrereqError("Missing destination X509 CA",
9856
                                   errors.ECODE_INVAL)
9857

    
9858
  def ExpandNames(self):
9859
    self._ExpandAndLockInstance()
9860

    
9861
    # Lock all nodes for local exports
9862
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9863
      # FIXME: lock only instance primary and destination node
9864
      #
9865
      # Sad but true, for now we have to lock all nodes, as we don't know where
9866
      # the previous export might be, and in this LU we search for it and
9867
      # remove it from its current node. In the future we could fix this by:
9868
      #  - making a tasklet to search (share-lock all), then create the
9869
      #    new one, then one to remove, after
9870
      #  - removing the removal operation altogether
9871
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9872

    
9873
  def DeclareLocks(self, level):
9874
    """Last minute lock declaration."""
9875
    # All nodes are locked anyway, so nothing to do here.
9876

    
9877
  def BuildHooksEnv(self):
9878
    """Build hooks env.
9879

9880
    This will run on the master, primary node and target node.
9881

9882
    """
9883
    env = {
9884
      "EXPORT_MODE": self.op.mode,
9885
      "EXPORT_NODE": self.op.target_node,
9886
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9887
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9888
      # TODO: Generic function for boolean env variables
9889
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
9890
      }
9891

    
9892
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9893

    
9894
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9895

    
9896
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9897
      nl.append(self.op.target_node)
9898

    
9899
    return env, nl, nl
9900

    
9901
  def CheckPrereq(self):
9902
    """Check prerequisites.
9903

9904
    This checks that the instance and node names are valid.
9905

9906
    """
9907
    instance_name = self.op.instance_name
9908

    
9909
    self.instance = self.cfg.GetInstanceInfo(instance_name)
9910
    assert self.instance is not None, \
9911
          "Cannot retrieve locked instance %s" % self.op.instance_name
9912
    _CheckNodeOnline(self, self.instance.primary_node)
9913

    
9914
    if (self.op.remove_instance and self.instance.admin_up and
9915
        not self.op.shutdown):
9916
      raise errors.OpPrereqError("Can not remove instance without shutting it"
9917
                                 " down before")
9918

    
9919
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9920
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9921
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9922
      assert self.dst_node is not None
9923

    
9924
      _CheckNodeOnline(self, self.dst_node.name)
9925
      _CheckNodeNotDrained(self, self.dst_node.name)
9926

    
9927
      self._cds = None
9928
      self.dest_disk_info = None
9929
      self.dest_x509_ca = None
9930

    
9931
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9932
      self.dst_node = None
9933

    
9934
      if len(self.op.target_node) != len(self.instance.disks):
9935
        raise errors.OpPrereqError(("Received destination information for %s"
9936
                                    " disks, but instance %s has %s disks") %
9937
                                   (len(self.op.target_node), instance_name,
9938
                                    len(self.instance.disks)),
9939
                                   errors.ECODE_INVAL)
9940

    
9941
      cds = _GetClusterDomainSecret()
9942

    
9943
      # Check X509 key name
9944
      try:
9945
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9946
      except (TypeError, ValueError), err:
9947
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9948

    
9949
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9950
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9951
                                   errors.ECODE_INVAL)
9952

    
9953
      # Load and verify CA
9954
      try:
9955
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9956
      except OpenSSL.crypto.Error, err:
9957
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9958
                                   (err, ), errors.ECODE_INVAL)
9959

    
9960
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9961
      if errcode is not None:
9962
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
9963
                                   (msg, ), errors.ECODE_INVAL)
9964

    
9965
      self.dest_x509_ca = cert
9966

    
9967
      # Verify target information
9968
      disk_info = []
9969
      for idx, disk_data in enumerate(self.op.target_node):
9970
        try:
9971
          (host, port, magic) = \
9972
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9973
        except errors.GenericError, err:
9974
          raise errors.OpPrereqError("Target info for disk %s: %s" %
9975
                                     (idx, err), errors.ECODE_INVAL)
9976

    
9977
        disk_info.append((host, port, magic))
9978

    
9979
      assert len(disk_info) == len(self.op.target_node)
9980
      self.dest_disk_info = disk_info
9981

    
9982
    else:
9983
      raise errors.ProgrammerError("Unhandled export mode %r" %
9984
                                   self.op.mode)
9985

    
9986
    # instance disk type verification
9987
    # TODO: Implement export support for file-based disks
9988
    for disk in self.instance.disks:
9989
      if disk.dev_type == constants.LD_FILE:
9990
        raise errors.OpPrereqError("Export not supported for instances with"
9991
                                   " file-based disks", errors.ECODE_INVAL)
9992

    
9993
  def _CleanupExports(self, feedback_fn):
9994
    """Removes exports of current instance from all other nodes.
9995

9996
    If an instance in a cluster with nodes A..D was exported to node C, its
9997
    exports will be removed from the nodes A, B and D.
9998

9999
    """
10000
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
10001

    
10002
    nodelist = self.cfg.GetNodeList()
10003
    nodelist.remove(self.dst_node.name)
10004

    
10005
    # on one-node clusters nodelist will be empty after the removal;
10006
    # if we proceeded, the backup would be removed because OpQueryExports
10007
    # substitutes an empty list with the full cluster node list.
10008
    iname = self.instance.name
10009
    if nodelist:
10010
      feedback_fn("Removing old exports for instance %s" % iname)
10011
      exportlist = self.rpc.call_export_list(nodelist)
10012
      for node in exportlist:
10013
        if exportlist[node].fail_msg:
10014
          continue
10015
        if iname in exportlist[node].payload:
10016
          msg = self.rpc.call_export_remove(node, iname).fail_msg
10017
          if msg:
10018
            self.LogWarning("Could not remove older export for instance %s"
10019
                            " on node %s: %s", iname, node, msg)
10020

    
10021
  def Exec(self, feedback_fn):
10022
    """Export an instance to an image in the cluster.
10023

10024
    """
10025
    assert self.op.mode in constants.EXPORT_MODES
10026

    
10027
    instance = self.instance
10028
    src_node = instance.primary_node
10029

    
10030
    if self.op.shutdown:
10031
      # shutdown the instance, but not the disks
10032
      feedback_fn("Shutting down instance %s" % instance.name)
10033
      result = self.rpc.call_instance_shutdown(src_node, instance,
10034
                                               self.op.shutdown_timeout)
10035
      # TODO: Maybe ignore failures if ignore_remove_failures is set
10036
      result.Raise("Could not shutdown instance %s on"
10037
                   " node %s" % (instance.name, src_node))
10038

    
10039
    # set the disks ID correctly since call_instance_start needs the
10040
    # correct drbd minor to create the symlinks
10041
    for disk in instance.disks:
10042
      self.cfg.SetDiskID(disk, src_node)
10043

    
10044
    activate_disks = (not instance.admin_up)
10045

    
10046
    if activate_disks:
10047
      # Activate the instance disks if we're exporting a stopped instance
10048
      feedback_fn("Activating disks for %s" % instance.name)
10049
      _StartInstanceDisks(self, instance, None)
10050

    
10051
    try:
10052
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
10053
                                                     instance)
10054

    
10055
      helper.CreateSnapshots()
10056
      try:
10057
        if (self.op.shutdown and instance.admin_up and
10058
            not self.op.remove_instance):
10059
          assert not activate_disks
10060
          feedback_fn("Starting instance %s" % instance.name)
10061
          result = self.rpc.call_instance_start(src_node, instance, None, None)
10062
          msg = result.fail_msg
10063
          if msg:
10064
            feedback_fn("Failed to start instance: %s" % msg)
10065
            _ShutdownInstanceDisks(self, instance)
10066
            raise errors.OpExecError("Could not start instance: %s" % msg)
10067

    
10068
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
10069
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
10070
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10071
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
10072
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10073

    
10074
          (key_name, _, _) = self.x509_key_name
10075

    
10076
          dest_ca_pem = \
10077
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
10078
                                            self.dest_x509_ca)
10079

    
10080
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
10081
                                                     key_name, dest_ca_pem,
10082
                                                     timeouts)
10083
      finally:
10084
        helper.Cleanup()
10085

    
10086
      # Check for backwards compatibility
10087
      assert len(dresults) == len(instance.disks)
10088
      assert compat.all(isinstance(i, bool) for i in dresults), \
10089
             "Not all results are boolean: %r" % dresults
10090

    
10091
    finally:
10092
      if activate_disks:
10093
        feedback_fn("Deactivating disks for %s" % instance.name)
10094
        _ShutdownInstanceDisks(self, instance)
10095

    
10096
    if not (compat.all(dresults) and fin_resu):
10097
      failures = []
10098
      if not fin_resu:
10099
        failures.append("export finalization")
10100
      if not compat.all(dresults):
10101
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
10102
                               if not dsk)
10103
        failures.append("disk export: disk(s) %s" % fdsk)
10104

    
10105
      raise errors.OpExecError("Export failed, errors in %s" %
10106
                               utils.CommaJoin(failures))
10107

    
10108
    # At this point, the export was successful, we can cleanup/finish
10109

    
10110
    # Remove instance if requested
10111
    if self.op.remove_instance:
10112
      feedback_fn("Removing instance %s" % instance.name)
10113
      _RemoveInstance(self, feedback_fn, instance,
10114
                      self.op.ignore_remove_failures)
10115

    
10116
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
10117
      self._CleanupExports(feedback_fn)
10118

    
10119
    return fin_resu, dresults
10120

    
10121

    
10122
class LURemoveExport(NoHooksLU):
10123
  """Remove exports related to the named instance.
10124

10125
  """
10126
  _OP_PARAMS = [
10127
    _PInstanceName,
10128
    ]
10129
  REQ_BGL = False
10130

    
10131
  def ExpandNames(self):
10132
    self.needed_locks = {}
10133
    # We need all nodes to be locked in order for RemoveExport to work, but we
10134
    # don't need to lock the instance itself, as nothing will happen to it (and
10135
    # we can remove exports also for a removed instance)
10136
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10137

    
10138
  def Exec(self, feedback_fn):
10139
    """Remove any export.
10140

10141
    """
10142
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
10143
    # If the instance was not found we'll try with the name that was passed in.
10144
    # This will only work if it was an FQDN, though.
10145
    fqdn_warn = False
10146
    if not instance_name:
10147
      fqdn_warn = True
10148
      instance_name = self.op.instance_name
10149

    
10150
    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
10151
    exportlist = self.rpc.call_export_list(locked_nodes)
10152
    found = False
10153
    for node in exportlist:
10154
      msg = exportlist[node].fail_msg
10155
      if msg:
10156
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
10157
        continue
10158
      if instance_name in exportlist[node].payload:
10159
        found = True
10160
        result = self.rpc.call_export_remove(node, instance_name)
10161
        msg = result.fail_msg
10162
        if msg:
10163
          logging.error("Could not remove export for instance %s"
10164
                        " on node %s: %s", instance_name, node, msg)
10165

    
10166
    if fqdn_warn and not found:
10167
      feedback_fn("Export not found. If trying to remove an export belonging"
10168
                  " to a deleted instance please use its Fully Qualified"
10169
                  " Domain Name.")
10170

    
10171

    
10172
class LUAddGroup(LogicalUnit):
10173
  """Logical unit for creating node groups.
10174

10175
  """
10176
  HPATH = "group-add"
10177
  HTYPE = constants.HTYPE_GROUP
10178

    
10179
  _OP_PARAMS = [
10180
    _PGroupName,
10181
    ("ndparams", None, ht.TOr(ht.TDict, ht.TNone)),
10182
    ]
10183

    
10184
  REQ_BGL = False
10185

    
10186
  def ExpandNames(self):
10187
    # We need the new group's UUID here so that we can create and acquire the
10188
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
10189
    # that it should not check whether the UUID exists in the configuration.
10190
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
10191
    self.needed_locks = {}
10192
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
10193

    
10194
  def CheckPrereq(self):
10195
    """Check prerequisites.
10196

10197
    This checks that the given group name is not an existing node group
10198
    already.
10199

10200
    """
10201
    try:
10202
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10203
    except errors.OpPrereqError:
10204
      pass
10205
    else:
10206
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
10207
                                 " node group (UUID: %s)" %
10208
                                 (self.op.group_name, existing_uuid),
10209
                                 errors.ECODE_EXISTS)
10210

    
10211
    if self.op.ndparams:
10212
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
10213

    
10214
  def BuildHooksEnv(self):
10215
    """Build hooks env.
10216

10217
    """
10218
    env = {
10219
      "GROUP_NAME": self.op.group_name,
10220
      }
10221
    mn = self.cfg.GetMasterNode()
10222
    return env, [mn], [mn]
10223

    
10224
  def Exec(self, feedback_fn):
10225
    """Add the node group to the cluster.
10226

10227
    """
10228
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
10229
                                  uuid=self.group_uuid,
10230
                                  ndparams=self.op.ndparams)
10231

    
10232
    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
10233
    del self.remove_locks[locking.LEVEL_NODEGROUP]
10234

    
10235

    
10236
class LUQueryGroups(NoHooksLU):
10237
  """Logical unit for querying node groups.
10238

10239
  """
10240
  # pylint: disable-msg=W0142
10241
  _OP_PARAMS = [
10242
    _POutputFields,
10243
    ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
10244
    ]
10245

    
10246
  REQ_BGL = False
10247

    
10248
  _FIELDS_DYNAMIC = utils.FieldSet()
10249

    
10250
  _SIMPLE_FIELDS = ["name", "uuid", "ctime", "mtime", "serial_no"]
10251

    
10252
  _FIELDS_STATIC = utils.FieldSet(
10253
      "node_cnt", "node_list", "pinst_cnt", "pinst_list", *_SIMPLE_FIELDS)
10254

    
10255
  def CheckArguments(self):
10256
    _CheckOutputFields(static=self._FIELDS_STATIC,
10257
                       dynamic=self._FIELDS_DYNAMIC,
10258
                       selected=self.op.output_fields)
10259

    
10260
  def ExpandNames(self):
10261
    self.needed_locks = {}
10262

    
10263
  def Exec(self, feedback_fn):
10264
    """Computes the list of groups and their attributes.
10265

10266
    """
10267
    all_groups = self.cfg.GetAllNodeGroupsInfo()
10268
    name_to_uuid = dict((g.name, g.uuid) for g in all_groups.values())
10269

    
10270
    if not self.op.names:
10271
      sorted_names = utils.NiceSort(name_to_uuid.keys())
10272
      my_groups = [name_to_uuid[n] for n in sorted_names]
10273
    else:
10274
      # Accept names to be either names or UUIDs.
10275
      all_uuid = frozenset(all_groups.keys())
10276
      my_groups = []
10277
      missing = []
10278

    
10279
      for name in self.op.names:
10280
        if name in all_uuid:
10281
          my_groups.append(name)
10282
        elif name in name_to_uuid:
10283
          my_groups.append(name_to_uuid[name])
10284
        else:
10285
          missing.append(name)
10286

    
10287
      if missing:
10288
        raise errors.OpPrereqError("Some groups do not exist: %s" % missing,
10289
                                   errors.ECODE_NOENT)
10290

    
10291
    do_nodes = bool(frozenset(["node_cnt", "node_list"]).
10292
                    intersection(self.op.output_fields))
10293

    
10294
    do_instances = bool(frozenset(["pinst_cnt", "pinst_list"]).
10295
                        intersection(self.op.output_fields))
10296

    
10297
    # We need to map group->[nodes], and group->[instances]. The former is
10298
    # directly attainable, but the latter we have to do through instance->node,
10299
    # hence we need to process nodes even if we only need instance information.
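    # For illustration (hypothetical names/UUIDs):
    #   group_to_nodes = {"group-uuid-1": ["node1", "node2"]}
    #   node_to_group = {"node1": "group-uuid-1", "node2": "group-uuid-1"}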
10300
    if do_nodes or do_instances:
10301
      all_nodes = self.cfg.GetAllNodesInfo()
10302
      group_to_nodes = dict((all_groups[name].uuid, []) for name in my_groups)
10303
      node_to_group = {}
10304

    
10305
      for node in all_nodes.values():
10306
        if node.group in group_to_nodes:
10307
          group_to_nodes[node.group].append(node.name)
10308
          node_to_group[node.name] = node.group
10309

    
10310
      if do_instances:
10311
        all_instances = self.cfg.GetAllInstancesInfo()
10312
        group_to_instances = dict((all_groups[name].uuid, [])
10313
                                  for name in my_groups)
10314
        for instance in all_instances.values():
10315
          node = instance.primary_node
10316
          if node in node_to_group:
10317
            group_to_instances[node_to_group[node]].append(instance.name)
10318

    
10319
    output = []
10320

    
10321
    for uuid in my_groups:
10322
      group = all_groups[uuid]
10323
      group_output = []
10324

    
10325
      for field in self.op.output_fields:
10326
        if field in self._SIMPLE_FIELDS:
10327
          val = getattr(group, field)
10328
        elif field == "node_list":
10329
          val = utils.NiceSort(group_to_nodes[group.uuid])
10330
        elif field == "node_cnt":
10331
          val = len(group_to_nodes[group.uuid])
10332
        elif field == "pinst_list":
10333
          val = utils.NiceSort(group_to_instances[group.uuid])
10334
        elif field == "pinst_cnt":
10335
          val = len(group_to_instances[group.uuid])
10336
        else:
10337
          raise errors.ParameterError(field)
10338
        group_output.append(val)
10339
      output.append(group_output)
10340

    
10341
    return output
10342

    
10343

    
10344
class LURemoveGroup(LogicalUnit):
10345
  HPATH = "group-remove"
10346
  HTYPE = constants.HTYPE_GROUP
10347

    
10348
  _OP_PARAMS = [
10349
    _PGroupName,
10350
    ]
10351

    
10352
  REQ_BGL = False
10353

    
10354
  def ExpandNames(self):
10355
    # This will raise errors.OpPrereqError on its own:
10356
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10357
    self.needed_locks = {
10358
      locking.LEVEL_NODEGROUP: [self.group_uuid],
10359
      }
10360

    
10361
  def CheckPrereq(self):
10362
    """Check prerequisites.
10363

10364
    This checks that the given group name exists as a node group, that it is
10365
    empty (i.e., contains no nodes), and that it is not the last group of the
10366
    cluster.
10367

10368
    """
10369
    # Verify that the group is empty.
10370
    group_nodes = [node.name
10371
                   for node in self.cfg.GetAllNodesInfo().values()
10372
                   if node.group == self.group_uuid]
10373

    
10374
    if group_nodes:
10375
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
10376
                                 " nodes: %s" %
10377
                                 (self.op.group_name,
10378
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
10379
                                 errors.ECODE_STATE)
10380

    
10381
    # Verify the cluster would not be left group-less.
10382
    if len(self.cfg.GetNodeGroupList()) == 1:
10383
      raise errors.OpPrereqError("Group '%s' is the last group in the cluster,"
10384
                                 " which cannot be left without at least one"
10385
                                 " group" % self.op.group_name,
10386
                                 errors.ECODE_STATE)
10387

    
10388
  def BuildHooksEnv(self):
10389
    """Build hooks env.
10390

10391
    """
10392
    env = {
10393
      "GROUP_NAME": self.op.group_name,
10394
      }
10395
    mn = self.cfg.GetMasterNode()
10396
    return env, [mn], [mn]
10397

    
10398
  def Exec(self, feedback_fn):
10399
    """Remove the node group.
10400

10401
    """
10402
    try:
10403
      self.cfg.RemoveNodeGroup(self.group_uuid)
10404
    except errors.ConfigurationError:
10405
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
10406
                               (self.op.group_name, self.group_uuid))
10407

    
10408
    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
10409

    
10410

    
10411
class LURenameGroup(LogicalUnit):
10412
  HPATH = "group-rename"
10413
  HTYPE = constants.HTYPE_GROUP
10414

    
10415
  _OP_PARAMS = [
10416
    ("old_name", ht.NoDefault, ht.TNonEmptyString),
10417
    ("new_name", ht.NoDefault, ht.TNonEmptyString),
10418
    ]
10419

    
10420
  REQ_BGL = False
10421

    
10422
  def ExpandNames(self):
10423
    # This raises errors.OpPrereqError on its own:
10424
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.old_name)
10425

    
10426
    self.needed_locks = {
10427
      locking.LEVEL_NODEGROUP: [self.group_uuid],
10428
      }
10429

    
10430
  def CheckPrereq(self):
10431
    """Check prerequisites.
10432

10433
    This checks that the given old_name exists as a node group, and that
10434
    new_name doesn't.
10435

10436
    """
10437
    try:
10438
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
10439
    except errors.OpPrereqError:
10440
      pass
10441
    else:
10442
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
10443
                                 " node group (UUID: %s)" %
10444
                                 (self.op.new_name, new_name_uuid),
10445
                                 errors.ECODE_EXISTS)
10446

    
10447
  def BuildHooksEnv(self):
10448
    """Build hooks env.
10449

10450
    """
10451
    env = {
10452
      "OLD_NAME": self.op.old_name,
10453
      "NEW_NAME": self.op.new_name,
10454
      }
10455

    
10456
    mn = self.cfg.GetMasterNode()
10457
    all_nodes = self.cfg.GetAllNodesInfo()
10458
    run_nodes = [mn]
10459
    all_nodes.pop(mn, None)
10460

    
10461
    for node in all_nodes.values():
10462
      if node.group == self.group_uuid:
10463
        run_nodes.append(node.name)
10464

    
10465
    return env, run_nodes, run_nodes
10466

    
10467
  def Exec(self, feedback_fn):
10468
    """Rename the node group.
10469

10470
    """
10471
    group = self.cfg.GetNodeGroup(self.group_uuid)
10472

    
10473
    if group is None:
10474
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10475
                               (self.op.old_name, self.group_uuid))
10476

    
10477
    group.name = self.op.new_name
10478
    self.cfg.Update(group, feedback_fn)
10479

    
10480
    return self.op.new_name
10481

    
10482

    
10483
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
10484
  """Generic tags LU.
10485

10486
  This is an abstract class which is the parent of all the other tags LUs.
10487

10488
  """
10489

    
10490
  def ExpandNames(self):
10491
    self.needed_locks = {}
10492
    if self.op.kind == constants.TAG_NODE:
10493
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
10494
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
10495
    elif self.op.kind == constants.TAG_INSTANCE:
10496
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
10497
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
10498

    
10499
    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
10500
    # not possible to acquire the BGL based on opcode parameters)
10501

    
10502
  def CheckPrereq(self):
10503
    """Check prerequisites.
10504

10505
    """
10506
    if self.op.kind == constants.TAG_CLUSTER:
10507
      self.target = self.cfg.GetClusterInfo()
10508
    elif self.op.kind == constants.TAG_NODE:
10509
      self.target = self.cfg.GetNodeInfo(self.op.name)
10510
    elif self.op.kind == constants.TAG_INSTANCE:
10511
      self.target = self.cfg.GetInstanceInfo(self.op.name)
10512
    else:
10513
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
10514
                                 str(self.op.kind), errors.ECODE_INVAL)
10515

    
10516

    
10517
class LUGetTags(TagsLU):
10518
  """Returns the tags of a given object.
10519

10520
  """
10521
  _OP_PARAMS = [
10522
    ("kind", ht.NoDefault, ht.TElemOf(constants.VALID_TAG_TYPES)),
10523
    # Name is only meaningful for nodes and instances
10524
    ("name", ht.NoDefault, ht.TMaybeString),
10525
    ]
10526
  REQ_BGL = False
10527

    
10528
  def ExpandNames(self):
10529
    TagsLU.ExpandNames(self)
10530

    
10531
    # Share locks as this is only a read operation
10532
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
10533

    
10534
  def Exec(self, feedback_fn):
10535
    """Returns the tag list.
10536

10537
    """
10538
    return list(self.target.GetTags())
10539

    
10540

    
10541
class LUSearchTags(NoHooksLU):
10542
  """Searches the tags for a given pattern.
10543

10544
  """
10545
  _OP_PARAMS = [
10546
    ("pattern", ht.NoDefault, ht.TNonEmptyString),
10547
    ]
10548
  REQ_BGL = False
10549

    
10550
  def ExpandNames(self):
10551
    self.needed_locks = {}
10552

    
10553
  def CheckPrereq(self):
10554
    """Check prerequisites.
10555

10556
    This checks the pattern passed for validity by compiling it.
10557

10558
    """
10559
    try:
10560
      self.re = re.compile(self.op.pattern)
10561
    except re.error, err:
10562
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
10563
                                 (self.op.pattern, err), errors.ECODE_INVAL)
10564

    
10565
  def Exec(self, feedback_fn):
10566
    """Returns the tag list.
10567

10568
    """
10569
    cfg = self.cfg
10570
    tgts = [("/cluster", cfg.GetClusterInfo())]
10571
    ilist = cfg.GetAllInstancesInfo().values()
10572
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
10573
    nlist = cfg.GetAllNodesInfo().values()
10574
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
10575
    results = []
10576
    for path, target in tgts:
10577
      for tag in target.GetTags():
10578
        if self.re.search(tag):
10579
          results.append((path, tag))
10580
    return results
10581

    
10582

    
10583
class LUAddTags(TagsLU):
10584
  """Sets a tag on a given object.
10585

10586
  """
10587
  _OP_PARAMS = [
10588
    ("kind", ht.NoDefault, ht.TElemOf(constants.VALID_TAG_TYPES)),
10589
    # Name is only meaningful for nodes and instances
10590
    ("name", ht.NoDefault, ht.TMaybeString),
10591
    ("tags", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
10592
    ]
10593
  REQ_BGL = False
10594

    
10595
  def CheckPrereq(self):
10596
    """Check prerequisites.
10597

10598
    This checks the type and length of the tag name and value.
10599

10600
    """
10601
    TagsLU.CheckPrereq(self)
10602
    for tag in self.op.tags:
10603
      objects.TaggableObject.ValidateTag(tag)
10604

    
10605
  def Exec(self, feedback_fn):
10606
    """Sets the tag.
10607

10608
    """
10609
    try:
10610
      for tag in self.op.tags:
10611
        self.target.AddTag(tag)
10612
    except errors.TagError, err:
10613
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
10614
    self.cfg.Update(self.target, feedback_fn)
10615

    
10616

    
10617
class LUDelTags(TagsLU):
10618
  """Delete a list of tags from a given object.
10619

10620
  """
10621
  _OP_PARAMS = [
10622
    ("kind", ht.NoDefault, ht.TElemOf(constants.VALID_TAG_TYPES)),
10623
    # Name is only meaningful for nodes and instances
10624
    ("name", ht.NoDefault, ht.TMaybeString),
10625
    ("tags", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
10626
    ]
10627
  REQ_BGL = False
10628

    
10629
  def CheckPrereq(self):
10630
    """Check prerequisites.
10631

10632
    This checks that we have the given tag.
10633

10634
    """
10635
    TagsLU.CheckPrereq(self)
10636
    for tag in self.op.tags:
10637
      objects.TaggableObject.ValidateTag(tag)
10638
    del_tags = frozenset(self.op.tags)
10639
    cur_tags = self.target.GetTags()
10640

    
10641
    diff_tags = del_tags - cur_tags
10642
    if diff_tags:
10643
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
10644
      raise errors.OpPrereqError("Tag(s) %s not found" %
10645
                                 (utils.CommaJoin(diff_names), ),
10646
                                 errors.ECODE_NOENT)
10647

    
10648
  def Exec(self, feedback_fn):
10649
    """Remove the tag from the object.
10650

10651
    """
10652
    for tag in self.op.tags:
10653
      self.target.RemoveTag(tag)
10654
    self.cfg.Update(self.target, feedback_fn)
10655

    
10656

    
10657
class LUTestDelay(NoHooksLU):
10658
  """Sleep for a specified amount of time.
10659

10660
  This LU sleeps on the master and/or nodes for a specified amount of
10661
  time.
10662

10663
  """
10664
  _OP_PARAMS = [
10665
    ("duration", ht.NoDefault, ht.TFloat),
10666
    ("on_master", True, ht.TBool),
10667
    ("on_nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
10668
    ("repeat", 0, ht.TPositiveInt)
10669
    ]
10670
  REQ_BGL = False
10671

    
10672
  def ExpandNames(self):
10673
    """Expand names and set required locks.
10674

10675
    This expands the node list, if any.
10676

10677
    """
10678
    self.needed_locks = {}
10679
    if self.op.on_nodes:
10680
      # _GetWantedNodes can be used here, but is not always appropriate to use
10681
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
10682
      # more information.
10683
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
10684
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
10685

    
10686
  def _TestDelay(self):
10687
    """Do the actual sleep.
10688

10689
    """
10690
    if self.op.on_master:
10691
      if not utils.TestDelay(self.op.duration):
10692
        raise errors.OpExecError("Error during master delay test")
10693
    if self.op.on_nodes:
10694
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
10695
      for node, node_result in result.items():
10696
        node_result.Raise("Failure during rpc call to node %s" % node)
10697

    
10698
  def Exec(self, feedback_fn):
10699
    """Execute the test delay opcode, with the wanted repetitions.
10700

10701
    """
10702
    if self.op.repeat == 0:
10703
      self._TestDelay()
10704
    else:
10705
      top_value = self.op.repeat - 1
10706
      for i in range(self.op.repeat):
10707
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
10708
        self._TestDelay()
10709

    
10710

    
10711
class LUTestJobqueue(NoHooksLU):
10712
  """Utility LU to test some aspects of the job queue.
10713

10714
  """
10715
  _OP_PARAMS = [
10716
    ("notify_waitlock", False, ht.TBool),
10717
    ("notify_exec", False, ht.TBool),
10718
    ("log_messages", ht.EmptyList, ht.TListOf(ht.TString)),
10719
    ("fail", False, ht.TBool),
10720
    ]
10721
  REQ_BGL = False
10722

    
10723
  # Must be lower than default timeout for WaitForJobChange to see whether it
10724
  # notices changed jobs
10725
  _CLIENT_CONNECT_TIMEOUT = 20.0
10726
  _CLIENT_CONFIRM_TIMEOUT = 60.0
10727

    
10728
  @classmethod
10729
  def _NotifyUsingSocket(cls, cb, errcls):
10730
    """Opens a Unix socket and waits for another program to connect.
10731

10732
    @type cb: callable
10733
    @param cb: Callback to send socket name to client
10734
    @type errcls: class
10735
    @param errcls: Exception class to use for errors
10736

10737
    """
10738
    # Using a temporary directory as there's no easy way to create temporary
10739
    # sockets without writing a custom loop around tempfile.mktemp and
10740
    # socket.bind
10741
    tmpdir = tempfile.mkdtemp()
10742
    try:
10743
      tmpsock = utils.PathJoin(tmpdir, "sock")
10744

    
10745
      logging.debug("Creating temporary socket at %s", tmpsock)
10746
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
10747
      try:
10748
        sock.bind(tmpsock)
10749
        sock.listen(1)
10750

    
10751
        # Send details to client
10752
        cb(tmpsock)
10753

    
10754
        # Wait for client to connect before continuing
10755
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
10756
        try:
10757
          (conn, _) = sock.accept()
10758
        except socket.error, err:
10759
          raise errcls("Client didn't connect in time (%s)" % err)
10760
      finally:
10761
        sock.close()
10762
    finally:
10763
      # Remove as soon as client is connected
10764
      shutil.rmtree(tmpdir)
10765

    
10766
    # Wait for client to close
10767
    try:
10768
      try:
10769
        # pylint: disable-msg=E1101
10770
        # Instance of '_socketobject' has no ... member
10771
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
10772
        conn.recv(1)
10773
      except socket.error, err:
10774
        raise errcls("Client failed to confirm notification (%s)" % err)
10775
    finally:
10776
      conn.close()
10777

    
10778
  def _SendNotification(self, test, arg, sockname):
10779
    """Sends a notification to the client.
10780

10781
    @type test: string
10782
    @param test: Test name
10783
    @param arg: Test argument (depends on test)
10784
    @type sockname: string
10785
    @param sockname: Socket path
10786

10787
    """
10788
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
10789

    
10790
  def _Notify(self, prereq, test, arg):
10791
    """Notifies the client of a test.
10792

10793
    @type prereq: bool
10794
    @param prereq: Whether this is a prereq-phase test
10795
    @type test: string
10796
    @param test: Test name
10797
    @param arg: Test argument (depends on test)
10798

10799
    """
10800
    if prereq:
10801
      errcls = errors.OpPrereqError
10802
    else:
10803
      errcls = errors.OpExecError
10804

    
10805
    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
10806
                                                  test, arg),
10807
                                   errcls)
10808

    
10809
  def CheckArguments(self):
10810
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
10811
    self.expandnames_calls = 0
10812

    
10813
  def ExpandNames(self):
10814
    checkargs_calls = getattr(self, "checkargs_calls", 0)
10815
    if checkargs_calls < 1:
10816
      raise errors.ProgrammerError("CheckArguments was not called")
10817

    
10818
    self.expandnames_calls += 1
10819

    
10820
    if self.op.notify_waitlock:
10821
      self._Notify(True, constants.JQT_EXPANDNAMES, None)
10822

    
10823
    self.LogInfo("Expanding names")
10824

    
10825
    # Get lock on master node (just to get a lock, not for a particular reason)
10826
    self.needed_locks = {
10827
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
10828
      }
10829

    
10830
  def Exec(self, feedback_fn):
10831
    if self.expandnames_calls < 1:
10832
      raise errors.ProgrammerError("ExpandNames was not called")
10833

    
10834
    if self.op.notify_exec:
10835
      self._Notify(False, constants.JQT_EXEC, None)
10836

    
10837
    self.LogInfo("Executing")
10838

    
10839
    if self.op.log_messages:
10840
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
10841
      for idx, msg in enumerate(self.op.log_messages):
10842
        self.LogInfo("Sending log message %s", idx + 1)
10843
        feedback_fn(constants.JQT_MSGPREFIX + msg)
10844
        # Report how many test messages have been sent
10845
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)
10846

    
10847
    if self.op.fail:
10848
      raise errors.OpExecError("Opcode failure was requested")
10849

    
10850
    return True
10851

    
10852

    
10853
class IAllocator(object):
10854
  """IAllocator framework.
10855

10856
  An IAllocator instance has four sets of attributes:
10857
    - cfg that is needed to query the cluster
10858
    - input data (all members of the _KEYS class attribute are required)
10859
    - four buffer attributes (in_text, out_text, in_data, out_data) that
10860
      represent the input (to the external script) in text and data
10861
      structure format, and the output from it, again in two formats
10862
    - the result variables from the script (success, info, nodes) for
10863
      easy usage
10864

10865
  """
10866
  # pylint: disable-msg=R0902
10867
  # lots of instance attributes
10868
  _ALLO_KEYS = [
10869
    "name", "mem_size", "disks", "disk_template",
10870
    "os", "tags", "nics", "vcpus", "hypervisor",
10871
    ]
10872
  _RELO_KEYS = [
10873
    "name", "relocate_from",
10874
    ]
10875
  _EVAC_KEYS = [
10876
    "evac_nodes",
10877
    ]
10878

    
10879
  def __init__(self, cfg, rpc, mode, **kwargs):
10880
    self.cfg = cfg
10881
    self.rpc = rpc
10882
    # init buffer variables
10883
    self.in_text = self.out_text = self.in_data = self.out_data = None
10884
    # init all input fields so that pylint is happy
10885
    self.mode = mode
10886
    self.mem_size = self.disks = self.disk_template = None
10887
    self.os = self.tags = self.nics = self.vcpus = None
10888
    self.hypervisor = None
10889
    self.relocate_from = None
10890
    self.name = None
10891
    self.evac_nodes = None
10892
    # computed fields
10893
    self.required_nodes = None
10894
    # init result fields
10895
    self.success = self.info = self.result = None
10896
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10897
      keyset = self._ALLO_KEYS
10898
      fn = self._AddNewInstance
10899
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10900
      keyset = self._RELO_KEYS
10901
      fn = self._AddRelocateInstance
10902
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10903
      keyset = self._EVAC_KEYS
10904
      fn = self._AddEvacuateNodes
10905
    else:
10906
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
10907
                                   " IAllocator" % self.mode)
10908
    for key in kwargs:
10909
      if key not in keyset:
10910
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
10911
                                     " IAllocator" % key)
10912
      setattr(self, key, kwargs[key])
10913

    
10914
    for key in keyset:
10915
      if key not in kwargs:
10916
        raise errors.ProgrammerError("Missing input parameter '%s' to"
10917
                                     " IAllocator" % key)
10918
    self._BuildInputData(fn)
10919

    
10920
  def _ComputeClusterData(self):
10921
    """Compute the generic allocator input data.
10922

10923
    This is the data that is independent of the actual operation.
10924

10925
    """
10926
    cfg = self.cfg
10927
    cluster_info = cfg.GetClusterInfo()
10928
    # cluster data
10929
    data = {
10930
      "version": constants.IALLOCATOR_VERSION,
10931
      "cluster_name": cfg.GetClusterName(),
10932
      "cluster_tags": list(cluster_info.GetTags()),
10933
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
10934
      # we don't have job IDs
10935
      }
10936
    iinfo = cfg.GetAllInstancesInfo().values()
10937
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
10938

    
10939
    # node data
10940
    node_list = cfg.GetNodeList()
10941

    
10942
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10943
      hypervisor_name = self.hypervisor
10944
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10945
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
10946
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10947
      hypervisor_name = cluster_info.enabled_hypervisors[0]
10948

    
10949
    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
10950
                                        hypervisor_name)
10951
    node_iinfo = \
10952
      self.rpc.call_all_instances_info(node_list,
10953
                                       cluster_info.enabled_hypervisors)
10954

    
10955
    data["nodegroups"] = self._ComputeNodeGroupData(cfg)
10956

    
10957
    data["nodes"] = self._ComputeNodeData(cfg, node_data, node_iinfo, i_list)
10958

    
10959
    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
10960

    
10961
    self.in_data = data
10962

    
10963
  @staticmethod
10964
  def _ComputeNodeGroupData(cfg):
10965
    """Compute node groups data.
10966

10967
    """
10968
    ng = {}
10969
    for guuid, gdata in cfg.GetAllNodeGroupsInfo().items():
10970
      ng[guuid] = { "name": gdata.name }
10971
    return ng
10972

    
10973
  @staticmethod
  def _ComputeNodeData(cfg, node_data, node_iinfo, i_list):
    """Compute global node data.

    """
    node_results = {}
    for nname, nresult in node_data.items():
      # first fill in static (config-based) values
      ninfo = cfg.GetNodeInfo(nname)
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        "group": ninfo.group,
        "master_capable": ninfo.master_capable,
        "vm_capable": ninfo.vm_capable,
        }

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr.update(pnr_dyn)

      node_results[nname] = pnr

    return node_results

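  # Worked example for the free-memory adjustment above (numbers are
  # illustrative): if a primary instance has BE_MEMORY = 512 but the
  # hypervisor reports it as currently using only 400 MiB, the node's
  # reported "free_memory" is reduced by max(0, 512 - 400) = 112, so the
  # allocator treats the not-yet-claimed memory as already reserved.
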
  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                   }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data

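  # An illustrative per-instance entry as produced above (all values are
  # made up; "disk_space_total" is whatever _ComputeDiskSize returns for the
  # disk template and sizes):
  #
  #   "instance1.example.com": {
  #     "tags": [], "admin_up": True, "vcpus": 1, "memory": 512,
  #     "os": "debootstrap+default",
  #     "nodes": ["node1.example.com", "node2.example.com"],
  #     "nics": [{"mac": "aa:00:00:00:00:01", "ip": None,
  #               "mode": "bridged", "link": "xen-br0", "bridge": "xen-br0"}],
  #     "disks": [{"size": 1024, "mode": "rw"}],
  #     "disk_template": "drbd",
  #     "hypervisor": "xen-pvm",
  #     "disk_space_total": <computed by _ComputeDiskSize>,
  #   }
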
  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    return request

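  # An allocation request as returned above might look like this (values are
  # purely illustrative; the "type" key is added later by _BuildInputData):
  #
  #   {
  #     "name": "instance1.example.com",
  #     "disk_template": "drbd",
  #     "tags": [],
  #     "os": "debootstrap+default",
  #     "vcpus": 1,
  #     "memory": 512,
  #     "disks": [{"size": 1024, "mode": "w"}],
  #     "disk_space_total": <computed by _ComputeDiskSize>,
  #     "nics": [{"mac": "aa:00:00:00:00:01", "ip": None, "bridge": None}],
  #     "required_nodes": 2,
  #   }
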
  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

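  # A relocation request as returned above, with illustrative names
  # ("relocate_from" is the instance's single secondary node):
  #
  #   {
  #     "name": "instance1.example.com",
  #     "disk_space_total": <computed by _ComputeDiskSize>,
  #     "required_nodes": 1,
  #     "relocate_from": ["node2.example.com"],
  #   }
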
  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {
      "evac_nodes": self.evac_nodes
      }
    return request

  def _BuildInputData(self, fn):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

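  # Typical use (a sketch): after _BuildInputData() has filled self.in_text,
  # Run("hail") asks the master node to execute the named allocator script on
  # that input and stores the raw reply in self.out_text; "hail" is only an
  # example name here, any installed iallocator can be passed. With the
  # default validate=True the reply is then checked by _ValidateResult.
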
  def _ValidateResult(self):
    """Process the allocator results.

    This will process and, if successful, save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["result"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
                               " is not a list")
    self.out_data = rdict


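# For reference, a minimal allocator reply that _ValidateResult above accepts
# would look like this (names are illustrative):
#   {"success": true, "info": "allocation successful",
#    "result": ["node1.example.com", "node2.example.com"]}
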
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  _OP_PARAMS = [
    ("direction", ht.NoDefault,
     ht.TElemOf(constants.VALID_IALLOCATOR_DIRECTIONS)),
    ("mode", ht.NoDefault, ht.TElemOf(constants.VALID_IALLOCATOR_MODES)),
    ("name", ht.NoDefault, ht.TNonEmptyString),
    ("nics", ht.NoDefault, ht.TOr(ht.TNone, ht.TListOf(
      ht.TDictOf(ht.TElemOf(["mac", "ip", "bridge"]),
                 ht.TOr(ht.TNone, ht.TNonEmptyString))))),
    ("disks", ht.NoDefault, ht.TOr(ht.TNone, ht.TList)),
    ("hypervisor", None, ht.TMaybeString),
    ("allocator", None, ht.TMaybeString),
    ("tags", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
    ("mem_size", None, ht.TOr(ht.TNone, ht.TPositiveInt)),
    ("vcpus", None, ht.TOr(ht.TNone, ht.TPositiveInt)),
    ("os", None, ht.TMaybeString),
    ("disk_template", None, ht.TMaybeString),
    ("evac_nodes", None, ht.TOr(ht.TNone, ht.TListOf(ht.TNonEmptyString))),
    ]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

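  # For example, a "disks" value that passes the checks above could be
  # (sizes in MiB, values purely illustrative):
  #   [{"size": 1024, "mode": "w"}, {"size": 512, "mode": "r"}]
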
  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      raise errors.ProgrammerError("Uncaught mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result