
root / lib / cmdlib.py @ dd94e9f6


1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008, 2009, 2010 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable-msg=W0201,C0302
25

    
26
# W0201 since most LU attributes are defined in CheckPrereq or similar
27
# functions
28

    
29
# C0302: since we have waaaay too many lines in this module
30

    
31
import os
32
import os.path
33
import time
34
import re
35
import platform
36
import logging
37
import copy
38
import OpenSSL
39
import socket
40
import tempfile
41
import shutil
42
import operator
43
import itertools
44

    
45
from ganeti import ssh
46
from ganeti import utils
47
from ganeti import errors
48
from ganeti import hypervisor
49
from ganeti import locking
50
from ganeti import constants
51
from ganeti import objects
52
from ganeti import serializer
53
from ganeti import ssconf
54
from ganeti import uidpool
55
from ganeti import compat
56
from ganeti import masterd
57
from ganeti import netutils
58
from ganeti import ht
59
from ganeti import query
60
from ganeti import qlang
61

    
62
import ganeti.masterd.instance # pylint: disable-msg=W0611
63

    
64
# Common opcode attributes
65

    
66
#: output fields for a query operation
67
_POutputFields = ("output_fields", ht.NoDefault, ht.TListOf(ht.TNonEmptyString))
68

    
69

    
70
#: the shutdown timeout
71
_PShutdownTimeout = ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT,
72
                     ht.TPositiveInt)
73

    
74
#: the force parameter
75
_PForce = ("force", False, ht.TBool)
76

    
77
#: a required instance name (for single-instance LUs)
78
_PInstanceName = ("instance_name", ht.NoDefault, ht.TNonEmptyString)
79

    
80
#: Whether to ignore offline nodes
81
_PIgnoreOfflineNodes = ("ignore_offline_nodes", False, ht.TBool)
82

    
83
#: a required node name (for single-node LUs)
84
_PNodeName = ("node_name", ht.NoDefault, ht.TNonEmptyString)
85

    
86
#: a required node group name (for single-group LUs)
87
_PGroupName = ("group_name", ht.NoDefault, ht.TNonEmptyString)
88

    
89
#: the migration type (live/non-live)
90
_PMigrationMode = ("mode", None,
91
                   ht.TOr(ht.TNone, ht.TElemOf(constants.HT_MIGRATION_MODES)))
92

    
93
#: the obsolete 'live' mode (boolean)
94
_PMigrationLive = ("live", None, ht.TMaybeBool)
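# Illustrative sketch (not part of the original module): an LU combines the
# common definitions above with its own (name, default, type-check) tuples
# in its _OP_PARAMS attribute; the "wait_for_sync" parameter below is
# hypothetical:
#
#   _OP_PARAMS = [
#     _PInstanceName,
#     _PForce,
#     ("wait_for_sync", True, ht.TBool),
#   ]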
95

    
96

    
97
def _SupportsOob(cfg, node):
98
  """Tells if node supports OOB.
99

100
  @type cfg: L{config.ConfigWriter}
101
  @param cfg: The cluster configuration
102
  @type node: L{objects.Node}
103
  @param node: The node
104
  @return: The OOB script if supported or an empty string otherwise
105

106
  """
107
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
108

    
109

    
110
# End types
111
class LogicalUnit(object):
112
  """Logical Unit base class.
113

114
  Subclasses must follow these rules:
115
    - implement ExpandNames
116
    - implement CheckPrereq (except when tasklets are used)
117
    - implement Exec (except when tasklets are used)
118
    - implement BuildHooksEnv
119
    - redefine HPATH and HTYPE
120
    - optionally redefine their run requirements:
121
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
122

123
  Note that all commands require root permissions.
124

125
  @ivar dry_run_result: the value (if any) that will be returned to the caller
126
      in dry-run mode (signalled by opcode dry_run parameter)
127
  @cvar _OP_PARAMS: a list of opcode attributes, the default values
128
      they should get if not already defined, and types they must match
129

130
  """
131
  HPATH = None
132
  HTYPE = None
133
  _OP_PARAMS = []
134
  REQ_BGL = True
135

    
136
  def __init__(self, processor, op, context, rpc):
137
    """Constructor for LogicalUnit.
138

139
    This needs to be overridden in derived classes in order to check op
140
    validity.
141

142
    """
143
    self.proc = processor
144
    self.op = op
145
    self.cfg = context.cfg
146
    self.context = context
147
    self.rpc = rpc
148
    # Dicts used to declare locking needs to mcpu
149
    self.needed_locks = None
150
    self.acquired_locks = {}
151
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
152
    self.add_locks = {}
153
    self.remove_locks = {}
154
    # Used to force good behavior when calling helper functions
155
    self.recalculate_locks = {}
156
    self.__ssh = None
157
    # logging
158
    self.Log = processor.Log # pylint: disable-msg=C0103
159
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
160
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
161
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
162
    # support for dry-run
163
    self.dry_run_result = None
164
    # support for generic debug attribute
165
    if (not hasattr(self.op, "debug_level") or
166
        not isinstance(self.op.debug_level, int)):
167
      self.op.debug_level = 0
168

    
169
    # Tasklets
170
    self.tasklets = None
171

    
172
    # The new kind-of-type-system
173
    op_id = self.op.OP_ID
174
    for attr_name, aval, test in self._OP_PARAMS:
175
      if not hasattr(op, attr_name):
176
        if aval == ht.NoDefault:
177
          raise errors.OpPrereqError("Required parameter '%s.%s' missing" %
178
                                     (op_id, attr_name), errors.ECODE_INVAL)
179
        else:
180
          if callable(aval):
181
            dval = aval()
182
          else:
183
            dval = aval
184
          setattr(self.op, attr_name, dval)
185
      attr_val = getattr(op, attr_name)
186
      if test == ht.NoType:
187
        # no tests here
188
        continue
189
      if not callable(test):
190
        raise errors.ProgrammerError("Validation for parameter '%s.%s' failed,"
191
                                     " given type is not a proper type (%s)" %
192
                                     (op_id, attr_name, test))
193
      if not test(attr_val):
194
        logging.error("OpCode %s, parameter %s, has invalid type %s/value %s",
195
                      self.op.OP_ID, attr_name, type(attr_val), attr_val)
196
        raise errors.OpPrereqError("Parameter '%s.%s' fails validation" %
197
                                   (op_id, attr_name), errors.ECODE_INVAL)
198

    
199
    self.CheckArguments()
200

    
201
  def __GetSSH(self):
202
    """Returns the SshRunner object
203

204
    """
205
    if not self.__ssh:
206
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
207
    return self.__ssh
208

    
209
  ssh = property(fget=__GetSSH)
210

    
211
  def CheckArguments(self):
212
    """Check syntactic validity for the opcode arguments.
213

214
    This method is for doing a simple syntactic check and ensuring the
215
    validity of opcode parameters, without any cluster-related
216
    checks. While the same can be accomplished in ExpandNames and/or
217
    CheckPrereq, doing these separately is better because:
218

219
      - ExpandNames is left as purely a lock-related function
220
      - CheckPrereq is run after we have acquired locks (and possibly
221
        waited for them)
222

223
    The function is allowed to change the self.op attribute so that
224
    later methods no longer need to worry about missing parameters.
225

226
    """
227
    pass
228

    
229
  def ExpandNames(self):
230
    """Expand names for this LU.
231

232
    This method is called before starting to execute the opcode, and it should
233
    update all the parameters of the opcode to their canonical form (e.g. a
234
    short node name must be fully expanded after this method has successfully
235
    completed). This way locking, hooks, logging, etc. can work correctly.
236

237
    LUs which implement this method must also populate the self.needed_locks
238
    member, as a dict with lock levels as keys, and a list of needed lock names
239
    as values. Rules:
240

241
      - use an empty dict if you don't need any lock
242
      - if you don't need any lock at a particular level omit that level
243
      - don't put anything for the BGL level
244
      - if you want all locks at a level use locking.ALL_SET as a value
245

246
    If you need to share locks (rather than acquire them exclusively) at one
247
    level you can modify self.share_locks, setting a true value (usually 1) for
248
    that level. By default locks are not shared.
249

250
    This function can also define a list of tasklets, which then will be
251
    executed in order instead of the usual LU-level CheckPrereq and Exec
252
    functions, if those are not defined by the LU.
253

254
    Examples::
255

256
      # Acquire all nodes and one instance
257
      self.needed_locks = {
258
        locking.LEVEL_NODE: locking.ALL_SET,
259
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
260
      }
261
      # Acquire just two nodes
262
      self.needed_locks = {
263
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
264
      }
265
      # Acquire no locks
266
      self.needed_locks = {} # No, you can't leave it to the default value None
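      # Share (rather than exclusively acquire) locks at the node level
      # (illustrative):
      self.share_locks[locking.LEVEL_NODE] = 1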
267

268
    """
269
    # The implementation of this method is mandatory only if the new LU is
270
    # concurrent, so that old LUs don't need to be changed all at the same
271
    # time.
272
    if self.REQ_BGL:
273
      self.needed_locks = {} # Exclusive LUs don't need locks.
274
    else:
275
      raise NotImplementedError
276

    
277
  def DeclareLocks(self, level):
278
    """Declare LU locking needs for a level
279

280
    While most LUs can just declare their locking needs at ExpandNames time,
281
    sometimes there's the need to calculate some locks after having acquired
282
    the ones before. This function is called just before acquiring locks at a
283
    particular level, but after acquiring the ones at lower levels, and permits
284
    such calculations. It can be used to modify self.needed_locks, and by
285
    default it does nothing.
286

287
    This function is only called if you have something already set in
288
    self.needed_locks for the level.
289

290
    @param level: Locking level which is going to be locked
291
    @type level: member of ganeti.locking.LEVELS
292

293
    """
294

    
295
  def CheckPrereq(self):
296
    """Check prerequisites for this LU.
297

298
    This method should check that the prerequisites for the execution
299
    of this LU are fulfilled. It can do internode communication, but
300
    it should be idempotent - no cluster or system changes are
301
    allowed.
302

303
    The method should raise errors.OpPrereqError in case something is
304
    not fulfilled. Its return value is ignored.
305

306
    This method should also update all the parameters of the opcode to
307
    their canonical form if it hasn't been done by ExpandNames before.
308

309
    """
310
    if self.tasklets is not None:
311
      for (idx, tl) in enumerate(self.tasklets):
312
        logging.debug("Checking prerequisites for tasklet %s/%s",
313
                      idx + 1, len(self.tasklets))
314
        tl.CheckPrereq()
315
    else:
316
      pass
317

    
318
  def Exec(self, feedback_fn):
319
    """Execute the LU.
320

321
    This method should implement the actual work. It should raise
322
    errors.OpExecError for failures that are somewhat dealt with in
323
    code, or expected.
324

325
    """
326
    if self.tasklets is not None:
327
      for (idx, tl) in enumerate(self.tasklets):
328
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
329
        tl.Exec(feedback_fn)
330
    else:
331
      raise NotImplementedError
332

    
333
  def BuildHooksEnv(self):
334
    """Build hooks environment for this LU.
335

336
    This method should return a three-element tuple consisting of: a dict
337
    containing the environment that will be used for running the
338
    specific hook for this LU, a list of node names on which the hook
339
    should run before the execution, and a list of node names on which
340
    the hook should run after the execution.
341

342
    The keys of the dict must not be prefixed with 'GANETI_' as this will
343
    be handled in the hooks runner. Also note additional keys will be
344
    added by the hooks runner. If the LU doesn't define any
345
    environment, an empty dict (and not None) should be returned.
346

347
    An empty list (and not None) should be returned if there are no nodes.
348

349
    Note that if the HPATH for a LU class is None, this function will
350
    not be called.
351

352
    """
353
    raise NotImplementedError
354

    
355
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
356
    """Notify the LU about the results of its hooks.
357

358
    This method is called every time a hooks phase is executed, and notifies
359
    the Logical Unit about the hooks' result. The LU can then use it to alter
360
    its result based on the hooks.  By default the method does nothing and the
361
    previous result is passed back unchanged, but any LU can override it if it
362
    wants to use the local cluster hook-scripts somehow.
363

364
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
365
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
366
    @param hook_results: the results of the multi-node hooks rpc call
367
    @param feedback_fn: function used to send feedback back to the caller
368
    @param lu_result: the previous Exec result this LU had, or None
369
        in the PRE phase
370
    @return: the new Exec result, based on the previous result
371
        and hook results
372

373
    """
374
    # API must be kept, thus we ignore the "unused argument" and "could
375
    # be a function" warnings
376
    # pylint: disable-msg=W0613,R0201
377
    return lu_result
378

    
379
  def _ExpandAndLockInstance(self):
380
    """Helper function to expand and lock an instance.
381

382
    Many LUs that work on an instance take its name in self.op.instance_name
383
    and need to expand it and then declare the expanded name for locking. This
384
    function does it, and then updates self.op.instance_name to the expanded
385
    name. It also initializes needed_locks as a dict, if this hasn't been done
386
    before.
387

388
    """
389
    if self.needed_locks is None:
390
      self.needed_locks = {}
391
    else:
392
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
393
        "_ExpandAndLockInstance called with instance-level locks set"
394
    self.op.instance_name = _ExpandInstanceName(self.cfg,
395
                                                self.op.instance_name)
396
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
397

    
398
  def _LockInstancesNodes(self, primary_only=False):
399
    """Helper function to declare instances' nodes for locking.
400

401
    This function should be called after locking one or more instances to lock
402
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
403
    with all primary or secondary nodes for instances already locked and
404
    present in self.needed_locks[locking.LEVEL_INSTANCE].
405

406
    It should be called from DeclareLocks, and for safety only works if
407
    self.recalculate_locks[locking.LEVEL_NODE] is set.
408

409
    In the future it may grow parameters to just lock some instances' nodes, or
410
    to just lock primaries or secondary nodes, if needed.
411

412
    It should be called in DeclareLocks in a way similar to::
413

414
      if level == locking.LEVEL_NODE:
415
        self._LockInstancesNodes()
416

417
    @type primary_only: boolean
418
    @param primary_only: only lock primary nodes of locked instances
419

420
    """
421
    assert locking.LEVEL_NODE in self.recalculate_locks, \
422
      "_LockInstancesNodes helper function called with no nodes to recalculate"
423

    
424
    # TODO: check if we've really been called with the instance locks held
425

    
426
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
427
    # future we might want to have different behaviors depending on the value
428
    # of self.recalculate_locks[locking.LEVEL_NODE]
429
    wanted_nodes = []
430
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
431
      instance = self.context.cfg.GetInstanceInfo(instance_name)
432
      wanted_nodes.append(instance.primary_node)
433
      if not primary_only:
434
        wanted_nodes.extend(instance.secondary_nodes)
435

    
436
    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
437
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
438
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
439
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
440

    
441
    del self.recalculate_locks[locking.LEVEL_NODE]
442

    
443

    
444
class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
445
  """Simple LU which runs no hooks.
446

447
  This LU is intended as a parent for other LogicalUnits which will
448
  run no hooks, in order to reduce duplicate code.
449

450
  """
451
  HPATH = None
452
  HTYPE = None
453

    
454
  def BuildHooksEnv(self):
455
    """Empty BuildHooksEnv for NoHooksLu.
456

457
    This just raises an error.
458

459
    """
460
    assert False, "BuildHooksEnv called for NoHooksLUs"
461

    
462

    
463
class Tasklet:
464
  """Tasklet base class.
465

466
  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
467
  they can mix legacy code with tasklets. Locking needs to be done in the LU;
468
  tasklets know nothing about locks.
469

470
  Subclasses must follow these rules:
471
    - Implement CheckPrereq
472
    - Implement Exec
473

474
  """
475
  def __init__(self, lu):
476
    self.lu = lu
477

    
478
    # Shortcuts
479
    self.cfg = lu.cfg
480
    self.rpc = lu.rpc
481

    
482
  def CheckPrereq(self):
483
    """Check prerequisites for this tasklets.
484

485
    This method should check whether the prerequisites for the execution of
486
    this tasklet are fulfilled. It can do internode communication, but it
487
    should be idempotent - no cluster or system changes are allowed.
488

489
    The method should raise errors.OpPrereqError in case something is not
490
    fulfilled. Its return value is ignored.
491

492
    This method should also update all parameters to their canonical form if it
493
    hasn't been done before.
494

495
    """
496
    pass
497

    
498
  def Exec(self, feedback_fn):
499
    """Execute the tasklet.
500

501
    This method should implement the actual work. It should raise
502
    errors.OpExecError for failures that are somewhat dealt with in code, or
503
    expected.
504

505
    """
506
    raise NotImplementedError
507

    
508

    
509
class _QueryBase:
510
  """Base for query utility classes.
511

512
  """
513
  #: Attribute holding field definitions
514
  FIELDS = None
515

    
516
  def __init__(self, names, fields, use_locking):
517
    """Initializes this class.
518

519
    """
520
    self.names = names
521
    self.use_locking = use_locking
522

    
523
    self.query = query.Query(self.FIELDS, fields)
524
    self.requested_data = self.query.RequestedData()
525

    
526
    self.do_locking = None
527
    self.wanted = None
528

    
529
  def _GetNames(self, lu, all_names, lock_level):
530
    """Helper function to determine names asked for in the query.
531

532
    """
533
    if self.do_locking:
534
      names = lu.acquired_locks[lock_level]
535
    else:
536
      names = all_names
537

    
538
    if self.wanted == locking.ALL_SET:
539
      assert not self.names
540
      # caller didn't specify names, so ordering is not important
541
      return utils.NiceSort(names)
542

    
543
    # caller specified names and we must keep the same order
544
    assert self.names
545
    assert not self.do_locking or lu.acquired_locks[lock_level]
546

    
547
    missing = set(self.wanted).difference(names)
548
    if missing:
549
      raise errors.OpExecError("Some items were removed before retrieving"
550
                               " their data: %s" % missing)
551

    
552
    # Return expanded names
553
    return self.wanted
554

    
555
  @classmethod
556
  def FieldsQuery(cls, fields):
557
    """Returns list of available fields.
558

559
    @return: List of L{objects.QueryFieldDefinition}
560

561
    """
562
    if fields is None:
563
      # Client requests all fields, sort by name
564
      fdefs = sorted(query.GetAllFields(cls.FIELDS.values()),
565
                     key=operator.attrgetter("name"))
566
    else:
567
      # Keep order as requested by client
568
      fdefs = query.Query(cls.FIELDS, fields).GetFields()
569

    
570
    return objects.QueryFieldsResponse(fields=fdefs).ToDict()
571

    
572
  def ExpandNames(self, lu):
573
    """Expand names for this query.
574

575
    See L{LogicalUnit.ExpandNames}.
576

577
    """
578
    raise NotImplementedError()
579

    
580
  def DeclareLocks(self, lu, level):
581
    """Declare locks for this query.
582

583
    See L{LogicalUnit.DeclareLocks}.
584

585
    """
586
    raise NotImplementedError()
587

    
588
  def _GetQueryData(self, lu):
589
    """Collects all data for this query.
590

591
    @return: Query data object
592

593
    """
594
    raise NotImplementedError()
595

    
596
  def NewStyleQuery(self, lu):
597
    """Collect data and execute query.
598

599
    """
600
    data = self._GetQueryData(lu)
601

    
602
    return objects.QueryResponse(data=self.query.Query(data),
603
                                 fields=self.query.GetFields()).ToDict()
604

    
605
  def OldStyleQuery(self, lu):
606
    """Collect data and execute query.
607

608
    """
609
    return self.query.OldStyleQuery(self._GetQueryData(lu))
610

    
611

    
612
def _GetWantedNodes(lu, nodes):
613
  """Returns list of checked and expanded node names.
614

615
  @type lu: L{LogicalUnit}
616
  @param lu: the logical unit on whose behalf we execute
617
  @type nodes: list
618
  @param nodes: list of node names or None for all nodes
619
  @rtype: list
620
  @return: the list of nodes, sorted
621
  @raise errors.ProgrammerError: if the nodes parameter is wrong type
622

623
  """
624
  if nodes:
625
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]
626

    
627
  return utils.NiceSort(lu.cfg.GetNodeList())
628

    
629

    
630
def _GetWantedInstances(lu, instances):
631
  """Returns list of checked and expanded instance names.
632

633
  @type lu: L{LogicalUnit}
634
  @param lu: the logical unit on whose behalf we execute
635
  @type instances: list
636
  @param instances: list of instance names or None for all instances
637
  @rtype: list
638
  @return: the list of instances, sorted
639
  @raise errors.OpPrereqError: if the instances parameter is wrong type
640
  @raise errors.OpPrereqError: if any of the passed instances is not found
641

642
  """
643
  if instances:
644
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
645
  else:
646
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
647
  return wanted
648

    
649

    
650
def _GetUpdatedParams(old_params, update_dict,
651
                      use_default=True, use_none=False):
652
  """Return the new version of a parameter dictionary.
653

654
  @type old_params: dict
655
  @param old_params: old parameters
656
  @type update_dict: dict
657
  @param update_dict: dict containing new parameter values, or
658
      constants.VALUE_DEFAULT to reset the parameter to its default
659
      value
660
  @type use_default: boolean
661
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
662
      values as 'to be deleted' values
663
  @type use_none: boolean
664
  @param use_none: whether to recognise C{None} values as 'to be
665
      deleted' values
666
  @rtype: dict
667
  @return: the new parameter dictionary
668

669
  """
670
  params_copy = copy.deepcopy(old_params)
671
  for key, val in update_dict.iteritems():
672
    if ((use_default and val == constants.VALUE_DEFAULT) or
673
        (use_none and val is None)):
674
      try:
675
        del params_copy[key]
676
      except KeyError:
677
        pass
678
    else:
679
      params_copy[key] = val
680
  return params_copy
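# Illustrative example (not from the original source) of _GetUpdatedParams:
# with use_default=True, merging {"a": 1, "b": 2} with
# {"a": constants.VALUE_DEFAULT, "c": 3} returns {"b": 2, "c": 3}, i.e. "a"
# is reset (removed) and "c" is added.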
681

    
682

    
683
def _CheckOutputFields(static, dynamic, selected):
684
  """Checks whether all selected fields are valid.
685

686
  @type static: L{utils.FieldSet}
687
  @param static: static fields set
688
  @type dynamic: L{utils.FieldSet}
689
  @param dynamic: dynamic fields set
690

691
  """
692
  f = utils.FieldSet()
693
  f.Extend(static)
694
  f.Extend(dynamic)
695

    
696
  delta = f.NonMatching(selected)
697
  if delta:
698
    raise errors.OpPrereqError("Unknown output fields selected: %s"
699
                               % ",".join(delta), errors.ECODE_INVAL)
700

    
701

    
702
def _CheckGlobalHvParams(params):
703
  """Validates that given hypervisor params are not global ones.
704

705
  This will ensure that instances don't get customised versions of
706
  global params.
707

708
  """
709
  used_globals = constants.HVC_GLOBALS.intersection(params)
710
  if used_globals:
711
    msg = ("The following hypervisor parameters are global and cannot"
712
           " be customized at instance level, please modify them at"
713
           " cluster level: %s" % utils.CommaJoin(used_globals))
714
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
715

    
716

    
717
def _CheckNodeOnline(lu, node, msg=None):
718
  """Ensure that a given node is online.
719

720
  @param lu: the LU on behalf of which we make the check
721
  @param node: the node to check
722
  @param msg: if passed, should be a message to replace the default one
723
  @raise errors.OpPrereqError: if the node is offline
724

725
  """
726
  if msg is None:
727
    msg = "Can't use offline node"
728
  if lu.cfg.GetNodeInfo(node).offline:
729
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
730

    
731

    
732
def _CheckNodeNotDrained(lu, node):
733
  """Ensure that a given node is not drained.
734

735
  @param lu: the LU on behalf of which we make the check
736
  @param node: the node to check
737
  @raise errors.OpPrereqError: if the node is drained
738

739
  """
740
  if lu.cfg.GetNodeInfo(node).drained:
741
    raise errors.OpPrereqError("Can't use drained node %s" % node,
742
                               errors.ECODE_STATE)
743

    
744

    
745
def _CheckNodeVmCapable(lu, node):
746
  """Ensure that a given node is vm capable.
747

748
  @param lu: the LU on behalf of which we make the check
749
  @param node: the node to check
750
  @raise errors.OpPrereqError: if the node is not vm capable
751

752
  """
753
  if not lu.cfg.GetNodeInfo(node).vm_capable:
754
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
755
                               errors.ECODE_STATE)
756

    
757

    
758
def _CheckNodeHasOS(lu, node, os_name, force_variant):
759
  """Ensure that a node supports a given OS.
760

761
  @param lu: the LU on behalf of which we make the check
762
  @param node: the node to check
763
  @param os_name: the OS to query about
764
  @param force_variant: whether to ignore variant errors
765
  @raise errors.OpPrereqError: if the node does not support the OS
766

767
  """
768
  result = lu.rpc.call_os_get(node, os_name)
769
  result.Raise("OS '%s' not in supported OS list for node %s" %
770
               (os_name, node),
771
               prereq=True, ecode=errors.ECODE_INVAL)
772
  if not force_variant:
773
    _CheckOSVariant(result.payload, os_name)
774

    
775

    
776
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
777
  """Ensure that a node has the given secondary ip.
778

779
  @type lu: L{LogicalUnit}
780
  @param lu: the LU on behalf of which we make the check
781
  @type node: string
782
  @param node: the node to check
783
  @type secondary_ip: string
784
  @param secondary_ip: the ip to check
785
  @type prereq: boolean
786
  @param prereq: whether to throw a prerequisite or an execute error
787
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
788
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
789

790
  """
791
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
792
  result.Raise("Failure checking secondary ip on node %s" % node,
793
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
794
  if not result.payload:
795
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
796
           " please fix and re-run this command" % secondary_ip)
797
    if prereq:
798
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
799
    else:
800
      raise errors.OpExecError(msg)
801

    
802

    
803
def _RequireFileStorage():
804
  """Checks that file storage is enabled.
805

806
  @raise errors.OpPrereqError: when file storage is disabled
807

808
  """
809
  if not constants.ENABLE_FILE_STORAGE:
810
    raise errors.OpPrereqError("File storage disabled at configure time",
811
                               errors.ECODE_INVAL)
812

    
813

    
814
def _CheckDiskTemplate(template):
815
  """Ensure a given disk template is valid.
816

817
  """
818
  if template not in constants.DISK_TEMPLATES:
819
    msg = ("Invalid disk template name '%s', valid templates are: %s" %
820
           (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
821
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
822
  if template == constants.DT_FILE:
823
    _RequireFileStorage()
824
  return True
825

    
826

    
827
def _CheckStorageType(storage_type):
828
  """Ensure a given storage type is valid.
829

830
  """
831
  if storage_type not in constants.VALID_STORAGE_TYPES:
832
    raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
833
                               errors.ECODE_INVAL)
834
  if storage_type == constants.ST_FILE:
835
    _RequireFileStorage()
836
  return True
837

    
838

    
839
def _GetClusterDomainSecret():
840
  """Reads the cluster domain secret.
841

842
  """
843
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
844
                               strict=True)
845

    
846

    
847
def _CheckInstanceDown(lu, instance, reason):
848
  """Ensure that an instance is not running."""
849
  if instance.admin_up:
850
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
851
                               (instance.name, reason), errors.ECODE_STATE)
852

    
853
  pnode = instance.primary_node
854
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
855
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
856
              prereq=True, ecode=errors.ECODE_ENVIRON)
857

    
858
  if instance.name in ins_l.payload:
859
    raise errors.OpPrereqError("Instance %s is running, %s" %
860
                               (instance.name, reason), errors.ECODE_STATE)
861

    
862

    
863
def _ExpandItemName(fn, name, kind):
864
  """Expand an item name.
865

866
  @param fn: the function to use for expansion
867
  @param name: requested item name
868
  @param kind: text description ('Node' or 'Instance')
869
  @return: the resolved (full) name
870
  @raise errors.OpPrereqError: if the item is not found
871

872
  """
873
  full_name = fn(name)
874
  if full_name is None:
875
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
876
                               errors.ECODE_NOENT)
877
  return full_name
878

    
879

    
880
def _ExpandNodeName(cfg, name):
881
  """Wrapper over L{_ExpandItemName} for nodes."""
882
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
883

    
884

    
885
def _ExpandInstanceName(cfg, name):
886
  """Wrapper over L{_ExpandItemName} for instance."""
887
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
888

    
889

    
890
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
891
                          memory, vcpus, nics, disk_template, disks,
892
                          bep, hvp, hypervisor_name):
893
  """Builds instance related env variables for hooks
894

895
  This builds the hook environment from individual variables.
896

897
  @type name: string
898
  @param name: the name of the instance
899
  @type primary_node: string
900
  @param primary_node: the name of the instance's primary node
901
  @type secondary_nodes: list
902
  @param secondary_nodes: list of secondary nodes as strings
903
  @type os_type: string
904
  @param os_type: the name of the instance's OS
905
  @type status: boolean
906
  @param status: the should_run status of the instance
907
  @type memory: string
908
  @param memory: the memory size of the instance
909
  @type vcpus: string
910
  @param vcpus: the count of VCPUs the instance has
911
  @type nics: list
912
  @param nics: list of tuples (ip, mac, mode, link) representing
913
      the NICs the instance has
914
  @type disk_template: string
915
  @param disk_template: the disk template of the instance
916
  @type disks: list
917
  @param disks: the list of (size, mode) pairs
918
  @type bep: dict
919
  @param bep: the backend parameters for the instance
920
  @type hvp: dict
921
  @param hvp: the hypervisor parameters for the instance
922
  @type hypervisor_name: string
923
  @param hypervisor_name: the hypervisor for the instance
924
  @rtype: dict
925
  @return: the hook environment for this instance
926

927
  """
928
  if status:
929
    str_status = "up"
930
  else:
931
    str_status = "down"
932
  env = {
933
    "OP_TARGET": name,
934
    "INSTANCE_NAME": name,
935
    "INSTANCE_PRIMARY": primary_node,
936
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
937
    "INSTANCE_OS_TYPE": os_type,
938
    "INSTANCE_STATUS": str_status,
939
    "INSTANCE_MEMORY": memory,
940
    "INSTANCE_VCPUS": vcpus,
941
    "INSTANCE_DISK_TEMPLATE": disk_template,
942
    "INSTANCE_HYPERVISOR": hypervisor_name,
943
  }
944

    
945
  if nics:
946
    nic_count = len(nics)
947
    for idx, (ip, mac, mode, link) in enumerate(nics):
948
      if ip is None:
949
        ip = ""
950
      env["INSTANCE_NIC%d_IP" % idx] = ip
951
      env["INSTANCE_NIC%d_MAC" % idx] = mac
952
      env["INSTANCE_NIC%d_MODE" % idx] = mode
953
      env["INSTANCE_NIC%d_LINK" % idx] = link
954
      if mode == constants.NIC_MODE_BRIDGED:
955
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
956
  else:
957
    nic_count = 0
958

    
959
  env["INSTANCE_NIC_COUNT"] = nic_count
960

    
961
  if disks:
962
    disk_count = len(disks)
963
    for idx, (size, mode) in enumerate(disks):
964
      env["INSTANCE_DISK%d_SIZE" % idx] = size
965
      env["INSTANCE_DISK%d_MODE" % idx] = mode
966
  else:
967
    disk_count = 0
968

    
969
  env["INSTANCE_DISK_COUNT"] = disk_count
970

    
971
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
972
    for key, value in source.items():
973
      env["INSTANCE_%s_%s" % (kind, key)] = value
974

    
975
  return env
976

    
977

    
978
def _NICListToTuple(lu, nics):
979
  """Build a list of nic information tuples.
980

981
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
982
  value in LUQueryInstanceData.
983

984
  @type lu:  L{LogicalUnit}
985
  @param lu: the logical unit on whose behalf we execute
986
  @type nics: list of L{objects.NIC}
987
  @param nics: list of nics to convert to hooks tuples
988

989
  """
990
  hooks_nics = []
991
  cluster = lu.cfg.GetClusterInfo()
992
  for nic in nics:
993
    ip = nic.ip
994
    mac = nic.mac
995
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
996
    mode = filled_params[constants.NIC_MODE]
997
    link = filled_params[constants.NIC_LINK]
998
    hooks_nics.append((ip, mac, mode, link))
999
  return hooks_nics
1000

    
1001

    
1002
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1003
  """Builds instance related env variables for hooks from an object.
1004

1005
  @type lu: L{LogicalUnit}
1006
  @param lu: the logical unit on whose behalf we execute
1007
  @type instance: L{objects.Instance}
1008
  @param instance: the instance for which we should build the
1009
      environment
1010
  @type override: dict
1011
  @param override: dictionary with key/values that will override
1012
      our values
1013
  @rtype: dict
1014
  @return: the hook environment dictionary
1015

1016
  """
1017
  cluster = lu.cfg.GetClusterInfo()
1018
  bep = cluster.FillBE(instance)
1019
  hvp = cluster.FillHV(instance)
1020
  args = {
1021
    'name': instance.name,
1022
    'primary_node': instance.primary_node,
1023
    'secondary_nodes': instance.secondary_nodes,
1024
    'os_type': instance.os,
1025
    'status': instance.admin_up,
1026
    'memory': bep[constants.BE_MEMORY],
1027
    'vcpus': bep[constants.BE_VCPUS],
1028
    'nics': _NICListToTuple(lu, instance.nics),
1029
    'disk_template': instance.disk_template,
1030
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
1031
    'bep': bep,
1032
    'hvp': hvp,
1033
    'hypervisor_name': instance.hypervisor,
1034
  }
1035
  if override:
1036
    args.update(override)
1037
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
1038

    
1039

    
1040
def _AdjustCandidatePool(lu, exceptions):
1041
  """Adjust the candidate pool after node operations.
1042

1043
  """
1044
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1045
  if mod_list:
1046
    lu.LogInfo("Promoted nodes to master candidate role: %s",
1047
               utils.CommaJoin(node.name for node in mod_list))
1048
    for name in mod_list:
1049
      lu.context.ReaddNode(name)
1050
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1051
  if mc_now > mc_max:
1052
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1053
               (mc_now, mc_max))
1054

    
1055

    
1056
def _DecideSelfPromotion(lu, exceptions=None):
1057
  """Decide whether I should promote myself as a master candidate.
1058

1059
  """
1060
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1061
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1062
  # the new node will increase mc_max by one, so:
1063
  mc_should = min(mc_should + 1, cp_size)
1064
  return mc_now < mc_should
1065

    
1066

    
1067
def _CheckNicsBridgesExist(lu, target_nics, target_node):
1068
  """Check that the brigdes needed by a list of nics exist.
1069

1070
  """
1071
  cluster = lu.cfg.GetClusterInfo()
1072
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1073
  brlist = [params[constants.NIC_LINK] for params in paramslist
1074
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1075
  if brlist:
1076
    result = lu.rpc.call_bridges_exist(target_node, brlist)
1077
    result.Raise("Error checking bridges on destination node '%s'" %
1078
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1079

    
1080

    
1081
def _CheckInstanceBridgesExist(lu, instance, node=None):
1082
  """Check that the brigdes needed by an instance exist.
1083

1084
  """
1085
  if node is None:
1086
    node = instance.primary_node
1087
  _CheckNicsBridgesExist(lu, instance.nics, node)
1088

    
1089

    
1090
def _CheckOSVariant(os_obj, name):
1091
  """Check whether an OS name conforms to the os variants specification.
1092

1093
  @type os_obj: L{objects.OS}
1094
  @param os_obj: OS object to check
1095
  @type name: string
1096
  @param name: OS name passed by the user, to check for validity
1097

1098
  """
1099
  if not os_obj.supported_variants:
1100
    return
1101
  variant = objects.OS.GetVariant(name)
1102
  if not variant:
1103
    raise errors.OpPrereqError("OS name must include a variant",
1104
                               errors.ECODE_INVAL)
1105

    
1106
  if variant not in os_obj.supported_variants:
1107
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1108

    
1109

    
1110
def _GetNodeInstancesInner(cfg, fn):
1111
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1112

    
1113

    
1114
def _GetNodeInstances(cfg, node_name):
1115
  """Returns a list of all primary and secondary instances on a node.
1116

1117
  """
1118

    
1119
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1120

    
1121

    
1122
def _GetNodePrimaryInstances(cfg, node_name):
1123
  """Returns primary instances on a node.
1124

1125
  """
1126
  return _GetNodeInstancesInner(cfg,
1127
                                lambda inst: node_name == inst.primary_node)
1128

    
1129

    
1130
def _GetNodeSecondaryInstances(cfg, node_name):
1131
  """Returns secondary instances on a node.
1132

1133
  """
1134
  return _GetNodeInstancesInner(cfg,
1135
                                lambda inst: node_name in inst.secondary_nodes)
1136

    
1137

    
1138
def _GetStorageTypeArgs(cfg, storage_type):
1139
  """Returns the arguments for a storage type.
1140

1141
  """
1142
  # Special case for file storage
1143
  if storage_type == constants.ST_FILE:
1144
    # storage.FileStorage wants a list of storage directories
1145
    return [[cfg.GetFileStorageDir()]]
1146

    
1147
  return []
1148

    
1149

    
1150
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1151
  faulty = []
1152

    
1153
  for dev in instance.disks:
1154
    cfg.SetDiskID(dev, node_name)
1155

    
1156
  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1157
  result.Raise("Failed to get disk status from node %s" % node_name,
1158
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
1159

    
1160
  for idx, bdev_status in enumerate(result.payload):
1161
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1162
      faulty.append(idx)
1163

    
1164
  return faulty
1165

    
1166

    
1167
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1168
  """Check the sanity of iallocator and node arguments and use the
1169
  cluster-wide iallocator if appropriate.
1170

1171
  Check that at most one of (iallocator, node) is specified. If none is
1172
  specified, then the LU's opcode's iallocator slot is filled with the
1173
  cluster-wide default iallocator.
1174

1175
  @type iallocator_slot: string
1176
  @param iallocator_slot: the name of the opcode iallocator slot
1177
  @type node_slot: string
1178
  @param node_slot: the name of the opcode target node slot
1179

1180
  """
1181
  node = getattr(lu.op, node_slot, None)
1182
  iallocator = getattr(lu.op, iallocator_slot, None)
1183

    
1184
  if node is not None and iallocator is not None:
1185
    raise errors.OpPrereqError("Do not specify both, iallocator and node.",
1186
                               errors.ECODE_INVAL)
1187
  elif node is None and iallocator is None:
1188
    default_iallocator = lu.cfg.GetDefaultIAllocator()
1189
    if default_iallocator:
1190
      setattr(lu.op, iallocator_slot, default_iallocator)
1191
    else:
1192
      raise errors.OpPrereqError("No iallocator or node given and no"
1193
                                 " cluster-wide default iallocator found."
1194
                                 " Please specify either an iallocator or a"
1195
                                 " node, or set a cluster-wide default"
1196
                                 " iallocator.")
1197

    
1198

    
1199
class LUPostInitCluster(LogicalUnit):
1200
  """Logical unit for running hooks after cluster initialization.
1201

1202
  """
1203
  HPATH = "cluster-init"
1204
  HTYPE = constants.HTYPE_CLUSTER
1205

    
1206
  def BuildHooksEnv(self):
1207
    """Build hooks env.
1208

1209
    """
1210
    env = {"OP_TARGET": self.cfg.GetClusterName()}
1211
    mn = self.cfg.GetMasterNode()
1212
    return env, [], [mn]
1213

    
1214
  def Exec(self, feedback_fn):
1215
    """Nothing to do.
1216

1217
    """
1218
    return True
1219

    
1220

    
1221
class LUDestroyCluster(LogicalUnit):
1222
  """Logical unit for destroying the cluster.
1223

1224
  """
1225
  HPATH = "cluster-destroy"
1226
  HTYPE = constants.HTYPE_CLUSTER
1227

    
1228
  def BuildHooksEnv(self):
1229
    """Build hooks env.
1230

1231
    """
1232
    env = {"OP_TARGET": self.cfg.GetClusterName()}
1233
    return env, [], []
1234

    
1235
  def CheckPrereq(self):
1236
    """Check prerequisites.
1237

1238
    This checks whether the cluster is empty.
1239

1240
    Any errors are signaled by raising errors.OpPrereqError.
1241

1242
    """
1243
    master = self.cfg.GetMasterNode()
1244

    
1245
    nodelist = self.cfg.GetNodeList()
1246
    if len(nodelist) != 1 or nodelist[0] != master:
1247
      raise errors.OpPrereqError("There are still %d node(s) in"
1248
                                 " this cluster." % (len(nodelist) - 1),
1249
                                 errors.ECODE_INVAL)
1250
    instancelist = self.cfg.GetInstanceList()
1251
    if instancelist:
1252
      raise errors.OpPrereqError("There are still %d instance(s) in"
1253
                                 " this cluster." % len(instancelist),
1254
                                 errors.ECODE_INVAL)
1255

    
1256
  def Exec(self, feedback_fn):
1257
    """Destroys the cluster.
1258

1259
    """
1260
    master = self.cfg.GetMasterNode()
1261

    
1262
    # Run post hooks on master node before it's removed
1263
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
1264
    try:
1265
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
1266
    except:
1267
      # pylint: disable-msg=W0702
1268
      self.LogWarning("Errors occurred running hooks on %s" % master)
1269

    
1270
    result = self.rpc.call_node_stop_master(master, False)
1271
    result.Raise("Could not disable the master role")
1272

    
1273
    return master
1274

    
1275

    
1276
def _VerifyCertificate(filename):
1277
  """Verifies a certificate for LUVerifyCluster.
1278

1279
  @type filename: string
1280
  @param filename: Path to PEM file
1281

1282
  """
1283
  try:
1284
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1285
                                           utils.ReadFile(filename))
1286
  except Exception, err: # pylint: disable-msg=W0703
1287
    return (LUVerifyCluster.ETYPE_ERROR,
1288
            "Failed to load X509 certificate %s: %s" % (filename, err))
1289

    
1290
  (errcode, msg) = \
1291
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1292
                                constants.SSL_CERT_EXPIRATION_ERROR)
1293

    
1294
  if msg:
1295
    fnamemsg = "While verifying %s: %s" % (filename, msg)
1296
  else:
1297
    fnamemsg = None
1298

    
1299
  if errcode is None:
1300
    return (None, fnamemsg)
1301
  elif errcode == utils.CERT_WARNING:
1302
    return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
1303
  elif errcode == utils.CERT_ERROR:
1304
    return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)
1305

    
1306
  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1307

    
1308

    
1309
class LUVerifyCluster(LogicalUnit):
1310
  """Verifies the cluster status.
1311

1312
  """
1313
  HPATH = "cluster-verify"
1314
  HTYPE = constants.HTYPE_CLUSTER
1315
  _OP_PARAMS = [
1316
    ("skip_checks", ht.EmptyList,
1317
     ht.TListOf(ht.TElemOf(constants.VERIFY_OPTIONAL_CHECKS))),
1318
    ("verbose", False, ht.TBool),
1319
    ("error_codes", False, ht.TBool),
1320
    ("debug_simulate_errors", False, ht.TBool),
1321
    ]
1322
  REQ_BGL = False
1323

    
1324
  TCLUSTER = "cluster"
1325
  TNODE = "node"
1326
  TINSTANCE = "instance"
1327

    
1328
  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1329
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1330
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1331
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1332
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1333
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1334
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1335
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1336
  ENODEDRBD = (TNODE, "ENODEDRBD")
1337
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1338
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1339
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
1340
  ENODEHV = (TNODE, "ENODEHV")
1341
  ENODELVM = (TNODE, "ENODELVM")
1342
  ENODEN1 = (TNODE, "ENODEN1")
1343
  ENODENET = (TNODE, "ENODENET")
1344
  ENODEOS = (TNODE, "ENODEOS")
1345
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1346
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1347
  ENODERPC = (TNODE, "ENODERPC")
1348
  ENODESSH = (TNODE, "ENODESSH")
1349
  ENODEVERSION = (TNODE, "ENODEVERSION")
1350
  ENODESETUP = (TNODE, "ENODESETUP")
1351
  ENODETIME = (TNODE, "ENODETIME")
1352

    
1353
  ETYPE_FIELD = "code"
1354
  ETYPE_ERROR = "ERROR"
1355
  ETYPE_WARNING = "WARNING"
1356

    
1357
  _HOOKS_INDENT_RE = re.compile("^", re.M)
1358

    
1359
  class NodeImage(object):
1360
    """A class representing the logical and physical status of a node.
1361

1362
    @type name: string
1363
    @ivar name: the node name to which this object refers
1364
    @ivar volumes: a structure as returned from
1365
        L{ganeti.backend.GetVolumeList} (runtime)
1366
    @ivar instances: a list of running instances (runtime)
1367
    @ivar pinst: list of configured primary instances (config)
1368
    @ivar sinst: list of configured secondary instances (config)
1369
    @ivar sbp: dictionary of {secondary-node: list of instances} of all peers
1370
        of this node (config)
1371
    @ivar mfree: free memory, as reported by hypervisor (runtime)
1372
    @ivar dfree: free disk, as reported by the node (runtime)
1373
    @ivar offline: the offline status (config)
1374
    @type rpc_fail: boolean
1375
    @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1376
        not whether the individual keys were correct) (runtime)
1377
    @type lvm_fail: boolean
1378
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1379
    @type hyp_fail: boolean
1380
    @ivar hyp_fail: whether the RPC call didn't return the instance list
1381
    @type ghost: boolean
1382
    @ivar ghost: whether this is a known node or not (config)
1383
    @type os_fail: boolean
1384
    @ivar os_fail: whether the RPC call didn't return valid OS data
1385
    @type oslist: list
1386
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1387
    @type vm_capable: boolean
1388
    @ivar vm_capable: whether the node can host instances
1389

1390
    """
1391
    def __init__(self, offline=False, name=None, vm_capable=True):
1392
      self.name = name
1393
      self.volumes = {}
1394
      self.instances = []
1395
      self.pinst = []
1396
      self.sinst = []
1397
      self.sbp = {}
1398
      self.mfree = 0
1399
      self.dfree = 0
1400
      self.offline = offline
1401
      self.vm_capable = vm_capable
1402
      self.rpc_fail = False
1403
      self.lvm_fail = False
1404
      self.hyp_fail = False
1405
      self.ghost = False
1406
      self.os_fail = False
1407
      self.oslist = {}
1408

    
1409
  def ExpandNames(self):
1410
    self.needed_locks = {
1411
      locking.LEVEL_NODE: locking.ALL_SET,
1412
      locking.LEVEL_INSTANCE: locking.ALL_SET,
1413
    }
1414
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1415

    
1416
  def _Error(self, ecode, item, msg, *args, **kwargs):
1417
    """Format an error message.
1418

1419
    Based on the opcode's error_codes parameter, either format a
1420
    parseable error code, or a simpler error string.
1421

1422
    This must be called only from Exec and functions called from Exec.
1423

1424
    """
1425
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1426
    itype, etxt = ecode
1427
    # first complete the msg
1428
    if args:
1429
      msg = msg % args
1430
    # then format the whole message
1431
    if self.op.error_codes:
1432
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1433
    else:
1434
      if item:
1435
        item = " " + item
1436
      else:
1437
        item = ""
1438
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1439
    # and finally report it via the feedback_fn
1440
    self._feedback_fn("  - %s" % msg)
1441

    
1442
  def _ErrorIf(self, cond, *args, **kwargs):
1443
    """Log an error message if the passed condition is True.
1444

1445
    """
1446
    cond = bool(cond) or self.op.debug_simulate_errors
1447
    if cond:
1448
      self._Error(*args, **kwargs)
1449
    # only ERROR-type conditions mark the operation as failed, not warnings
1450
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1451
      self.bad = self.bad or cond
1452

    
1453
  def _VerifyNode(self, ninfo, nresult):
1454
    """Perform some basic validation on data returned from a node.
1455

1456
      - check the result data structure is well formed and has all the
1457
        mandatory fields
1458
      - check ganeti version
1459

1460
    @type ninfo: L{objects.Node}
1461
    @param ninfo: the node to check
1462
    @param nresult: the results from the node
1463
    @rtype: boolean
1464
    @return: whether overall this call was successful (and we can expect
1465
         reasonable values in the response)
1466

1467
    """
1468
    node = ninfo.name
1469
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1470

    
1471
    # main result, nresult should be a non-empty dict
1472
    test = not nresult or not isinstance(nresult, dict)
1473
    _ErrorIf(test, self.ENODERPC, node,
1474
                  "unable to verify node: no data returned")
1475
    if test:
1476
      return False
1477

    
1478
    # compares ganeti version
1479
    local_version = constants.PROTOCOL_VERSION
1480
    remote_version = nresult.get("version", None)
1481
    test = not (remote_version and
1482
                isinstance(remote_version, (list, tuple)) and
1483
                len(remote_version) == 2)
1484
    _ErrorIf(test, self.ENODERPC, node,
1485
             "connection to node returned invalid data")
1486
    if test:
1487
      return False
1488

    
1489
    test = local_version != remote_version[0]
1490
    _ErrorIf(test, self.ENODEVERSION, node,
1491
             "incompatible protocol versions: master %s,"
1492
             " node %s", local_version, remote_version[0])
1493
    if test:
1494
      return False
1495

    
1496
    # node seems compatible, we can actually try to look into its results
1497

    
1498
    # full package version
1499
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1500
                  self.ENODEVERSION, node,
1501
                  "software version mismatch: master %s, node %s",
1502
                  constants.RELEASE_VERSION, remote_version[1],
1503
                  code=self.ETYPE_WARNING)
1504

    
1505
    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1506
    if ninfo.vm_capable and isinstance(hyp_result, dict):
1507
      for hv_name, hv_result in hyp_result.iteritems():
1508
        test = hv_result is not None
1509
        _ErrorIf(test, self.ENODEHV, node,
1510
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1511

    
1512
    test = nresult.get(constants.NV_NODESETUP,
1513
                           ["Missing NODESETUP results"])
1514
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1515
             "; ".join(test))
1516

    
1517
    return True
1518

    
1519
  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
      return

    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)

  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    # check pv names
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                 " '%s' of VG '%s'", pvname, owner_vg)

  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])

    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node master IP reachability data")
    if not test:
      if not nresult[constants.NV_MASTERIP]:
        if node == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
        else:
          msg = "cannot reach the master IP"
        _ErrorIf(True, self.ENODENET, node, msg)

  def _VerifyInstance(self, instance, instanceconfig, node_image,
                      diskstatus):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      pri_img = node_image[node_current]
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    for node, n_img in node_image.items():
      if node != node_current:
        test = instance in n_img.instances
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
                 "instance should not run on node %s", node)

    diskdata = [(nname, success, status, idx)
                for (nname, disks) in diskstatus.items()
                for idx, (success, status) in enumerate(disks)]

    for nname, success, bdev_status, idx in diskdata:
      _ErrorIf(instanceconfig.admin_up and not success,
               self.EINSTANCEFAULTYDISK, instance,
               "couldn't retrieve status for disk/%s on %s: %s",
               idx, nname, bdev_status)
      _ErrorIf((instanceconfig.admin_up and success and
                bdev_status.ldisk_status == constants.LDS_FAULTY),
               self.EINSTANCEFAULTYDISK, instance,
               "disk/%s on %s is faulty", idx, nname)

  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    @type reserved: L{ganeti.utils.FieldSet}
    @param reserved: a FieldSet of reserved volume names

    """
    for node, n_img in node_image.items():
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # skip non-healthy nodes
        continue
      for volume in n_img.volumes:
        test = ((node not in node_vol_should or
                volume not in node_vol_should[node]) and
                not reserved.Matches(volume))
        self._ErrorIf(test, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

  def _VerifyOrphanInstances(self, instancelist, node_image):
    """Verify the list of running instances.

    This checks what instances are running but unknown to the cluster.

    """
    for node, n_img in node_image.items():
      for o_inst in n_img.instances:
        test = o_inst not in instancelist
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
                      "instance %s on node %s should not exist", o_inst, node)

  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    """
    for node, n_img in node_image.items():
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
      for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate instance failovers"
                      " should node %s fail", prinode)

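  # The file checks below compare remote checksums against local ones; a
  # hedged sketch of the inputs (hypothetical values):
  #   local_cksum  = {"/var/lib/ganeti/config.data": "abc123..."}
  #   remote_cksum = nresult[constants.NV_FILELIST]   # same layout
  # test1/test2/test3 then classify each file as missing, mismatching or
  # identical, and must_have decides which of those counts as an error.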
  def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
                       master_files):
    """Verifies and computes the node required file checksums.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param file_list: required list of files
    @param local_cksum: dictionary of local files and their checksums
    @param master_files: list of files that only masters should have

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_cksum = nresult.get(constants.NV_FILELIST, None)
    test = not isinstance(remote_cksum, dict)
    _ErrorIf(test, self.ENODEFILECHECK, node,
             "node hasn't returned file checksum data")
    if test:
      return

    for file_name in file_list:
      node_is_mc = ninfo.master_candidate
      must_have = (file_name not in master_files) or node_is_mc
      # missing
      test1 = file_name not in remote_cksum
      # invalid checksum
      test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
      # existing and good
      test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
      _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
               "file '%s' missing", file_name)
      _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
               "file '%s' has wrong checksum", file_name)
      # not candidate and this is not a must-have file
      _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist on non master"
               " candidates (and the file is outdated)", file_name)
      # all good, except non-master/non-must have combination
      _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist"
               " on non master candidates", file_name)

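  # drbd_map, as consumed by _VerifyNodeDrbd below, is assumed to map node
  # name -> {minor: instance_name}, e.g. (hypothetical values):
  #   drbd_map = {"node1.example.com": {0: "inst1.example.com"}}
  # Minors found on the node but absent from this map are reported as
  # unallocated; configured minors missing from the node are reported as
  # not active.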
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
                      drbd_map):
    """Verifies the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_helper: the configured DRBD usermode helper
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    if drbd_helper:
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
      test = (helper_result is None)
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
               "no drbd usermode helper returned")
      if helper_result:
        status, payload = helper_result
        test = not status
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "drbd usermode helper check unsuccessful: %s", payload)
        test = status and (payload != drbd_helper)
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "wrong drbd usermode helper: %s", payload)

    # compute the DRBD minors
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, self.ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
      # ghost instance should not be running, but otherwise we
      # don't give double warnings (both ghost instance and
      # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name, instance.admin_up)

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, self.ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return

    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, self.ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, self.ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)

  def _UpdateNodeOS(self, ninfo, nresult, nimg):
    """Builds the node OS structures.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_os = nresult.get(constants.NV_OSLIST, None)
    test = (not isinstance(remote_os, list) or
            not compat.all(isinstance(v, list) and len(v) == 7
                           for v in remote_os))

    _ErrorIf(test, self.ENODEOS, node,
             "node hasn't returned valid OS data")

    nimg.os_fail = test

    if test:
      return

    os_dict = {}

    for (name, os_path, status, diagnose,
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:

      if name not in os_dict:
        os_dict[name] = []

      # parameters is a list of lists instead of list of tuples due to
      # JSON lacking a real tuple type, fix it:
      parameters = [tuple(v) for v in parameters]
      os_dict[name].append((os_path, status, diagnose,
                            set(variants), set(parameters), set(api_ver)))

    nimg.oslist = os_dict

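  # After _UpdateNodeOS, nimg.oslist is assumed to look like (hypothetical
  # values):
  #   {"debootstrap": [("/srv/ganeti/os/debootstrap", True, "",
  #                     set(["default"]), set(), set([20]))]}
  # i.e. OS name -> list of (path, status, diagnose, variants, parameters,
  # api_versions) tuples; _VerifyNodeOS below compares the first entry of
  # each OS against the reference node.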
  def _VerifyNodeOS(self, ninfo, nimg, base):
    """Verifies the node OS list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nimg: the node image object
    @param base: the 'template' node we match against (e.g. from the master)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"

    for os_name, os_data in nimg.oslist.items():
      assert os_data, "Empty OS status for OS %s?!" % os_name
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
      _ErrorIf(not f_status, self.ENODEOS, node,
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
               "OS '%s' has multiple entries (first one shadows the rest): %s",
               os_name, utils.CommaJoin([v[0] for v in os_data]))
      # this will be caught in the backend too
      _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
               and not f_var, self.ENODEOS, node,
               "OS %s with API at least %d does not declare any variant",
               os_name, constants.OS_API_V15)
      # comparisons with the 'base' image
      test = os_name not in base.oslist
      _ErrorIf(test, self.ENODEOS, node,
               "Extra OS %s not present on reference node (%s)",
               os_name, base.name)
      if test:
        continue
      assert base.oslist[os_name], "Base node has empty OS status?"
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
      if not b_status:
        # base OS is invalid, skipping
        continue
      for kind, a, b in [("API version", f_api, b_api),
                         ("variants list", f_var, b_var),
                         ("parameters", f_param, b_param)]:
        _ErrorIf(a != b, self.ENODEOS, node,
                 "OS %s %s differs from reference node %s: %s vs. %s",
                 kind, os_name, base.name,
                 utils.CommaJoin(a), utils.CommaJoin(b))

    # check any missing OSes
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
    _ErrorIf(missing, self.ENODEOS, node,
             "OSes present on reference node %s but missing on this node: %s",
             base.name, utils.CommaJoin(missing))

  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
    """Verifies and updates the node volume data.

    This function will update a L{NodeImage}'s internal structures
    with data from the remote call.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    if vg_name is None:
      pass
    elif isinstance(lvdata, basestring):
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
    else:
      nimg.volumes = lvdata
      nimg.lvm_fail = False

  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
    """Verifies and updates the node instance list.

    If the listing was successful, then updates this node's instance
    list. Otherwise, it marks the RPC call as failed for the instance
    list key.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    idata = nresult.get(constants.NV_INSTANCELIST, None)
    test = not isinstance(idata, list)
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
    if test:
      nimg.hyp_fail = True
    else:
      nimg.instances = idata

  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
    """Verifies and computes a node information map

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # try to read free memory (from the hypervisor)
    hv_info = nresult.get(constants.NV_HVINFO, None)
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
    if not test:
      try:
        nimg.mfree = int(hv_info["memory_free"])
      except (ValueError, TypeError):
        _ErrorIf(True, self.ENODERPC, node,
                 "node returned invalid nodeinfo, check hypervisor")

    # FIXME: devise a free space model for file based instances as well
    if vg_name is not None:
      test = (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST])
      _ErrorIf(test, self.ENODELVM, node,
               "node didn't return data for the volume group '%s'"
               " - it is either missing or broken", vg_name)
      if not test:
        try:
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
        except (ValueError, TypeError):
          _ErrorIf(True, self.ENODERPC, node,
                   "node returned invalid LVM info, check LVM status")

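  # _CollectDiskInfo below returns a nested mapping, assumed to look like
  # (hypothetical names):
  #   instdisk = {"inst1.example.com": {"node1.example.com": [status0,
  #                                                           status1]}}
  # i.e. instance name -> node name -> per-disk status list, with None used
  # where a node returned no data.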
  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
    """Gets per-disk status information for all instances.

    @type nodelist: list of strings
    @param nodelist: Node names
    @type node_image: dict of (name, L{objects.Node})
    @param node_image: Node objects
    @type instanceinfo: dict of (name, L{objects.Instance})
    @param instanceinfo: Instance objects

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    node_disks = {}
    node_disks_devonly = {}
    diskless_instances = set()
    diskless = constants.DT_DISKLESS

    for nname in nodelist:
      node_instances = list(itertools.chain(node_image[nname].pinst,
                                            node_image[nname].sinst))
      diskless_instances.update(inst for inst in node_instances
                                if instanceinfo[inst].disk_template == diskless)
      disks = [(inst, disk)
               for inst in node_instances
               for disk in instanceinfo[inst].disks]

      if not disks:
        # No need to collect data
        continue

      node_disks[nname] = disks

      # Creating copies as SetDiskID below will modify the objects and that can
      # lead to incorrect data returned from nodes
      devonly = [dev.Copy() for (_, dev) in disks]

      for dev in devonly:
        self.cfg.SetDiskID(dev, nname)

      node_disks_devonly[nname] = devonly

    assert len(node_disks) == len(node_disks_devonly)

    # Collect data from all nodes with disks
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
                                                          node_disks_devonly)

    assert len(result) == len(node_disks)

    instdisk = {}

    for (nname, nres) in result.items():
      if nres.offline:
        # Ignore offline node
        continue

      disks = node_disks[nname]

      msg = nres.fail_msg
      _ErrorIf(msg, self.ENODERPC, nname,
               "while getting disk information: %s", nres.fail_msg)
      if msg:
        # No data from this node
        data = len(disks) * [None]
      else:
        data = nres.payload

      for ((inst, _), status) in zip(disks, data):
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)

    # Add empty entries for diskless instances.
    for inst in diskless_instances:
      assert inst not in instdisk
      instdisk[inst] = {}

    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
                      len(nnames) <= len(instanceinfo[inst].all_nodes)
                      for inst, nnames in instdisk.items()
                      for nname, statuses in nnames.items())

    return instdisk

  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks are run in the post phase; their failure makes
    the output be logged in the verify output and the verification fail.

    """
    all_nodes = self.cfg.GetNodeList()
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
      }
    for node in self.cfg.GetAllNodesInfo().values():
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())

    return env, [], all_nodes

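  # Rough flow of Exec below: gather the configuration data, build the
  # expected cluster state (node_image), issue one node_verify RPC over all
  # nodes, then feed each node's payload to the _Verify*/_Update* helpers
  # above and finish with the per-instance and cluster-wide checks.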
  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    self.bad = False
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn
    feedback_fn("* Verifying global settings")
    for msg in self.cfg.VerifyConfig():
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)

    # Check the cluster certificates
    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)

    vg_name = self.cfg.GetVGName()
    drbd_helper = self.cfg.GetDRBDHelper()
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
    cluster = self.cfg.GetClusterInfo()
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
                        for iname in instancelist)
    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = 0 # Count of offline nodes
    n_drained = 0 # Count of nodes being drained
    node_vol_should = {}

    # FIXME: verify OS list
    # do local checksums
    master_files = [constants.CLUSTER_CONF_FILE]
    master_node = self.master_node = self.cfg.GetMasterNode()
    master_ip = self.cfg.GetMasterIP()

    file_names = ssconf.SimpleStore().GetFileList()
    file_names.extend(constants.ALL_CERT_FILES)
    file_names.extend(master_files)
    if cluster.modify_etc_hosts:
      file_names.append(constants.ETC_HOSTS)

    local_checksums = utils.FingerprintFiles(file_names)

    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
    node_verify_param = {
      constants.NV_FILELIST: file_names,
      constants.NV_NODELIST: [node.name for node in nodeinfo
                              if not node.offline],
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
                                  node.secondary_ip) for node in nodeinfo
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
      constants.NV_MASTERIP: (master_node, master_ip),
      constants.NV_OSLIST: None,
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
      }

    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]
      node_verify_param[constants.NV_DRBDLIST] = None

    if drbd_helper:
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper

    # Build our expected cluster state
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
                                                 name=node.name,
                                                 vm_capable=node.vm_capable))
                      for node in nodeinfo)

    for instance in instancelist:
      inst_config = instanceinfo[instance]

      for nname in inst_config.all_nodes:
        if nname not in node_image:
          # ghost node
          gnode = self.NodeImage(name=nname)
          gnode.ghost = True
          node_image[nname] = gnode

      inst_config.MapLVsByNode(node_vol_should)

      pnode = inst_config.primary_node
      node_image[pnode].pinst.append(instance)

      for snode in inst_config.secondary_nodes:
        nimg = node_image[snode]
        nimg.sinst.append(instance)
        if pnode not in nimg.sbp:
          nimg.sbp[pnode] = []
        nimg.sbp[pnode].append(instance)

    # At this point, we have the in-memory data structures complete,
    # except for the runtime information, which we'll gather next

    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()

    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Gathering disk information (%s nodes)" % len(nodelist))
    instdisk = self._CollectDiskInfo(nodelist, node_image, instanceinfo)

    feedback_fn("* Verifying node status")

    refos_img = None

    for node_i in nodeinfo:
      node = node_i.name
      nimg = node_image[node]

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline += 1
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained += 1
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
      if msg:
        nimg.rpc_fail = True
        continue

      nresult = all_nvinfo[node].payload

      nimg.call_ok = self._VerifyNode(node_i, nresult)
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
      self._VerifyNodeNetwork(node_i, nresult)
      self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
                            master_files)

      if nimg.vm_capable:
        self._VerifyNodeLVM(node_i, nresult, vg_name)
        self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
                             all_drbd_map)

        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
        self._UpdateNodeInstances(node_i, nresult, nimg)
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
        self._UpdateNodeOS(node_i, nresult, nimg)
        if not nimg.os_fail:
          if refos_img is None:
            refos_img = nimg
          self._VerifyNodeOS(node_i, nimg, refos_img)

    feedback_fn("* Verifying instance status")
    for instance in instancelist:
      if verbose:
        feedback_fn("* Verifying instance %s" % instance)
      inst_config = instanceinfo[instance]
      self._VerifyInstance(instance, inst_config, node_image,
                           instdisk[instance])
      inst_nodes_offline = []

      pnode = inst_config.primary_node
      pnode_img = node_image[pnode]
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
               self.ENODERPC, pnode, "instance %s, connection to"
               " primary node failed", instance)

      if pnode_img.offline:
        inst_nodes_offline.append(pnode)

      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
      if not inst_config.secondary_nodes:
        i_non_redundant.append(instance)
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
               instance, "instance has multiple secondary nodes: %s",
               utils.CommaJoin(inst_config.secondary_nodes),
               code=self.ETYPE_WARNING)

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        s_img = node_image[snode]
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
                 "instance %s, connection to secondary node failed", instance)

        if s_img.offline:
          inst_nodes_offline.append(snode)

      # warn that the instance lives on offline nodes
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
               "instance lives on offline node(s) %s",
               utils.CommaJoin(inst_nodes_offline))
      # ... or ghost/non-vm_capable nodes
      for node in inst_config.all_nodes:
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
                 "instance lives on ghost node %s", node)
        _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
                 instance, "instance lives on non-vm_capable node %s", node)

    feedback_fn("* Verifying orphan volumes")
    reserved = utils.FieldSet(*cluster.reserved_lvs)
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)

    feedback_fn("* Verifying orphan instances")
    self._VerifyOrphanInstances(instancelist, node_image)

    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
      feedback_fn("* Verifying N+1 Memory redundancy")
      self._VerifyNPlusOneMemory(node_image, instanceinfo)

    feedback_fn("* Other Notes")
    if i_non_redundant:
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))

    if i_non_a_balanced:
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
                  % len(i_non_a_balanced))

    if n_offline:
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)

    if n_drained:
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)

    return not self.bad

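  # Note on HooksCallBack below: lu_result is the value previously returned
  # by Exec; it is overridden to 1 when the hooks RPC itself failed (or the
  # node was offline) and to 0 when an individual hook script reported
  # constants.HKR_FAIL.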
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, and are only interested in
    # their results
    if phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      feedback_fn("* Hooks Results")
      assert hooks_results, "invalid result from hooks"

      for node_name in hooks_results:
        res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
                      "Communication failure in hooks execution: %s", msg)
        if res.offline or msg:
          # No need to investigate payload if node is offline or gave an error.
          # override manually lu_result here as _ErrorIf only
          # overrides self.bad
          lu_result = 1
          continue
        for script, hkr, output in res.payload:
          test = hkr == constants.HKR_FAIL
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
                        "Script %s failed, output:", script)
          if test:
            output = self._HOOKS_INDENT_RE.sub('      ', output)
            feedback_fn("%s" % output)
            lu_result = 0

      return lu_result


class LUVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

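  # The result built below is a 3-tuple; a hedged example (hypothetical
  # values):
  #   ({"node1.example.com": "rpc error"},             # per-node errors
  #    ["inst1.example.com"],                          # need activate-disks
  #    {"inst2.example.com": [("node2", "xenvg/lv0")]})  # missing volumes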
  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    result = res_nodes, res_instances, res_missing = {}, [], {}

    nodes = utils.NiceSort(self.cfg.GetNodeList())
    instances = [self.cfg.GetInstanceInfo(name)
                 for name in self.cfg.GetInstanceList()]

    nv_dict = {}
    for inst in instances:
      inst_lvs = {}
      if (not inst.admin_up or
          inst.disk_template not in constants.DTS_NET_MIRROR):
        continue
      inst.MapLVsByNode(inst_lvs)
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
      for node, vol_list in inst_lvs.iteritems():
        for vol in vol_list:
          nv_dict[(node, vol)] = inst

    if not nv_dict:
      return result

    vg_names = self.rpc.call_vg_list(nodes)
    vg_names.Raise("Cannot get list of VGs")

    for node in nodes:
      # node_volume
      node_res = self.rpc.call_lv_list([node],
                                       vg_names[node].payload.keys())[node]
      if node_res.offline:
        continue
      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg
        continue

      lvs = node_res.payload
      for lv_name, (_, _, lv_online) in lvs.items():
        inst = nv_dict.pop((node, lv_name), None)
        if (not lv_online and inst is not None
            and inst.name not in res_instances):
          res_instances.append(inst.name)

    # any leftover items in nv_dict are missing LVs, let's arrange the
    # data better
    for key, inst in nv_dict.iteritems():
      if inst.name not in res_missing:
        res_missing[inst.name] = []
      res_missing[inst.name].append(key)

    return result


class LURepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  """
  _OP_PARAMS = [("instances", ht.EmptyList, ht.TListOf(ht.TNonEmptyString))]
  REQ_BGL = False

  def ExpandNames(self):
    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = _ExpandInstanceName(self.cfg, name)
        self.wanted_names.append(full_name)
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False

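  # In Exec below, the sizes returned by call_blockdev_getsizes are assumed
  # to be in bytes; "size >> 20" converts them to MiB (2**20 bytes) before
  # comparing with the configured disk.size, e.g. 10737418240 >> 20 == 10240.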
  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))

    changed = []
    for node, dskl in per_node_disks.items():
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsizes(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsizes call to node"
                        " %s, ignoring", node)
        continue
      if len(result.data) != len(dskl):
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, result.data):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
        size = size >> 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))
    return changed


class LURenameCluster(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_PARAMS = [("name", ht.NoDefault, ht.TNonEmptyString)]

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }
    mn = self.cfg.GetMasterNode()
    all_nodes = self.cfg.GetNodeList()
    return env, [mn], all_nodes

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    hostname = netutils.GetHostname(name=self.op.name,
                                    family=self.cfg.GetPrimaryIPFamily())

    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)
    if new_ip != old_ip:
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network" %
                                   new_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    clustername = self.op.name
    ip = self.ip

    # shutdown the master IP
    master = self.cfg.GetMasterNode()
    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetOnlineNodeList()
      try:
        node_list.remove(master)
      except ValueError:
        pass
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
    finally:
      result = self.rpc.call_node_start_master(master, False, False)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)

    return clustername


class LUSetClusterParams(LogicalUnit):
  """Change the parameters of the cluster.

  """
  HPATH = "cluster-modify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_PARAMS = [
    ("vg_name", None, ht.TMaybeString),
    ("enabled_hypervisors", None,
     ht.TOr(ht.TAnd(ht.TListOf(ht.TElemOf(constants.HYPER_TYPES)), ht.TTrue),
            ht.TNone)),
    ("hvparams", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict),
                              ht.TNone)),
    ("beparams", None, ht.TOr(ht.TDict, ht.TNone)),
    ("os_hvp", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict),
                            ht.TNone)),
    ("osparams", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict),
                              ht.TNone)),
    ("candidate_pool_size", None, ht.TOr(ht.TStrictPositiveInt, ht.TNone)),
    ("uid_pool", None, ht.NoType),
    ("add_uids", None, ht.NoType),
    ("remove_uids", None, ht.NoType),
    ("maintain_node_health", None, ht.TMaybeBool),
    ("prealloc_wipe_disks", None, ht.TMaybeBool),
    ("nicparams", None, ht.TOr(ht.TDict, ht.TNone)),
    ("ndparams", None, ht.TOr(ht.TDict, ht.TNone)),
    ("drbd_helper", None, ht.TOr(ht.TString, ht.TNone)),
    ("default_iallocator", None, ht.TOr(ht.TString, ht.TNone)),
    ("master_netdev", None, ht.TOr(ht.TString, ht.TNone)),
    ("reserved_lvs", None, ht.TOr(ht.TListOf(ht.TNonEmptyString), ht.TNone)),
    ("hidden_os", None, ht.TOr(ht.TListOf(\
          ht.TAnd(ht.TList,
                ht.TIsLength(2),
                ht.TMap(lambda v: v[0], ht.TElemOf(constants.DDMS_VALUES)))),
          ht.TNone)),
    ("blacklisted_os", None, ht.TOr(ht.TListOf(\
          ht.TAnd(ht.TList,
                ht.TIsLength(2),
                ht.TMap(lambda v: v[0], ht.TElemOf(constants.DDMS_VALUES)))),
          ht.TNone)),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    """Check parameters

    """
    if self.op.uid_pool:
      uidpool.CheckUidPool(self.op.uid_pool)

    if self.op.add_uids:
      uidpool.CheckUidPool(self.op.add_uids)

    if self.op.remove_uids:
      uidpool.CheckUidPool(self.op.remove_uids)

  def ExpandNames(self):
    # FIXME: in the future maybe other cluster params won't require checking on
    # all nodes to be modified.
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
      }
    mn = self.cfg.GetMasterNode()
    return env, [mn], [mn]

2775
    """Check prerequisites.
2776

2777
    This checks whether the given params don't conflict and
2778
    if the given volume group is valid.
2779

2780
    """
2781
    if self.op.vg_name is not None and not self.op.vg_name:
2782
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2783
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2784
                                   " instances exist", errors.ECODE_INVAL)
2785

    
2786
    if self.op.drbd_helper is not None and not self.op.drbd_helper:
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
        raise errors.OpPrereqError("Cannot disable drbd helper while"
                                   " drbd-based instances exist",
                                   errors.ECODE_INVAL)

    node_list = self.acquired_locks[locking.LEVEL_NODE]

    # if vg_name not None, checks given volume group on all nodes
    if self.op.vg_name:
      vglist = self.rpc.call_vg_list(node_list)
      for node in node_list:
        msg = vglist[node].fail_msg
        if msg:
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
          continue
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              self.op.vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus), errors.ECODE_ENVIRON)

    if self.op.drbd_helper:
      # checks given drbd helper on all nodes
      helpers = self.rpc.call_drbd_helper(node_list)
      for node in node_list:
        ninfo = self.cfg.GetNodeInfo(node)
        if ninfo.offline:
          self.LogInfo("Not checking drbd helper on offline node %s", node)
          continue
        msg = helpers[node].fail_msg
        if msg:
          raise errors.OpPrereqError("Error checking drbd helper on node"
                                     " '%s': %s" % (node, msg),
                                     errors.ECODE_ENVIRON)
        node_helper = helpers[node].payload
        if node_helper != self.op.drbd_helper:
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
                                     (node, node_helper), errors.ECODE_ENVIRON)

    self.cluster = cluster = self.cfg.GetClusterInfo()
    # validate params changes
    if self.op.beparams:
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)

    if self.op.nicparams:
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
      nic_errors = []

      # check all instances for consistency
      for instance in self.cfg.GetAllInstancesInfo().values():
        for nic_idx, nic in enumerate(instance.nics):
          params_copy = copy.deepcopy(nic.nicparams)
          params_filled = objects.FillDict(self.new_nicparams, params_copy)

          # check parameter syntax
          try:
            objects.NIC.CheckParameterSyntax(params_filled)
          except errors.ConfigurationError, err:
            nic_errors.append("Instance %s, nic/%d: %s" %
                              (instance.name, nic_idx, err))

          # if we're moving instances to routed, check that they have an ip
          target_mode = params_filled[constants.NIC_MODE]
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
            nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
                              (instance.name, nic_idx))
      if nic_errors:
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
                                   "\n".join(nic_errors))

    # hypervisor list/parameters
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
    if self.op.hvparams:
      for hv_name, hv_dict in self.op.hvparams.items():
        if hv_name not in self.new_hvparams:
          self.new_hvparams[hv_name] = hv_dict
        else:
          self.new_hvparams[hv_name].update(hv_dict)

    # os hypervisor parameters
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
    if self.op.os_hvp:
      for os_name, hvs in self.op.os_hvp.items():
        if os_name not in self.new_os_hvp:
          self.new_os_hvp[os_name] = hvs
        else:
          for hv_name, hv_dict in hvs.items():
            if hv_name not in self.new_os_hvp[os_name]:
              self.new_os_hvp[os_name][hv_name] = hv_dict
            else:
              self.new_os_hvp[os_name][hv_name].update(hv_dict)

    # os parameters
    self.new_osp = objects.FillDict(cluster.osparams, {})
    if self.op.osparams:
      for os_name, osp in self.op.osparams.items():
        if os_name not in self.new_osp:
          self.new_osp[os_name] = {}

        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
                                                  use_none=True)

        if not self.new_osp[os_name]:
          # we removed all parameters
          del self.new_osp[os_name]
        else:
          # check the parameter validity (remote check)
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
                         os_name, self.new_osp[os_name])

    # changes to the hypervisor list
    if self.op.enabled_hypervisors is not None:
      self.hv_list = self.op.enabled_hypervisors
      for hv in self.hv_list:
        # if the hypervisor doesn't already exist in the cluster
        # hvparams, we initialize it to empty, and then (in both
        # cases) we make sure to fill the defaults, as we might not
        # have a complete defaults list if the hypervisor wasn't
        # enabled before
        if hv not in new_hvp:
          new_hvp[hv] = {}
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
    else:
      self.hv_list = cluster.enabled_hypervisors

    if self.op.hvparams or self.op.enabled_hypervisors is not None:
      # either the enabled list has changed, or the parameters have, validate
      for hv_name, hv_params in self.new_hvparams.items():
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
            (self.op.enabled_hypervisors and
             hv_name in self.op.enabled_hypervisors)):
          # either this is a new hypervisor, or its parameters have changed
          hv_class = hypervisor.GetHypervisor(hv_name)
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          hv_class.CheckParameterSyntax(hv_params)
          _CheckHVParams(self, node_list, hv_name, hv_params)

    if self.op.os_hvp:
      # no need to check any newly-enabled hypervisors, since the
      # defaults have already been checked in the above code-block
      for os_name, os_hvp in self.new_os_hvp.items():
        for hv_name, hv_params in os_hvp.items():
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          # we need to fill in the new os_hvp on top of the actual hv_p
          cluster_defaults = self.new_hvparams.get(hv_name, {})
          new_osp = objects.FillDict(cluster_defaults, hv_params)
          hv_class = hypervisor.GetHypervisor(hv_name)
          hv_class.CheckParameterSyntax(new_osp)
          _CheckHVParams(self, node_list, hv_name, new_osp)

    if self.op.default_iallocator:
      alloc_script = utils.FindFile(self.op.default_iallocator,
                                    constants.IALLOCATOR_SEARCH_PATH,
                                    os.path.isfile)
      if alloc_script is None:
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
                                   " specified" % self.op.default_iallocator,
                                   errors.ECODE_INVAL)
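  # Editorial note (assumption about objects.FillDict, not stated in this
  # excerpt): the merges above rely on FillDict(defaults, overrides) returning
  # a copy of `defaults` updated with `overrides`, e.g.
  #   objects.FillDict({"a": 1, "b": 2}, {"b": 3}) == {"a": 1, "b": 3}
  # so opcode-supplied values always take precedence over cluster defaults.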

  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    """
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
    if self.op.drbd_helper is not None:
      new_helper = self.op.drbd_helper
      if not new_helper:
        new_helper = None
      if new_helper != self.cfg.GetDRBDHelper():
        self.cfg.SetDRBDHelper(new_helper)
      else:
        feedback_fn("Cluster DRBD helper already in desired state,"
                    " not changing")
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
    if self.op.enabled_hypervisors is not None:
      self.cluster.hvparams = self.new_hvparams
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
    if self.op.beparams:
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
    if self.op.nicparams:
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
    if self.op.osparams:
      self.cluster.osparams = self.new_osp
    if self.op.ndparams:
      self.cluster.ndparams = self.new_ndparams

    if self.op.candidate_pool_size is not None:
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
      # we need to update the pool size here, otherwise the save will fail
      _AdjustCandidatePool(self, [])

    if self.op.maintain_node_health is not None:
      self.cluster.maintain_node_health = self.op.maintain_node_health

    if self.op.prealloc_wipe_disks is not None:
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks

    if self.op.add_uids is not None:
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)

    if self.op.remove_uids is not None:
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)

    if self.op.uid_pool is not None:
      self.cluster.uid_pool = self.op.uid_pool

    if self.op.default_iallocator is not None:
      self.cluster.default_iallocator = self.op.default_iallocator

    if self.op.reserved_lvs is not None:
      self.cluster.reserved_lvs = self.op.reserved_lvs

    def helper_os(aname, mods, desc):
      desc += " OS list"
      lst = getattr(self.cluster, aname)
      for key, val in mods:
        if key == constants.DDM_ADD:
          if val in lst:
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
          else:
            lst.append(val)
        elif key == constants.DDM_REMOVE:
          if val in lst:
            lst.remove(val)
          else:
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
        else:
          raise errors.ProgrammerError("Invalid modification '%s'" % key)

    if self.op.hidden_os:
      helper_os("hidden_os", self.op.hidden_os, "hidden")

    if self.op.blacklisted_os:
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")

    if self.op.master_netdev:
      master = self.cfg.GetMasterNode()
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
                  self.cluster.master_netdev)
      result = self.rpc.call_node_stop_master(master, False)
      result.Raise("Could not disable the master ip")
      feedback_fn("Changing master_netdev from %s to %s" %
                  (self.cluster.master_netdev, self.op.master_netdev))
      self.cluster.master_netdev = self.op.master_netdev

    self.cfg.Update(self.cluster, feedback_fn)

    if self.op.master_netdev:
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
                  self.op.master_netdev)
      result = self.rpc.call_node_start_master(master, False, False)
      if result.fail_msg:
        self.LogWarning("Could not re-enable the master ip on"
                        " the master, please restart manually: %s",
                        result.fail_msg)


def _UploadHelper(lu, nodes, fname):
  """Helper for uploading a file and showing warnings.

  """
  if os.path.exists(fname):
    result = lu.rpc.call_upload_file(nodes, fname)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        msg = ("Copy of file %s to node %s failed: %s" %
               (fname, to_node, msg))
        lu.proc.LogWarning(msg)


def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  # 1. Gather target nodes
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
  dist_nodes = lu.cfg.GetOnlineNodeList()
  nvm_nodes = lu.cfg.GetNonVmCapableNodeList()
  vm_nodes = [name for name in dist_nodes if name not in nvm_nodes]
  if additional_nodes is not None:
    dist_nodes.extend(additional_nodes)
    if additional_vm:
      vm_nodes.extend(additional_nodes)
  if myself.name in dist_nodes:
    dist_nodes.remove(myself.name)
  if myself.name in vm_nodes:
    vm_nodes.remove(myself.name)

  # 2. Gather files to distribute
  dist_files = set([constants.ETC_HOSTS,
                    constants.SSH_KNOWN_HOSTS_FILE,
                    constants.RAPI_CERT_FILE,
                    constants.RAPI_USERS_FILE,
                    constants.CONFD_HMAC_KEY,
                    constants.CLUSTER_DOMAIN_SECRET_FILE,
                   ])

  vm_files = set()
  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
  for hv_name in enabled_hypervisors:
    hv_class = hypervisor.GetHypervisor(hv_name)
    vm_files.update(hv_class.GetAncillaryFiles())

  # 3. Perform the files upload
  for fname in dist_files:
    _UploadHelper(lu, dist_nodes, fname)
  for fname in vm_files:
    _UploadHelper(lu, vm_nodes, fname)


class LURedistributeConfig(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)


def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  """
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                           node, disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded


def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result


class LUOobCommand(NoHooksLU):
  """Logical unit for OOB handling.

  """
  _OP_PARAMS = [
    _PNodeName,
    ("command", None, ht.TElemOf(constants.OOB_COMMANDS)),
    ("timeout", constants.OOB_TIMEOUT, ht.TInt),
    ]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - OOB is supported

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)

    if node is None:
      raise errors.OpPrereqError("Node %s not found" % self.op.node_name)

    self.oob_program = _SupportsOob(self.cfg, node)

    if not self.oob_program:
      raise errors.OpPrereqError("OOB is not supported for node %s" %
                                 self.op.node_name)

    if self.op.command == constants.OOB_POWER_OFF and not node.offline:
      raise errors.OpPrereqError(("Cannot power off node %s because it is"
                                  " not marked offline") % self.op.node_name)

    self.node = node

  def ExpandNames(self):
    """Gather locks we need.

    """
    node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    self.needed_locks = {
      locking.LEVEL_NODE: [node_name],
      }

  def Exec(self, feedback_fn):
    """Execute OOB and return result if we expect any.

    """
    master_node = self.cfg.GetMasterNode()
    node = self.node

    logging.info("Executing out-of-band command '%s' using '%s' on %s",
                 self.op.command, self.oob_program, self.op.node_name)
    result = self.rpc.call_run_oob(master_node, self.oob_program,
                                   self.op.command, self.op.node_name,
                                   self.op.timeout)

    result.Raise("An error occurred on execution of OOB helper")

    self._CheckPayload(result)

    if self.op.command == constants.OOB_HEALTH:
      # For health we should log important events
      for item, status in result.payload:
        if status in [constants.OOB_STATUS_WARNING,
                      constants.OOB_STATUS_CRITICAL]:
          logging.warning("On node '%s' item '%s' has status '%s'",
                          self.op.node_name, item, status)

    if self.op.command == constants.OOB_POWER_ON:
      node.powered = True
    elif self.op.command == constants.OOB_POWER_OFF:
      node.powered = False
    elif self.op.command == constants.OOB_POWER_STATUS:
      powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
      if powered != self.node.powered:
        logging.warning(("Recorded power state (%s) of node '%s' does not match"
                         " actual power state (%s)"), node.powered,
                        self.op.node_name, powered)

    self.cfg.Update(node, feedback_fn)

    return result.payload

  def _CheckPayload(self, result):
    """Checks if the payload is valid.

    @param result: RPC result
    @raises errors.OpExecError: If payload is not valid

    """
    errs = []
    if self.op.command == constants.OOB_HEALTH:
      if not isinstance(result.payload, list):
        errs.append("command 'health' is expected to return a list but got %s" %
                    type(result.payload))
      for item, status in result.payload:
        if status not in constants.OOB_STATUSES:
          errs.append("health item '%s' has invalid status '%s'" %
                      (item, status))

    if self.op.command == constants.OOB_POWER_STATUS:
      if not isinstance(result.payload, dict):
        errs.append("power-status is expected to return a dict but got %s" %
                    type(result.payload))

    if self.op.command in [
        constants.OOB_POWER_ON,
        constants.OOB_POWER_OFF,
        constants.OOB_POWER_CYCLE,
        ]:
      if result.payload is not None:
        errs.append("%s is expected to not return payload but got '%s'" %
                    (self.op.command, result.payload))

    if errs:
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
                               utils.CommaJoin(errs))



class LUDiagnoseOS(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  _OP_PARAMS = [
    _POutputFields,
    ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
    ]
  REQ_BGL = False
  _HID = "hidden"
  _BLK = "blacklisted"
  _VLD = "valid"
  _FIELDS_STATIC = utils.FieldSet()
  _FIELDS_DYNAMIC = utils.FieldSet("name", _VLD, "node_status", "variants",
                                   "parameters", "api_versions", _HID, _BLK)

  def CheckArguments(self):
    if self.op.names:
      raise errors.OpPrereqError("Selective OS query not supported",
                                 errors.ECODE_INVAL)

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    # Lock all nodes, in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    self.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into an a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and tuples of (path, status, diagnose,
        variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "", [], [])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for (name, path, status, diagnose, variants,
           params, api_versions) in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        # convert params from [name, help] to (name, help)
        params = [tuple(v) for v in params]
        all_os[name][node_name].append((path, status, diagnose,
                                        variants, params, api_versions))
    return all_os

  def Exec(self, feedback_fn):
    """Compute the list of OSes.

    """
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
    node_data = self.rpc.call_os_diagnose(valid_nodes)
    pol = self._DiagnoseByOS(node_data)
    output = []
    cluster = self.cfg.GetClusterInfo()

    for os_name in utils.NiceSort(pol.keys()):
      os_data = pol[os_name]
      row = []
      valid = True
      (variants, params, api_versions) = null_state = (set(), set(), set())
      for idx, osl in enumerate(os_data.values()):
        valid = bool(valid and osl and osl[0][1])
        if not valid:
          (variants, params, api_versions) = null_state
          break
        node_variants, node_params, node_api = osl[0][3:6]
        if idx == 0: # first entry
          variants = set(node_variants)
          params = set(node_params)
          api_versions = set(node_api)
        else: # keep consistency
          variants.intersection_update(node_variants)
          params.intersection_update(node_params)
          api_versions.intersection_update(node_api)

      is_hid = os_name in cluster.hidden_os
      is_blk = os_name in cluster.blacklisted_os
      if ((self._HID not in self.op.output_fields and is_hid) or
          (self._BLK not in self.op.output_fields and is_blk) or
          (self._VLD not in self.op.output_fields and not valid)):
        continue

      for field in self.op.output_fields:
        if field == "name":
          val = os_name
        elif field == self._VLD:
          val = valid
        elif field == "node_status":
          # this is just a copy of the dict
          val = {}
          for node_name, nos_list in os_data.items():
            val[node_name] = nos_list
        elif field == "variants":
          val = utils.NiceSort(list(variants))
        elif field == "parameters":
          val = list(params)
        elif field == "api_versions":
          val = list(api_versions)
        elif field == self._HID:
          val = is_hid
        elif field == self._BLK:
          val = is_blk
        else:
          raise errors.ParameterError(field)
        row.append(val)
      output.append(row)

    return output


class LURemoveNode(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE
  _OP_PARAMS = [
    _PNodeName,
    ]

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      logging.warning("Node %s which is about to be removed not found"
                      " in the all nodes list", self.op.node_name)
    return env, all_nodes, all_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    instance_list = self.cfg.GetInstanceList()

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node,"
                                 " you need to failover first.",
                                 errors.ECODE_INVAL)

    for instance_name in instance_list:
      instance = self.cfg.GetInstanceInfo(instance_name)
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first." % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_REMOVE,
                                              node.name, None)
      result.Raise("Can't update hosts file with new host data")
      _RedistributeAncillaryFiles(self)


class _NodeQuery(_QueryBase):
  FIELDS = query.NODE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks[locking.LEVEL_NODE] = 1

    if self.names:
      self.wanted = _GetWantedNodes(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.NQ_LIVE in self.requested_data)

    if self.do_locking:
      # if we don't request only static fields, we need to lock the nodes
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    all_info = lu.cfg.GetAllNodesInfo()

    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)

    # Gather data as requested
    if query.NQ_LIVE in self.requested_data:
      node_data = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
                                        lu.cfg.GetHypervisorType())
      live_data = dict((name, nresult.payload)
                       for (name, nresult) in node_data.items()
                       if not nresult.fail_msg and nresult.payload)
    else:
      live_data = None

    if query.NQ_INST in self.requested_data:
      node_to_primary = dict([(name, set()) for name in nodenames])
      node_to_secondary = dict([(name, set()) for name in nodenames])

      inst_data = lu.cfg.GetAllInstancesInfo()

      for inst in inst_data.values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)
    else:
      node_to_primary = None
      node_to_secondary = None

    if query.NQ_GROUP in self.requested_data:
      groups = lu.cfg.GetAllNodeGroupsInfo()
    else:
      groups = {}

    return query.NodeQueryData([all_info[name] for name in nodenames],
                               live_data, lu.cfg.GetMasterNode(),
                               node_to_primary, node_to_secondary, groups)


class LUQueryNodes(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable-msg=W0142
  _OP_PARAMS = [
    _POutputFields,
    ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
    ("use_locking", False, ht.TBool),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    self.nq = _NodeQuery(self.op.names, self.op.output_fields,
                         self.op.use_locking)

  def ExpandNames(self):
    self.nq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.nq.OldStyleQuery(self)


class LUQueryNodeVolumes(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  _OP_PARAMS = [
    _POutputFields,
    ("nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
    ]
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    nodenames = self.acquired_locks[locking.LEVEL_NODE]
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = [self.cfg.GetInstanceInfo(iname) for iname
             in self.cfg.GetInstanceList()]

    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = nresult.payload[:]
      node_vols.sort(key=lambda vol: vol['dev'])

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol['dev']
          elif field == "vg":
            val = vol['vg']
          elif field == "name":
            val = vol['name']
          elif field == "size":
            val = int(float(vol['size']))
          elif field == "instance":
            for inst in ilist:
              if node not in lv_by_node[inst]:
                continue
              if vol['name'] in lv_by_node[inst][node]:
                val = inst.name
                break
            else:
              val = '-'
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output


class LUQueryNodeStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  _OP_PARAMS = [
    _POutputFields,
    ("nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
    ("storage_type", ht.NoDefault, _CheckStorageType),
    ("name", None, ht.TMaybeString),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result


class _InstanceQuery(_QueryBase):
  FIELDS = query.INSTANCE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks[locking.LEVEL_INSTANCE] = 1
    lu.share_locks[locking.LEVEL_NODE] = 1

    if self.names:
      self.wanted = _GetWantedInstances(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.IQ_LIVE in self.requested_data)
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      lu.needed_locks[locking.LEVEL_NODE] = []
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, lu, level):
    if level == locking.LEVEL_NODE and self.do_locking:
      lu._LockInstancesNodes() # pylint: disable-msg=W0212

  def _GetQueryData(self, lu):
    """Computes the list of instances and their attributes.

    """
    all_info = lu.cfg.GetAllInstancesInfo()

    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)

    instance_list = [all_info[name] for name in instance_names]
    nodes = frozenset([inst.primary_node for inst in instance_list])
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
    bad_nodes = []
    offline_nodes = []

    # Gather data as requested
    if query.IQ_LIVE in self.requested_data:
      live_data = {}
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          assert result.fail_msg
          offline_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        elif result.payload:
          live_data.update(result.payload)
        # else no instance is alive
    else:
      live_data = {}

    if query.IQ_DISKUSAGE in self.requested_data:
      disk_usage = dict((inst.name,
                         _ComputeDiskSize(inst.disk_template,
                                          [{"size": disk.size}
                                           for disk in inst.disks]))
                        for inst in instance_list)
    else:
      disk_usage = None

    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
                                   disk_usage, offline_nodes, bad_nodes,
                                   live_data)


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  }


def _GetQueryImplementation(name):
  """Returns the implemtnation for a query type.

  @param name: Query type, must be one of L{constants.QR_OP_QUERY}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)


class LUQuery(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable-msg=W0142
  _OP_PARAMS = [
    ("what", ht.NoDefault, ht.TElemOf(constants.QR_OP_QUERY)),
    ("fields", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
    ("filter", None, ht.TOr(ht.TNone,
                            ht.TListOf(ht.TOr(ht.TNonEmptyString, ht.TList)))),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    qcls = _GetQueryImplementation(self.op.what)
    names = qlang.ReadSimpleFilter("name", self.op.filter)

    self.impl = qcls(names, self.op.fields, False)

  def ExpandNames(self):
    self.impl.ExpandNames(self)

  def DeclareLocks(self, level):
    self.impl.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.impl.NewStyleQuery(self)


class LUQueryFields(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable-msg=W0142
  _OP_PARAMS = [
    ("what", ht.NoDefault, ht.TElemOf(constants.QR_OP_QUERY)),
    ("fields", None, ht.TOr(ht.TNone, ht.TListOf(ht.TNonEmptyString))),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    self.qcls = _GetQueryImplementation(self.op.what)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    return self.qcls.FieldsQuery(self.op.fields)


class LUModifyNodeStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  _OP_PARAMS = [
    _PNodeName,
    ("storage_type", ht.NoDefault, _CheckStorageType),
    ("name", ht.NoDefault, ht.TNonEmptyString),
    ("changes", ht.NoDefault, ht.TDict),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


class LUAddNode(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _OP_PARAMS = [
    _PNodeName,
    ("primary_ip", None, ht.NoType),
    ("secondary_ip", None, ht.TMaybeString),
    ("readd", False, ht.TBool),
    ("group", None, ht.TMaybeString),
    ("master_capable", None, ht.TMaybeBool),
    ("vm_capable", None, ht.TMaybeBool),
    ("ndparams", None, ht.TOr(ht.TDict, ht.TNone)),
    ]
  _NFLAGS = ["master_capable", "vm_capable"]

  def CheckArguments(self):
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
    # validate/normalize the node name
    self.hostname = netutils.GetHostname(name=self.op.node_name,
                                         family=self.primary_ip_family)
    self.op.node_name = self.hostname.name
    if self.op.readd and self.op.group:
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
                                 " being readded", errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }
    nodes_0 = self.cfg.GetNodeList()
    nodes_1 = nodes_0 + [self.op.node_name, ]
    return env, nodes_0, nodes_1

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) matches the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg
    hostname = self.hostname
    node = hostname.name
    primary_ip = self.op.primary_ip = hostname.ip
    if self.op.secondary_ip is None:
      if self.primary_ip_family == netutils.IP6Address.family:
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
                                   " IPv4 address must be given as secondary",
                                   errors.ECODE_INVAL)
      self.op.secondary_ip = primary_ip

    secondary_ip = self.op.secondary_ip
    if not netutils.IP4Address.IsValid(secondary_ip):
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                 " address" % secondary_ip, errors.ECODE_INVAL)

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    self.changed_primary_ip = False

    for existing_node_name in node_list:
      existing_node = cfg.GetNodeInfo(existing_node_name)

      if self.op.readd and node == existing_node_name:
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # After this 'if' block, None is no longer a valid value for the
    # _capable op attributes
    if self.op.readd:
      old_node = self.cfg.GetNodeInfo(node)
      assert old_node is not None, "Can't retrieve locked node %s" % node
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, getattr(old_node, attr))
    else:
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, True)

    if self.op.readd and not self.op.vm_capable:
      pri, sec = cfg.GetNodeInstances(node)
      if pri or sec:
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
                                   " flag set to false, but it already holds"
                                   " instances" % node,
                                   errors.ECODE_STATE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no secondary ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a secondary ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                           source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to node daemon port",
                                   errors.ECODE_ENVIRON)

    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    if self.op.master_capable:
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
    else:
      self.master_candidate = False

    if self.op.readd:
      self.new_node = old_node
    else:
      node_group = cfg.LookupNodeGroup(self.op.group)
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False,
                                   group=node_group)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    # We are adding a new node, so we assume it's powered
4255
    new_node.powered = True
4256

    
4257
    # for re-adds, reset the offline/drained/master-candidate flags;
4258
    # we need to reset here, otherwise offline would prevent RPC calls
4259
    # later in the procedure; this also means that if the re-add
4260
    # fails, we are left with a non-offlined, broken node
4261
    if self.op.readd:
4262
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
4263
      self.LogInfo("Readding a node, the offline/drained flags were reset")
4264
      # if we demote the node, we do cleanup later in the procedure
4265
      new_node.master_candidate = self.master_candidate
4266
      if self.changed_primary_ip:
4267
        new_node.primary_ip = self.op.primary_ip
4268

    
4269
    # copy the master/vm_capable flags
4270
    for attr in self._NFLAGS:
4271
      setattr(new_node, attr, getattr(self.op, attr))
4272

    
4273
    # notify the user about any possible mc promotion
4274
    if new_node.master_candidate:
4275
      self.LogInfo("Node will be a master candidate")
4276

    
4277
    if self.op.ndparams:
4278
      new_node.ndparams = self.op.ndparams
4279
    else:
4280
      new_node.ndparams = {}
4281

    
4282
    # check connectivity
4283
    result = self.rpc.call_version([node])[node]
4284
    result.Raise("Can't get version information from node %s" % node)
4285
    if constants.PROTOCOL_VERSION == result.payload:
4286
      logging.info("Communication to node %s fine, sw version %s match",
4287
                   node, result.payload)
4288
    else:
4289
      raise errors.OpExecError("Version mismatch master version %s,"
4290
                               " node version %s" %
4291
                               (constants.PROTOCOL_VERSION, result.payload))
    # Add node to our /etc/hosts, and add key to known_hosts
4294
    if self.cfg.GetClusterInfo().modify_etc_hosts:
4295
      master_node = self.cfg.GetMasterNode()
4296
      result = self.rpc.call_etc_hosts_modify(master_node,
4297
                                              constants.ETC_HOSTS_ADD,
4298
                                              self.hostname.name,
4299
                                              self.hostname.ip)
4300
      result.Raise("Can't update hosts file with new host data")
    if new_node.secondary_ip != new_node.primary_ip:
4303
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
4304
                               False)
4305

    
4306
    node_verify_list = [self.cfg.GetMasterNode()]
4307
    node_verify_param = {
4308
      constants.NV_NODELIST: [node],
4309
      # TODO: do a node-net-test as well?
4310
    }
4311

    
4312
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
4313
                                       self.cfg.GetClusterName())
4314
    for verifier in node_verify_list:
4315
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
4316
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
4317
      if nl_payload:
4318
        for failed in nl_payload:
4319
          feedback_fn("ssh/hostname verification failed"
4320
                      " (checking from %s): %s" %
4321
                      (verifier, nl_payload[failed]))
4322
        raise errors.OpExecError("ssh/hostname verification failed.")
    if self.op.readd:
4325
      _RedistributeAncillaryFiles(self)
4326
      self.context.ReaddNode(new_node)
4327
      # make sure we redistribute the config
4328
      self.cfg.Update(new_node, feedback_fn)
4329
      # and make sure the new node will not have old files around
4330
      if not new_node.master_candidate:
4331
        result = self.rpc.call_node_demote_from_mc(new_node.name)
4332
        msg = result.fail_msg
4333
        if msg:
4334
          self.LogWarning("Node failed to demote itself from master"
4335
                          " candidate status: %s" % msg)
4336
    else:
4337
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
4338
                                  additional_vm=self.op.vm_capable)
4339
      self.context.AddNode(new_node, self.proc.GetECId())
4342
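
# Illustrative sketch (not part of Ganeti and not used by any LU in this
# module): the version handshake in LUAddNode.Exec above reduces to a
# strict equality test between the master's protocol version and the one
# reported by the new node.  The helper name below is hypothetical.
def _SketchCheckProtocolVersion(master_version, node_version):
  """Return None if the versions match, an error message otherwise.

  """
  if master_version == node_version:
    return None
  return ("Version mismatch master version %s, node version %s" %
          (master_version, node_version))

# Example usage of the sketch:
#   _SketchCheckProtocolVersion(40, 40) -> None
#   _SketchCheckProtocolVersion(40, 30) -> "Version mismatch ..."
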
class LUSetNodeParams(LogicalUnit):
4343
  """Modifies the parameters of a node.
4344

4345
  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
4346
      to the node role (as _ROLE_*)
4347
  @cvar _R2F: a dictionary from node role to tuples of flags
4348
  @cvar _FLAGS: a list of attribute names corresponding to the flags
4349

4350
  """
4351
  HPATH = "node-modify"
4352
  HTYPE = constants.HTYPE_NODE
4353
  _OP_PARAMS = [
4354
    _PNodeName,
4355
    ("master_candidate", None, ht.TMaybeBool),
4356
    ("offline", None, ht.TMaybeBool),
4357
    ("drained", None, ht.TMaybeBool),
4358
    ("auto_promote", False, ht.TBool),
4359
    ("master_capable", None, ht.TMaybeBool),
4360
    ("vm_capable", None, ht.TMaybeBool),
4361
    ("secondary_ip", None, ht.TMaybeString),
4362
    ("ndparams", None, ht.TOr(ht.TDict, ht.TNone)),
4363
    ("powered", None, ht.TMaybeBool),
4364
    _PForce,
4365
    ]
4366
  REQ_BGL = False
4367
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
4368
  _F2R = {
4369
    (True, False, False): _ROLE_CANDIDATE,
4370
    (False, True, False): _ROLE_DRAINED,
4371
    (False, False, True): _ROLE_OFFLINE,
4372
    (False, False, False): _ROLE_REGULAR,
4373
    }
4374
  _R2F = dict((v, k) for k, v in _F2R.items())
4375
  _FLAGS = ["master_candidate", "drained", "offline"]
4376

    
4377
  def CheckArguments(self):
4378
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4379
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
4380
                self.op.master_capable, self.op.vm_capable,
4381
                self.op.secondary_ip, self.op.ndparams]
4382
    if all_mods.count(None) == len(all_mods):
4383
      raise errors.OpPrereqError("Please pass at least one modification",
4384
                                 errors.ECODE_INVAL)
4385
    if all_mods.count(True) > 1:
4386
      raise errors.OpPrereqError("Can't set the node into more than one"
4387
                                 " state at the same time",
4388
                                 errors.ECODE_INVAL)
4389

    
4390
    # Boolean value that tells us whether we might be demoting from MC
4391
    self.might_demote = (self.op.master_candidate == False or
4392
                         self.op.offline == True or
4393
                         self.op.drained == True or
4394
                         self.op.master_capable == False)
4395

    
4396
    if self.op.secondary_ip:
4397
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
4398
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4399
                                   " address" % self.op.secondary_ip,
4400
                                   errors.ECODE_INVAL)
4401

    
4402
    self.lock_all = self.op.auto_promote and self.might_demote
4403
    self.lock_instances = self.op.secondary_ip is not None
4404

    
4405
  def ExpandNames(self):
4406
    if self.lock_all:
4407
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
4408
    else:
4409
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
4410

    
4411
    if self.lock_instances:
4412
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
4413

    
4414
  def DeclareLocks(self, level):
4415
    # If we have locked all instances, before waiting to lock nodes, release
4416
    # all the ones living on nodes unrelated to the current operation.
4417
    if level == locking.LEVEL_NODE and self.lock_instances:
4418
      instances_release = []
4419
      instances_keep = []
4420
      self.affected_instances = []
4421
      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
4422
        for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
4423
          instance = self.context.cfg.GetInstanceInfo(instance_name)
4424
          i_mirrored = instance.disk_template in constants.DTS_NET_MIRROR
4425
          if i_mirrored and self.op.node_name in instance.all_nodes:
4426
            instances_keep.append(instance_name)
4427
            self.affected_instances.append(instance)
4428
          else:
4429
            instances_release.append(instance_name)
4430
        if instances_release:
4431
          self.context.glm.release(locking.LEVEL_INSTANCE, instances_release)
4432
          self.acquired_locks[locking.LEVEL_INSTANCE] = instances_keep
4433

    
4434
  def BuildHooksEnv(self):
4435
    """Build hooks env.
4436

4437
    This runs on the master node.
4438

4439
    """
4440
    env = {
4441
      "OP_TARGET": self.op.node_name,
4442
      "MASTER_CANDIDATE": str(self.op.master_candidate),
4443
      "OFFLINE": str(self.op.offline),
4444
      "DRAINED": str(self.op.drained),
4445
      "MASTER_CAPABLE": str(self.op.master_capable),
4446
      "VM_CAPABLE": str(self.op.vm_capable),
4447
      }
4448
    nl = [self.cfg.GetMasterNode(),
4449
          self.op.node_name]
4450
    return env, nl, nl
4451

    
4452
  def CheckPrereq(self):
4453
    """Check prerequisites.
4454

4455
    This only checks the instance list against the existing names.
4456

4457
    """
4458
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
4459

    
4460
    if (self.op.master_candidate is not None or
4461
        self.op.drained is not None or
4462
        self.op.offline is not None):
4463
      # we can't change the master's node flags
4464
      if self.op.node_name == self.cfg.GetMasterNode():
4465
        raise errors.OpPrereqError("The master role can be changed"
4466
                                   " only via master-failover",
4467
                                   errors.ECODE_INVAL)
4468

    
4469
    if self.op.master_candidate and not node.master_capable:
4470
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
4471
                                 " it a master candidate" % node.name,
4472
                                 errors.ECODE_STATE)
4473

    
4474
    if self.op.vm_capable == False:
4475
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
4476
      if ipri or isec:
4477
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
4478
                                   " the vm_capable flag" % node.name,
4479
                                   errors.ECODE_STATE)
4480

    
4481
    if node.master_candidate and self.might_demote and not self.lock_all:
4482
      assert not self.op.auto_promote, "auto-promote set but lock_all not"
4483
      # check if after removing the current node, we're missing master
4484
      # candidates
4485
      (mc_remaining, mc_should, _) = \
4486
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
4487
      if mc_remaining < mc_should:
4488
        raise errors.OpPrereqError("Not enough master candidates, please"
4489
                                   " pass auto_promote to allow promotion",
4490
                                   errors.ECODE_STATE)
4491

    
4492
    self.old_flags = old_flags = (node.master_candidate,
4493
                                  node.drained, node.offline)
4494
    assert old_flags in self._F2R, "Un-handled old flags  %s" % str(old_flags)
4495
    self.old_role = old_role = self._F2R[old_flags]
4496

    
4497
    # Check for ineffective changes
4498
    for attr in self._FLAGS:
4499
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
4500
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
4501
        setattr(self.op, attr, None)
4502

    
4503
    # Past this point, any flag change to False means a transition
4504
    # away from the respective state, as only real changes are kept
4505

    
4506
    # TODO: We might query the real power state if it supports OOB
4507
    if _SupportsOob(self.cfg, node):
4508
      if self.op.offline is False and not (node.powered or
4509
                                           self.op.powered == True):
4510
        raise errors.OpPrereqError(("Please power on node %s first before you"
4511
                                    " can reset offline state") %
4512
                                   self.op.node_name)
4513
    elif self.op.powered is not None:
4514
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
4515
                                  " which does not support out-of-band"
4516
                                  " handling") % self.op.node_name)
4517

    
4518
    # If we're being deofflined/drained, we'll MC ourself if needed
4519
    if (self.op.drained == False or self.op.offline == False or
4520
        (self.op.master_capable and not node.master_capable)):
4521
      if _DecideSelfPromotion(self):
4522
        self.op.master_candidate = True
4523
        self.LogInfo("Auto-promoting node to master candidate")
4524

    
4525
    # If we're no longer master capable, we'll demote ourselves from MC
4526
    if self.op.master_capable == False and node.master_candidate:
4527
      self.LogInfo("Demoting from master candidate")
4528
      self.op.master_candidate = False
4529

    
4530
    # Compute new role
4531
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
4532
    if self.op.master_candidate:
4533
      new_role = self._ROLE_CANDIDATE
4534
    elif self.op.drained:
4535
      new_role = self._ROLE_DRAINED
4536
    elif self.op.offline:
4537
      new_role = self._ROLE_OFFLINE
4538
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
4539
      # False is still in new flags, which means we're un-setting (the
4540
      # only) True flag
4541
      new_role = self._ROLE_REGULAR
4542
    else: # no new flags, nothing, keep old role
4543
      new_role = old_role
4544

    
4545
    self.new_role = new_role
4546

    
4547
    if old_role == self._ROLE_OFFLINE and new_role != old_role:
4548
      # Trying to transition out of offline status
4549
      result = self.rpc.call_version([node.name])[node.name]
4550
      if result.fail_msg:
4551
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
4552
                                   " to report its version: %s" %
4553
                                   (node.name, result.fail_msg),
4554
                                   errors.ECODE_STATE)
4555
      else:
4556
        self.LogWarning("Transitioning node from offline to online state"
4557
                        " without using re-add. Please make sure the node"
4558
                        " is healthy!")
4559

    
4560
    if self.op.secondary_ip:
4561
      # Ok even without locking, because this can't be changed by any LU
4562
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
4563
      master_singlehomed = master.secondary_ip == master.primary_ip
4564
      if master_singlehomed and self.op.secondary_ip:
4565
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
4566
                                   " homed cluster", errors.ECODE_INVAL)
4567

    
4568
      if node.offline:
4569
        if self.affected_instances:
4570
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
4571
                                     " node has instances (%s) configured"
4572
                                     " to use it" % self.affected_instances)
4573
      else:
4574
        # On online nodes, check that no instances are running, and that
4575
        # the node has the new ip and we can reach it.
4576
        for instance in self.affected_instances:
4577
          _CheckInstanceDown(self, instance, "cannot change secondary ip")
4578

    
4579
        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
4580
        if master.name != node.name:
4581
          # check reachability from master secondary ip to new secondary ip
4582
          if not netutils.TcpPing(self.op.secondary_ip,
4583
                                  constants.DEFAULT_NODED_PORT,
4584
                                  source=master.secondary_ip):
4585
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4586
                                       " based ping to node daemon port",
4587
                                       errors.ECODE_ENVIRON)
4588

    
4589
    if self.op.ndparams:
4590
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
4591
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
4592
      self.new_ndparams = new_ndparams
4593

    
4594
  def Exec(self, feedback_fn):
4595
    """Modifies a node.
4596

4597
    """
4598
    node = self.node
4599
    old_role = self.old_role
4600
    new_role = self.new_role
4601

    
4602
    result = []
4603

    
4604
    if self.op.ndparams:
4605
      node.ndparams = self.new_ndparams
4606

    
4607
    if self.op.powered is not None:
4608
      node.powered = self.op.powered
4609

    
4610
    for attr in ["master_capable", "vm_capable"]:
4611
      val = getattr(self.op, attr)
4612
      if val is not None:
4613
        setattr(node, attr, val)
4614
        result.append((attr, str(val)))
4615

    
4616
    if new_role != old_role:
4617
      # Tell the node to demote itself, if no longer MC and not offline
4618
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
4619
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
4620
        if msg:
4621
          self.LogWarning("Node failed to demote itself: %s", msg)
4622

    
4623
      new_flags = self._R2F[new_role]
4624
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
4625
        if of != nf:
4626
          result.append((desc, str(nf)))
4627
      (node.master_candidate, node.drained, node.offline) = new_flags
4628

    
4629
      # we locked all nodes, we adjust the CP before updating this node
4630
      if self.lock_all:
4631
        _AdjustCandidatePool(self, [node.name])
4632

    
4633
    if self.op.secondary_ip:
4634
      node.secondary_ip = self.op.secondary_ip
4635
      result.append(("secondary_ip", self.op.secondary_ip))
4636

    
4637
    # this will trigger configuration file update, if needed
4638
    self.cfg.Update(node, feedback_fn)
4639

    
4640
    # this will trigger job queue propagation or cleanup if the mc
4641
    # flag changed
4642
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
4643
      self.context.ReaddNode(node)
4644

    
4645
    return result
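
# Illustrative sketch of the flag/role translation documented in
# LUSetNodeParams above: a forward dictionary maps the (master_candidate,
# drained, offline) tuple to a symbolic role and the reverse mapping is
# derived from it, mirroring _F2R/_R2F.  The helper below is hypothetical
# and not referenced anywhere else in this module.
def _SketchNodeRole(master_candidate, drained, offline):
  """Map a node flag tuple to a human-readable role name.

  """
  flags_to_role = {
    (True, False, False): "candidate",
    (False, True, False): "drained",
    (False, False, True): "offline",
    (False, False, False): "regular",
    }
  role_to_flags = dict((v, k) for k, v in flags_to_role.items())
  role = flags_to_role[(master_candidate, drained, offline)]
  # the two tables are consistent by construction
  assert role_to_flags[role] == (master_candidate, drained, offline)
  return role
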
class LUPowercycleNode(NoHooksLU):
  """Powercycles a node.

  """
  _OP_PARAMS = [
    _PNodeName,
    _PForce,
    ]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload


class LUQueryClusterInfo(NoHooksLU):
4685
  """Query cluster configuration.
4686

4687
  """
4688
  REQ_BGL = False
4689

    
4690
  def ExpandNames(self):
4691
    self.needed_locks = {}
4692

    
4693
  def Exec(self, feedback_fn):
4694
    """Return cluster config.
4695

4696
    """
4697
    cluster = self.cfg.GetClusterInfo()
4698
    os_hvp = {}
4699

    
4700
    # Filter just for enabled hypervisors
4701
    for os_name, hv_dict in cluster.os_hvp.items():
4702
      os_hvp[os_name] = {}
4703
      for hv_name, hv_params in hv_dict.items():
4704
        if hv_name in cluster.enabled_hypervisors:
4705
          os_hvp[os_name][hv_name] = hv_params
4706

    
4707
    # Convert ip_family to ip_version
4708
    primary_ip_version = constants.IP4_VERSION
4709
    if cluster.primary_ip_family == netutils.IP6Address.family:
4710
      primary_ip_version = constants.IP6_VERSION
4711

    
4712
    result = {
4713
      "software_version": constants.RELEASE_VERSION,
4714
      "protocol_version": constants.PROTOCOL_VERSION,
4715
      "config_version": constants.CONFIG_VERSION,
4716
      "os_api_version": max(constants.OS_API_VERSIONS),
4717
      "export_version": constants.EXPORT_VERSION,
4718
      "architecture": (platform.architecture()[0], platform.machine()),
4719
      "name": cluster.cluster_name,
4720
      "master": cluster.master_node,
4721
      "default_hypervisor": cluster.enabled_hypervisors[0],
4722
      "enabled_hypervisors": cluster.enabled_hypervisors,
4723
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4724
                        for hypervisor_name in cluster.enabled_hypervisors]),
4725
      "os_hvp": os_hvp,
4726
      "beparams": cluster.beparams,
4727
      "osparams": cluster.osparams,
4728
      "nicparams": cluster.nicparams,
4729
      "candidate_pool_size": cluster.candidate_pool_size,
4730
      "master_netdev": cluster.master_netdev,
4731
      "volume_group_name": cluster.volume_group_name,
4732
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
4733
      "file_storage_dir": cluster.file_storage_dir,
4734
      "maintain_node_health": cluster.maintain_node_health,
4735
      "ctime": cluster.ctime,
4736
      "mtime": cluster.mtime,
4737
      "uuid": cluster.uuid,
4738
      "tags": list(cluster.GetTags()),
4739
      "uid_pool": cluster.uid_pool,
4740
      "default_iallocator": cluster.default_iallocator,
4741
      "reserved_lvs": cluster.reserved_lvs,
4742
      "primary_ip_version": primary_ip_version,
4743
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
4744
      }
4745

    
4746
    return result
4747

    
4748

    
4749
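
# Illustrative sketch of the os_hvp filtering done in
# LUQueryClusterInfo.Exec above: per-OS hypervisor parameters are kept
# only for hypervisors enabled on the cluster.  The helper is
# hypothetical and self-contained.
def _SketchFilterOsHvp(os_hvp, enabled_hypervisors):
  """Return a copy of os_hvp restricted to the enabled hypervisors.

  """
  filtered = {}
  for os_name, hv_dict in os_hvp.items():
    filtered[os_name] = dict((hv_name, hv_params)
                             for hv_name, hv_params in hv_dict.items()
                             if hv_name in enabled_hypervisors)
  return filtered
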
class LUQueryConfigValues(NoHooksLU):
4750
  """Return configuration values.
4751

4752
  """
4753
  _OP_PARAMS = [_POutputFields]
4754
  REQ_BGL = False
4755
  _FIELDS_DYNAMIC = utils.FieldSet()
4756
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
4757
                                  "watcher_pause", "volume_group_name")
4758

    
4759
  def CheckArguments(self):
4760
    _CheckOutputFields(static=self._FIELDS_STATIC,
4761
                       dynamic=self._FIELDS_DYNAMIC,
4762
                       selected=self.op.output_fields)
4763

    
4764
  def ExpandNames(self):
4765
    self.needed_locks = {}
4766

    
4767
  def Exec(self, feedback_fn):
4768
    """Dump a representation of the cluster config to the standard output.
4769

4770
    """
4771
    values = []
4772
    for field in self.op.output_fields:
4773
      if field == "cluster_name":
4774
        entry = self.cfg.GetClusterName()
4775
      elif field == "master_node":
4776
        entry = self.cfg.GetMasterNode()
4777
      elif field == "drain_flag":
4778
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4779
      elif field == "watcher_pause":
4780
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4781
      elif field == "volume_group_name":
4782
        entry = self.cfg.GetVGName()
4783
      else:
4784
        raise errors.ParameterError(field)
4785
      values.append(entry)
4786
    return values
4787

    
4788

    
4789
class LUActivateInstanceDisks(NoHooksLU):
4790
  """Bring up an instance's disks.
4791

4792
  """
4793
  _OP_PARAMS = [
4794
    _PInstanceName,
4795
    ("ignore_size", False, ht.TBool),
4796
    ]
4797
  REQ_BGL = False
4798

    
4799
  def ExpandNames(self):
4800
    self._ExpandAndLockInstance()
4801
    self.needed_locks[locking.LEVEL_NODE] = []
4802
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4803

    
4804
  def DeclareLocks(self, level):
4805
    if level == locking.LEVEL_NODE:
4806
      self._LockInstancesNodes()
4807

    
4808
  def CheckPrereq(self):
4809
    """Check prerequisites.
4810

4811
    This checks that the instance is in the cluster.
4812

4813
    """
4814
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4815
    assert self.instance is not None, \
4816
      "Cannot retrieve locked instance %s" % self.op.instance_name
4817
    _CheckNodeOnline(self, self.instance.primary_node)
4818

    
4819
  def Exec(self, feedback_fn):
4820
    """Activate the disks.
4821

4822
    """
4823
    disks_ok, disks_info = \
4824
              _AssembleInstanceDisks(self, self.instance,
4825
                                     ignore_size=self.op.ignore_size)
4826
    if not disks_ok:
4827
      raise errors.OpExecError("Cannot activate block devices")
4828

    
4829
    return disks_info
4830

    
4831

    
4832
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
4833
                           ignore_size=False):
4834
  """Prepare the block devices for an instance.
4835

4836
  This sets up the block devices on all nodes.
4837

4838
  @type lu: L{LogicalUnit}
4839
  @param lu: the logical unit on whose behalf we execute
4840
  @type instance: L{objects.Instance}
4841
  @param instance: the instance for whose disks we assemble
4842
  @type disks: list of L{objects.Disk} or None
4843
  @param disks: which disks to assemble (or all, if None)
4844
  @type ignore_secondaries: boolean
4845
  @param ignore_secondaries: if true, errors on secondary nodes
4846
      won't result in an error return from the function
4847
  @type ignore_size: boolean
4848
  @param ignore_size: if true, the current known size of the disk
4849
      will not be used during the disk activation, useful for cases
4850
      when the size is wrong
4851
  @return: False if the operation failed, otherwise a list of
4852
      (host, instance_visible_name, node_visible_name)
4853
      with the mapping from node devices to instance devices
4854

4855
  """
4856
  device_info = []
4857
  disks_ok = True
4858
  iname = instance.name
4859
  disks = _ExpandCheckDisks(instance, disks)
4860

    
4861
  # With the two-pass mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it
4864

    
4865
  # The proper fix would be to wait (with some limits) until the
4866
  # connection has been made and drbd transitions from WFConnection
4867
  # into any other network-connected state (Connected, SyncTarget,
4868
  # SyncSource, etc.)
4869

    
4870
  # 1st pass, assemble on all nodes in secondary mode
4871
  for inst_disk in disks:
4872
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4873
      if ignore_size:
4874
        node_disk = node_disk.Copy()
4875
        node_disk.UnsetSize()
4876
      lu.cfg.SetDiskID(node_disk, node)
4877
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
4878
      msg = result.fail_msg
4879
      if msg:
4880
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
4881
                           " (is_primary=False, pass=1): %s",
4882
                           inst_disk.iv_name, node, msg)
4883
        if not ignore_secondaries:
4884
          disks_ok = False
4885

    
4886
  # FIXME: race condition on drbd migration to primary
4887

    
4888
  # 2nd pass, do only the primary node
4889
  for inst_disk in disks:
4890
    dev_path = None
4891

    
4892
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4893
      if node != instance.primary_node:
4894
        continue
4895
      if ignore_size:
4896
        node_disk = node_disk.Copy()
4897
        node_disk.UnsetSize()
4898
      lu.cfg.SetDiskID(node_disk, node)
4899
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
4900
      msg = result.fail_msg
4901
      if msg:
4902
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
4903
                           " (is_primary=True, pass=2): %s",
4904
                           inst_disk.iv_name, node, msg)
4905
        disks_ok = False
4906
      else:
4907
        dev_path = result.payload
4908

    
4909
    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
4910

    
4911
  # leave the disks configured for the primary node
4912
  # this is a workaround that would be fixed better by
4913
  # improving the logical/physical id handling
4914
  for disk in disks:
4915
    lu.cfg.SetDiskID(disk, instance.primary_node)
4916

    
4917
  return disks_ok, device_info
4918

    
4919

    
4920
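
# Illustrative sketch of the two-pass activation pattern used by
# _AssembleInstanceDisks above: every node first assembles the device in
# secondary mode, and only afterwards the primary node re-assembles it as
# primary, which narrows (but does not close) the window for the DRBD
# handshake race.  "assemble_fn" is a hypothetical callback taking
# (node, disk, as_primary) and returning True on success.
def _SketchTwoPassAssemble(primary_node, disks_per_node, assemble_fn):
  """Run the secondary pass on all nodes, then the primary pass.

  """
  ok = True
  # 1st pass: secondary mode everywhere
  for node, disks in disks_per_node.items():
    for disk in disks:
      ok = assemble_fn(node, disk, False) and ok
  # 2nd pass: primary mode, on the primary node only
  for disk in disks_per_node.get(primary_node, []):
    ok = assemble_fn(primary_node, disk, True) and ok
  return ok
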
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                           ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")


class LUDeactivateInstanceDisks(NoHooksLU):
4936
  """Shutdown an instance's disks.
4937

4938
  """
4939
  _OP_PARAMS = [
4940
    _PInstanceName,
4941
    ]
4942
  REQ_BGL = False
4943

    
4944
  def ExpandNames(self):
4945
    self._ExpandAndLockInstance()
4946
    self.needed_locks[locking.LEVEL_NODE] = []
4947
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4948

    
4949
  def DeclareLocks(self, level):
4950
    if level == locking.LEVEL_NODE:
4951
      self._LockInstancesNodes()
4952

    
4953
  def CheckPrereq(self):
4954
    """Check prerequisites.
4955

4956
    This checks that the instance is in the cluster.
4957

4958
    """
4959
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4960
    assert self.instance is not None, \
4961
      "Cannot retrieve locked instance %s" % self.op.instance_name
4962

    
4963
  def Exec(self, feedback_fn):
4964
    """Deactivate the disks
4965

4966
    """
4967
    instance = self.instance
4968
    _SafeShutdownInstanceDisks(self, instance)


def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks if an instance is running, before calling
  _ShutdownInstanceDisks.

  """
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)


def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on

  """
  if disks is None:
    return instance.disks
  else:
    if not set(disks).issubset(instance.disks):
      raise errors.ProgrammerError("Can only act on disks belonging to the"
                                   " target instance")
    return disks
5000
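
# Illustrative usage sketch for _ExpandCheckDisks above, using plain
# values instead of objects.Disk: passing None selects every disk, while
# an explicit list is only accepted if it is a subset of the instance's
# disks.  The helper is hypothetical.
def _SketchSelectDisks(instance_disks, wanted):
  """Mimic the selection rule of _ExpandCheckDisks with plain values.

  """
  if wanted is None:
    return list(instance_disks)
  if not set(wanted).issubset(instance_disks):
    raise ValueError("Can only act on disks belonging to the instance")
  return list(wanted)
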
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5001
  """Shutdown block devices of an instance.
5002

5003
  This does the shutdown on all nodes of the instance.
5004

5005
  If ignore_primary is false, errors on the primary node are not
  ignored and make the function report a failure.
5007

5008
  """
5009
  all_result = True
5010
  disks = _ExpandCheckDisks(instance, disks)
5011

    
5012
  for disk in disks:
5013
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
5014
      lu.cfg.SetDiskID(top_disk, node)
5015
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5016
      msg = result.fail_msg
5017
      if msg:
5018
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
5019
                      disk.iv_name, node, msg)
5020
        if ((node == instance.primary_node and not ignore_primary) or
5021
            (node != instance.primary_node and not result.offline)):
5022
          all_result = False
5023
  return all_result
5024

    
5025

    
5026
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5027
  """Checks if a node has enough free memory.
5028

5029
  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
5033

5034
  @type lu: C{LogicalUnit}
5035
  @param lu: a logical unit from which we get configuration data
5036
  @type node: C{str}
5037
  @param node: the node to check
5038
  @type reason: C{str}
5039
  @param reason: string to use in the error message
5040
  @type requested: C{int}
5041
  @param requested: the amount of memory in MiB to check for
5042
  @type hypervisor_name: C{str}
5043
  @param hypervisor_name: the hypervisor to ask for memory stats
5044
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5045
      we cannot check the node
5046

5047
  """
5048
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5049
  nodeinfo[node].Raise("Can't get data from node %s" % node,
5050
                       prereq=True, ecode=errors.ECODE_ENVIRON)
5051
  free_mem = nodeinfo[node].payload.get('memory_free', None)
5052
  if not isinstance(free_mem, int):
5053
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5054
                               " was '%s'" % (node, free_mem),
5055
                               errors.ECODE_ENVIRON)
5056
  if requested > free_mem:
5057
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5058
                               " needed %s MiB, available %s MiB" %
5059
                               (node, reason, requested, free_mem),
5060
                               errors.ECODE_NORES)
5061

    
5062

    
5063
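
# Illustrative sketch of the memory prerequisite check above, decoupled
# from the RPC layer: given the free memory reported by a node, the
# request either passes or yields the error text that
# _CheckNodeFreeMemory would raise.  All names are hypothetical.
def _SketchCheckFreeMemory(node, reason, requested_mib, free_mib):
  """Return None if enough memory is free, an error message otherwise.

  """
  if not isinstance(free_mib, int):
    return ("Can't compute free memory on node %s, result was '%s'" %
            (node, free_mib))
  if requested_mib > free_mib:
    return ("Not enough memory on node %s for %s: needed %s MiB,"
            " available %s MiB" % (node, reason, requested_mib, free_mib))
  return None
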
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Checks if nodes have enough free disk space in all the VGs.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type req_sizes: C{dict}
  @param req_sizes: the hash of vg and corresponding amount of disk in
      MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  if req_sizes is not None:
    for vg, req_size in req_sizes.iteritems():
      _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)


5087
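
# Illustrative sketch of how the per-VG requirements dictionary is fanned
# out by _CheckNodesFreeDiskPerVG above: each (vg, size) pair triggers one
# per-VG check.  "check_fn" is a hypothetical callback standing in for
# _CheckNodesFreeDiskOnVG.
def _SketchCheckDiskPerVG(nodenames, req_sizes, check_fn):
  """Invoke check_fn once per volume group listed in req_sizes.

  """
  if req_sizes is not None:
    for vg, req_size in req_sizes.items():
      check_fn(nodenames, vg, req_size)
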
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5088
  """Checks if nodes have enough free disk space in the specified VG.
5089

5090
  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
5094

5095
  @type lu: C{LogicalUnit}
5096
  @param lu: a logical unit from which we get configuration data
5097
  @type nodenames: C{list}
5098
  @param nodenames: the list of node names to check
5099
  @type vg: C{str}
5100
  @param vg: the volume group to check
5101
  @type requested: C{int}
5102
  @param requested: the amount of disk in MiB to check for
5103
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
5104
      or we cannot check the node
5105

5106
  """
5107
  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5108
  for node in nodenames:
5109
    info = nodeinfo[node]
5110
    info.Raise("Cannot get current information from node %s" % node,
5111
               prereq=True, ecode=errors.ECODE_ENVIRON)
5112
    vg_free = info.payload.get("vg_free", None)
5113
    if not isinstance(vg_free, int):
5114
      raise errors.OpPrereqError("Can't compute free disk space on node"
5115
                                 " %s for vg %s, result was '%s'" %
5116
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
5117
    if requested > vg_free:
5118
      raise errors.OpPrereqError("Not enough disk space on target node %s"
5119
                                 " vg %s: required %d MiB, available %d MiB" %
5120
                                 (node, vg, requested, vg_free),
5121
                                 errors.ECODE_NORES)
5122

    
5123

    
5124
class LUStartupInstance(LogicalUnit):
5125
  """Starts an instance.
5126

5127
  """
5128
  HPATH = "instance-start"
5129
  HTYPE = constants.HTYPE_INSTANCE
5130
  _OP_PARAMS = [
5131
    _PInstanceName,
5132
    _PForce,
5133
    _PIgnoreOfflineNodes,
5134
    ("hvparams", ht.EmptyDict, ht.TDict),
5135
    ("beparams", ht.EmptyDict, ht.TDict),
5136
    ]
5137
  REQ_BGL = False
5138

    
5139
  def CheckArguments(self):
5140
    # extra beparams
5141
    if self.op.beparams:
5142
      # fill the beparams dict
5143
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5144

    
5145
  def ExpandNames(self):
5146
    self._ExpandAndLockInstance()
5147

    
5148
  def BuildHooksEnv(self):
5149
    """Build hooks env.
5150

5151
    This runs on master, primary and secondary nodes of the instance.
5152

5153
    """
5154
    env = {
5155
      "FORCE": self.op.force,
5156
      }
5157
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5158
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5159
    return env, nl, nl
5160

    
5161
  def CheckPrereq(self):
5162
    """Check prerequisites.
5163

5164
    This checks that the instance is in the cluster.
5165

5166
    """
5167
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5168
    assert self.instance is not None, \
5169
      "Cannot retrieve locked instance %s" % self.op.instance_name
5170

    
5171
    # extra hvparams
5172
    if self.op.hvparams:
5173
      # check hypervisor parameter syntax (locally)
5174
      cluster = self.cfg.GetClusterInfo()
5175
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5176
      filled_hvp = cluster.FillHV(instance)
5177
      filled_hvp.update(self.op.hvparams)
5178
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5179
      hv_type.CheckParameterSyntax(filled_hvp)
5180
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
5181

    
5182
    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5183

    
5184
    if self.primary_offline and self.op.ignore_offline_nodes:
5185
      self.proc.LogWarning("Ignoring offline primary node")
5186

    
5187
      if self.op.hvparams or self.op.beparams:
5188
        self.proc.LogWarning("Overridden parameters are ignored")
5189
    else:
5190
      _CheckNodeOnline(self, instance.primary_node)
5191

    
5192
      bep = self.cfg.GetClusterInfo().FillBE(instance)
5193

    
5194
      # check bridges existence
5195
      _CheckInstanceBridgesExist(self, instance)
5196

    
5197
      remote_info = self.rpc.call_instance_info(instance.primary_node,
5198
                                                instance.name,
5199
                                                instance.hypervisor)
5200
      remote_info.Raise("Error checking node %s" % instance.primary_node,
5201
                        prereq=True, ecode=errors.ECODE_ENVIRON)
5202
      if not remote_info.payload: # not running already
5203
        _CheckNodeFreeMemory(self, instance.primary_node,
5204
                             "starting instance %s" % instance.name,
5205
                             bep[constants.BE_MEMORY], instance.hypervisor)
5206

    
5207
  def Exec(self, feedback_fn):
5208
    """Start the instance.
5209

5210
    """
5211
    instance = self.instance
5212
    force = self.op.force
5213

    
5214
    self.cfg.MarkInstanceUp(instance.name)
5215

    
5216
    if self.primary_offline:
5217
      assert self.op.ignore_offline_nodes
5218
      self.proc.LogInfo("Primary node offline, marked instance as started")
5219
    else:
5220
      node_current = instance.primary_node
5221

    
5222
      _StartInstanceDisks(self, instance, force)
5223

    
5224
      result = self.rpc.call_instance_start(node_current, instance,
5225
                                            self.op.hvparams, self.op.beparams)
5226
      msg = result.fail_msg
5227
      if msg:
5228
        _ShutdownInstanceDisks(self, instance)
5229
        raise errors.OpExecError("Could not start instance: %s" % msg)
5230

    
5231

    
5232
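
# Illustrative sketch of the parameter layering in
# LUStartupInstance.CheckPrereq above: the cluster-level hypervisor
# defaults are filled first and the per-operation overrides are applied
# on top, and the merged result is what gets validated.  Hypothetical,
# self-contained helper.
def _SketchFillHvParams(cluster_defaults, op_overrides):
  """Return cluster defaults overlaid with per-operation overrides.

  """
  filled = dict(cluster_defaults)
  filled.update(op_overrides or {})
  return filled
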
class LURebootInstance(LogicalUnit):
5233
  """Reboot an instance.
5234

5235
  """
5236
  HPATH = "instance-reboot"
5237
  HTYPE = constants.HTYPE_INSTANCE
5238
  _OP_PARAMS = [
5239
    _PInstanceName,
5240
    ("ignore_secondaries", False, ht.TBool),
5241
    ("reboot_type", ht.NoDefault, ht.TElemOf(constants.REBOOT_TYPES)),
5242
    _PShutdownTimeout,
5243
    ]
5244
  REQ_BGL = False
5245

    
5246
  def ExpandNames(self):
5247
    self._ExpandAndLockInstance()
5248

    
5249
  def BuildHooksEnv(self):
5250
    """Build hooks env.
5251

5252
    This runs on master, primary and secondary nodes of the instance.
5253

5254
    """
5255
    env = {
5256
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
5257
      "REBOOT_TYPE": self.op.reboot_type,
5258
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5259
      }
5260
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5261
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5262
    return env, nl, nl
5263

    
5264
  def CheckPrereq(self):
5265
    """Check prerequisites.
5266

5267
    This checks that the instance is in the cluster.
5268

5269
    """
5270
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5271
    assert self.instance is not None, \
5272
      "Cannot retrieve locked instance %s" % self.op.instance_name
5273

    
5274
    _CheckNodeOnline(self, instance.primary_node)
5275

    
5276
    # check bridges existence
5277
    _CheckInstanceBridgesExist(self, instance)
5278

    
5279
  def Exec(self, feedback_fn):
5280
    """Reboot the instance.
5281

5282
    """
5283
    instance = self.instance
5284
    ignore_secondaries = self.op.ignore_secondaries
5285
    reboot_type = self.op.reboot_type
5286

    
5287
    node_current = instance.primary_node
5288

    
5289
    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
5290
                       constants.INSTANCE_REBOOT_HARD]:
5291
      for disk in instance.disks:
5292
        self.cfg.SetDiskID(disk, node_current)
5293
      result = self.rpc.call_instance_reboot(node_current, instance,
5294
                                             reboot_type,
5295
                                             self.op.shutdown_timeout)
5296
      result.Raise("Could not reboot instance")
5297
    else:
5298
      result = self.rpc.call_instance_shutdown(node_current, instance,
5299
                                               self.op.shutdown_timeout)
5300
      result.Raise("Could not shutdown instance for full reboot")
5301
      _ShutdownInstanceDisks(self, instance)
5302
      _StartInstanceDisks(self, instance, ignore_secondaries)
5303
      result = self.rpc.call_instance_start(node_current, instance, None, None)
5304
      msg = result.fail_msg
5305
      if msg:
5306
        _ShutdownInstanceDisks(self, instance)
5307
        raise errors.OpExecError("Could not start instance for"
5308
                                 " full reboot: %s" % msg)
5309

    
5310
    self.cfg.MarkInstanceUp(instance.name)
5311

    
5312

    
5313
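
# Illustrative sketch of the dispatch in LURebootInstance.Exec above:
# soft and hard reboots are delegated to a single in-place reboot call,
# while a full reboot is decomposed into shutdown, disk cycle and start.
# The *_fn arguments are hypothetical callbacks for the three actions and
# the literal reboot type names are assumptions for the sketch only.
def _SketchReboot(reboot_type, inplace_reboot_fn, shutdown_fn, start_fn):
  """Pick the reboot strategy for the given reboot type.

  """
  if reboot_type in ("soft", "hard"):
    inplace_reboot_fn()
  else:
    # full reboot: stop the instance completely, then start it again
    shutdown_fn()
    start_fn()
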
class LUShutdownInstance(LogicalUnit):
5314
  """Shutdown an instance.
5315

5316
  """
5317
  HPATH = "instance-stop"
5318
  HTYPE = constants.HTYPE_INSTANCE
5319
  _OP_PARAMS = [
5320
    _PInstanceName,
5321
    _PIgnoreOfflineNodes,
5322
    ("timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT, ht.TPositiveInt),
5323
    ]
5324
  REQ_BGL = False
5325

    
5326
  def ExpandNames(self):
5327
    self._ExpandAndLockInstance()
5328

    
5329
  def BuildHooksEnv(self):
5330
    """Build hooks env.
5331

5332
    This runs on master, primary and secondary nodes of the instance.
5333

5334
    """
5335
    env = _BuildInstanceHookEnvByObject(self, self.instance)
5336
    env["TIMEOUT"] = self.op.timeout
5337
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5338
    return env, nl, nl
5339

    
5340
  def CheckPrereq(self):
5341
    """Check prerequisites.
5342

5343
    This checks that the instance is in the cluster.
5344

5345
    """
5346
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5347
    assert self.instance is not None, \
5348
      "Cannot retrieve locked instance %s" % self.op.instance_name
5349

    
5350
    self.primary_offline = \
5351
      self.cfg.GetNodeInfo(self.instance.primary_node).offline
5352

    
5353
    if self.primary_offline and self.op.ignore_offline_nodes:
5354
      self.proc.LogWarning("Ignoring offline primary node")
5355
    else:
5356
      _CheckNodeOnline(self, self.instance.primary_node)
5357

    
5358
  def Exec(self, feedback_fn):
5359
    """Shutdown the instance.
5360

5361
    """
5362
    instance = self.instance
5363
    node_current = instance.primary_node
5364
    timeout = self.op.timeout
5365

    
5366
    self.cfg.MarkInstanceDown(instance.name)
5367

    
5368
    if self.primary_offline:
5369
      assert self.op.ignore_offline_nodes
5370
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
5371
    else:
5372
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
5373
      msg = result.fail_msg
5374
      if msg:
5375
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)
5376

    
5377
      _ShutdownInstanceDisks(self, instance)
5378

    
5379

    
5380
class LUReinstallInstance(LogicalUnit):
5381
  """Reinstall an instance.
5382

5383
  """
5384
  HPATH = "instance-reinstall"
5385
  HTYPE = constants.HTYPE_INSTANCE
5386
  _OP_PARAMS = [
5387
    _PInstanceName,
5388
    ("os_type", None, ht.TMaybeString),
5389
    ("force_variant", False, ht.TBool),
5390
    ("osparams", None, ht.TOr(ht.TDict, ht.TNone)),
5391
    ]
5392
  REQ_BGL = False
5393

    
5394
  def ExpandNames(self):
5395
    self._ExpandAndLockInstance()
5396

    
5397
  def BuildHooksEnv(self):
5398
    """Build hooks env.
5399

5400
    This runs on master, primary and secondary nodes of the instance.
5401

5402
    """
5403
    env = _BuildInstanceHookEnvByObject(self, self.instance)
5404
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5405
    return env, nl, nl
5406

    
5407
  def CheckPrereq(self):
5408
    """Check prerequisites.
5409

5410
    This checks that the instance is in the cluster and is not running.
5411

5412
    """
5413
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5414
    assert instance is not None, \
5415
      "Cannot retrieve locked instance %s" % self.op.instance_name
5416
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
5417
                     " offline, cannot reinstall")
5418
    for node in instance.secondary_nodes:
5419
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
5420
                       " cannot reinstall")
5421

    
5422
    if instance.disk_template == constants.DT_DISKLESS:
5423
      raise errors.OpPrereqError("Instance '%s' has no disks" %
5424
                                 self.op.instance_name,
5425
                                 errors.ECODE_INVAL)
5426
    _CheckInstanceDown(self, instance, "cannot reinstall")
5427

    
5428
    if self.op.os_type is not None:
5429
      # OS verification
5430
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
5431
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
5432
      instance_os = self.op.os_type
5433
    else:
5434
      instance_os = instance.os
5435

    
5436
    nodelist = list(instance.all_nodes)
5437

    
5438
    if self.op.osparams:
5439
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
5440
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
5441
      self.os_inst = i_osdict # the new dict (without defaults)
5442
    else:
5443
      self.os_inst = None
5444

    
5445
    self.instance = instance
5446

    
5447
  def Exec(self, feedback_fn):
5448
    """Reinstall the instance.
5449

5450
    """
5451
    inst = self.instance
5452

    
5453
    if self.op.os_type is not None:
5454
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
5455
      inst.os = self.op.os_type
5456
      # Write to configuration
5457
      self.cfg.Update(inst, feedback_fn)
5458

    
5459
    _StartInstanceDisks(self, inst, None)
5460
    try:
5461
      feedback_fn("Running the instance OS create scripts...")
5462
      # FIXME: pass debug option from opcode to backend
5463
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
5464
                                             self.op.debug_level,
5465
                                             osparams=self.os_inst)
5466
      result.Raise("Could not install OS for instance %s on node %s" %
5467
                   (inst.name, inst.primary_node))
5468
    finally:
5469
      _ShutdownInstanceDisks(self, inst)
5470

    
5471

    
5472
class LURecreateInstanceDisks(LogicalUnit):
5473
  """Recreate an instance's missing disks.
5474

5475
  """
5476
  HPATH = "instance-recreate-disks"
5477
  HTYPE = constants.HTYPE_INSTANCE
5478
  _OP_PARAMS = [
5479
    _PInstanceName,
5480
    ("disks", ht.EmptyList, ht.TListOf(ht.TPositiveInt)),
5481
    ]
5482
  REQ_BGL = False
5483

    
5484
  def ExpandNames(self):
5485
    self._ExpandAndLockInstance()
5486

    
5487
  def BuildHooksEnv(self):
5488
    """Build hooks env.
5489

5490
    This runs on master, primary and secondary nodes of the instance.
5491

5492
    """
5493
    env = _BuildInstanceHookEnvByObject(self, self.instance)
5494
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5495
    return env, nl, nl
5496

    
5497
  def CheckPrereq(self):
5498
    """Check prerequisites.
5499

5500
    This checks that the instance is in the cluster and is not running.
5501

5502
    """
5503
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5504
    assert instance is not None, \
5505
      "Cannot retrieve locked instance %s" % self.op.instance_name
5506
    _CheckNodeOnline(self, instance.primary_node)
5507

    
5508
    if instance.disk_template == constants.DT_DISKLESS:
5509
      raise errors.OpPrereqError("Instance '%s' has no disks" %
5510
                                 self.op.instance_name, errors.ECODE_INVAL)
5511
    _CheckInstanceDown(self, instance, "cannot recreate disks")
5512

    
5513
    if not self.op.disks:
5514
      self.op.disks = range(len(instance.disks))
5515
    else:
5516
      for idx in self.op.disks:
5517
        if idx >= len(instance.disks):
5518
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
5519
                                     errors.ECODE_INVAL)
5520

    
5521
    self.instance = instance
5522

    
5523
  def Exec(self, feedback_fn):
5524
    """Recreate the disks.
5525

5526
    """
5527
    to_skip = []
5528
    for idx, _ in enumerate(self.instance.disks):
5529
      if idx not in self.op.disks: # disk idx has not been passed in
5530
        to_skip.append(idx)
5531
        continue
5532

    
5533
    _CreateDisks(self, self.instance, to_skip=to_skip)
5534

    
5535

    
5536
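
# Illustrative sketch of the index filtering in
# LURecreateInstanceDisks.Exec above: every disk index that was not
# explicitly requested ends up in the to_skip list handed to
# _CreateDisks.  Purely hypothetical helper.
def _SketchDisksToSkip(num_disks, wanted_indices):
  """Return the list of disk indices that should not be recreated.

  """
  return [idx for idx in range(num_disks) if idx not in wanted_indices]
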
class LURenameInstance(LogicalUnit):
5537
  """Rename an instance.
5538

5539
  """
5540
  HPATH = "instance-rename"
5541
  HTYPE = constants.HTYPE_INSTANCE
5542
  _OP_PARAMS = [
5543
    _PInstanceName,
5544
    ("new_name", ht.NoDefault, ht.TNonEmptyString),
5545
    ("ip_check", False, ht.TBool),
5546
    ("name_check", True, ht.TBool),
5547
    ]
5548

    
5549
  def CheckArguments(self):
5550
    """Check arguments.
5551

5552
    """
5553
    if self.op.ip_check and not self.op.name_check:
5554
      # TODO: make the ip check more flexible and not depend on the name check
5555
      raise errors.OpPrereqError("Cannot do ip check without a name check",
5556
                                 errors.ECODE_INVAL)
5557

    
5558
  def BuildHooksEnv(self):
5559
    """Build hooks env.
5560

5561
    This runs on master, primary and secondary nodes of the instance.
5562

5563
    """
5564
    env = _BuildInstanceHookEnvByObject(self, self.instance)
5565
    env["INSTANCE_NEW_NAME"] = self.op.new_name
5566
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5567
    return env, nl, nl
5568

    
5569
  def CheckPrereq(self):
5570
    """Check prerequisites.
5571

5572
    This checks that the instance is in the cluster and is not running.
5573

5574
    """
5575
    self.op.instance_name = _ExpandInstanceName(self.cfg,
5576
                                                self.op.instance_name)
5577
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5578
    assert instance is not None
5579
    _CheckNodeOnline(self, instance.primary_node)
5580
    _CheckInstanceDown(self, instance, "cannot rename")
5581
    self.instance = instance
5582

    
5583
    new_name = self.op.new_name
5584
    if self.op.name_check:
5585
      hostname = netutils.GetHostname(name=new_name)
5586
      new_name = self.op.new_name = hostname.name
5587
      if (self.op.ip_check and
5588
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
5589
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
5590
                                   (hostname.ip, new_name),
5591
                                   errors.ECODE_NOTUNIQUE)
5592

    
5593
    instance_list = self.cfg.GetInstanceList()
5594
    if new_name in instance_list and new_name != instance.name:
5595
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
5596
                                 new_name, errors.ECODE_EXISTS)
5597

    
5598
  def Exec(self, feedback_fn):
5599
    """Reinstall the instance.
5600

5601
    """
5602
    inst = self.instance
5603
    old_name = inst.name
5604

    
5605
    rename_file_storage = False
5606
    if (inst.disk_template == constants.DT_FILE and
5607
        self.op.new_name != inst.name):
5608
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5609
      rename_file_storage = True
5610

    
5611
    self.cfg.RenameInstance(inst.name, self.op.new_name)
5612
    # Change the instance lock. This is definitely safe while we hold the BGL
5613
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
5614
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
5615

    
5616
    # re-read the instance from the configuration after rename
5617
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
5618

    
5619
    if rename_file_storage:
5620
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5621
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
5622
                                                     old_file_storage_dir,
5623
                                                     new_file_storage_dir)
5624
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
5625
                   " (but the instance has been renamed in Ganeti)" %
5626
                   (inst.primary_node, old_file_storage_dir,
5627
                    new_file_storage_dir))
5628

    
5629
    _StartInstanceDisks(self, inst, None)
5630
    try:
5631
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
5632
                                                 old_name, self.op.debug_level)
5633
      msg = result.fail_msg
5634
      if msg:
5635
        msg = ("Could not run OS rename script for instance %s on node %s"
5636
               " (but the instance has been renamed in Ganeti): %s" %
5637
               (inst.name, inst.primary_node, msg))
5638
        self.proc.LogWarning(msg)
5639
    finally:
5640
      _ShutdownInstanceDisks(self, inst)
5641

    
5642
    return inst.name
5643

    
5644

    
5645
class LURemoveInstance(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("ignore_failures", False, ht.TBool),
    _PShutdownTimeout,
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
    nl = [self.cfg.GetMasterNode()]
    nl_post = list(self.instance.all_nodes) + nl
    return env, nl, nl_post

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    """
    instance = self.instance
    logging.info("Shutting down instance %s on node %s",
                 instance.name, instance.primary_node)

    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_failures:
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, instance.primary_node, msg))

    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)


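# Illustrative note on the helper below: it tears an instance down in a fixed
# order -- block devices first (via _RemoveDisks), then the configuration
# entry (cfg.RemoveInstance), and finally it schedules the instance lock
# itself for removal through lu.remove_locks, so the lock goes away together
# with the instance.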
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
  """Utility function to remove an instance.

  """
  logging.info("Removing block devices for instance %s", instance.name)

  if not _RemoveDisks(lu, instance):
    if not ignore_failures:
      raise errors.OpExecError("Can't remove instance's disks")
    feedback_fn("Warning: can't remove instance's disks")

  logging.info("Removing instance %s out of cluster config", instance.name)

  lu.cfg.RemoveInstance(instance.name)

  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
    "Instance lock removal conflict"

  # Remove lock for the instance
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name


class LUQueryInstances(NoHooksLU):
  """Logical unit for querying instances.

  """
  # pylint: disable-msg=W0142
  _OP_PARAMS = [
    _POutputFields,
    ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
    ("use_locking", False, ht.TBool),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    self.iq = _InstanceQuery(self.op.names, self.op.output_fields,
                             self.op.use_locking)

  def ExpandNames(self):
    self.iq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.iq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.iq.OldStyleQuery(self)


class LUFailoverInstance(LogicalUnit):
  """Failover an instance.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("ignore_consistency", False, ht.TBool),
    _PShutdownTimeout,
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self.instance
    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]
    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      "OLD_PRIMARY": source_node,
      "OLD_SECONDARY": target_node,
      "NEW_PRIMARY": target_node,
      "NEW_SECONDARY": source_node,
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    nl_post = list(nl)
    nl_post.append(source_node)
    return env, nl, nl_post

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    bep = self.cfg.GetClusterInfo().FillBE(instance)
    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " network mirrored, cannot failover.",
                                 errors.ECODE_STATE)

    secondary_nodes = instance.secondary_nodes
    if not secondary_nodes:
      raise errors.ProgrammerError("no secondary node but using "
                                   "a mirrored disk template")

    target_node = secondary_nodes[0]
    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    if instance.admin_up:
      # check memory requirements on the secondary node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.

    """
    instance = self.instance
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)

    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]

    if instance.admin_up:
      feedback_fn("* checking disk consistency between source and target")
      for dev in instance.disks:
        # for drbd, these are drbd over lvm
        if not _CheckDiskConsistency(self, dev, target_node, False):
          if not self.op.ignore_consistency:
            raise errors.OpExecError("Disk %s is degraded on target node,"
                                     " aborting failover." % dev.iv_name)
    else:
      feedback_fn("* not checking disk consistency as instance is not running")

    feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency or primary_node.offline:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    feedback_fn("* deactivating the instance's disks on source node")
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
      raise errors.OpExecError("Can't shut down the instance's disks.")

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, feedback_fn)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      feedback_fn("* activating the instance's disks on target node")
      logging.info("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      feedback_fn("* starting the instance on the target node")
      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))


class LUMigrateInstance(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    _PMigrationMode,
    _PMigrationLive,
    ("cleanup", False, ht.TBool),
    ]

  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       self.op.cleanup)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]
    env = _BuildInstanceHookEnvByObject(self, instance)
    env["MIGRATE_LIVE"] = self._migrater.live
    env["MIGRATE_CLEANUP"] = self.op.cleanup
    env.update({
        "OLD_PRIMARY": source_node,
        "OLD_SECONDARY": target_node,
        "NEW_PRIMARY": target_node,
        "NEW_SECONDARY": source_node,
        })
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    nl_post = list(nl)
    nl_post.append(source_node)
    return env, nl, nl_post


class LUMoveInstance(LogicalUnit):
  """Move an instance by data-copying.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("target_node", ht.NoDefault, ht.TNonEmptyString),
    _PShutdownTimeout,
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
                                       self.op.target_node]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    _CheckNodeVmCapable(self, target_node)

    if instance.admin_up:
      # check memory requirements on the secondary node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))


class LUMigrateNode(LogicalUnit):
  """Migrate all instances from a node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  _OP_PARAMS = [
    _PNodeName,
    _PMigrationMode,
    _PMigrationLive,
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    # Create tasklets for migrating instances for all instances on this node
    names = []
    tasklets = []

    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
      logging.debug("Migrating instance %s", inst.name)
      names.append(inst.name)

      tasklets.append(TLMigrateInstance(self, inst.name, False))

    self.tasklets = tasklets

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = names

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "NODE_NAME": self.op.node_name,
      }

    nl = [self.cfg.GetMasterNode()]

    return (env, nl, nl)


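# Illustrative sketch of how this tasklet is driven, mirroring the callers
# above: LUMigrateInstance wraps a single instance and passes its own
# 'cleanup' opcode flag, while LUMigrateNode builds one tasklet per primary
# instance with cleanup=False, roughly equivalent to:
#
#   self.tasklets = [TLMigrateInstance(self, inst.name, False)
#                    for inst in _GetNodePrimaryInstances(cfg, node_name)]
#
# Exec() then dispatches to _ExecCleanup() or _ExecMigration(), and the
# live/non-live decision is resolved in CheckPrereq from op.live/op.mode or
# the hypervisor's HV_MIGRATION_MODE default.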
class TLMigrateInstance(Tasklet):
  """Tasklet class for instance migration.

  @type live: boolean
  @ivar live: whether the migration will be done live or non-live;
      this variable is initialized only after CheckPrereq has run

  """
  def __init__(self, lu, instance_name, cleanup):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.cleanup = cleanup
    self.live = False # will be overridden later

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
    instance = self.cfg.GetInstanceInfo(instance_name)
    assert instance is not None

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " drbd8, cannot migrate.", errors.ECODE_STATE)

    secondary_nodes = instance.secondary_nodes
    if not secondary_nodes:
      raise errors.ConfigurationError("No secondary node but using"
                                      " drbd8 disk template")

    i_be = self.cfg.GetClusterInfo().FillBE(instance)

    target_node = secondary_nodes[0]
    # check memory requirements on the secondary node
    _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
                         instance.name, i_be[constants.BE_MEMORY],
                         instance.hypervisor)

    # check bridge existence
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)

    if not self.cleanup:
      _CheckNodeNotDrained(self.lu, target_node)
      result = self.rpc.call_instance_migratable(instance.primary_node,
                                                 instance)
      result.Raise("Can't migrate, please use failover",
                   prereq=True, ecode=errors.ECODE_STATE)

    self.instance = instance

    if self.lu.op.live is not None and self.lu.op.mode is not None:
      raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
                                 " parameters are accepted",
                                 errors.ECODE_INVAL)
    if self.lu.op.live is not None:
      if self.lu.op.live:
        self.lu.op.mode = constants.HT_MIGRATION_LIVE
      else:
        self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
      # reset the 'live' parameter to None so that repeated
      # invocations of CheckPrereq do not raise an exception
      self.lu.op.live = None
    elif self.lu.op.mode is None:
      # read the default value from the hypervisor
      i_hv = self.cfg.GetClusterInfo().FillHV(instance, skip_globals=False)
      self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]

    self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE

  def _WaitUntilSync(self):
    """Poll with custom rpc for disk sync.

    This uses our own step-based rpc call.

    """
    self.feedback_fn("* wait until resync is done")
    all_done = False
    while not all_done:
      all_done = True
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
                                            self.nodes_ip,
                                            self.instance.disks)
      min_percent = 100
      for node, nres in result.items():
        nres.Raise("Cannot resync disks on node %s" % node)
        node_done, node_percent = nres.payload
        all_done = all_done and node_done
        if node_percent is not None:
          min_percent = min(min_percent, node_percent)
      if not all_done:
        if min_percent < 100:
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
        time.sleep(2)

  def _EnsureSecondary(self, node):
    """Demote a node to secondary.

    """
    self.feedback_fn("* switching node %s to secondary mode" % node)

    for dev in self.instance.disks:
      self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_close(node, self.instance.name,
                                          self.instance.disks)
    result.Raise("Cannot change disk to secondary on node %s" % node)

  def _GoStandalone(self):
    """Disconnect from the network.

    """
    self.feedback_fn("* changing into standalone mode")
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
                                               self.instance.disks)
    for node, nres in result.items():
      nres.Raise("Cannot disconnect disks node %s" % node)

  def _GoReconnect(self, multimaster):
    """Reconnect to the network.

    """
    if multimaster:
      msg = "dual-master"
    else:
      msg = "single-master"
    self.feedback_fn("* changing disks into %s mode" % msg)
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
                                           self.instance.disks,
                                           self.instance.name, multimaster)
    for node, nres in result.items():
      nres.Raise("Cannot change disks config on node %s" % node)

  def _ExecCleanup(self):
    """Try to cleanup after a failed migration.

    The cleanup is done by:
      - check that the instance is running only on one node
        (and update the config if needed)
      - change disks on its secondary node to secondary
      - wait until disks are fully synchronized
      - disconnect from the network
      - change disks into single-master mode
      - wait again until disks are fully synchronized

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # check running on only one node
    self.feedback_fn("* checking where the instance actually runs"
                     " (if this hangs, the hypervisor might be in"
                     " a bad state)")
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
    for node, result in ins_l.items():
      result.Raise("Can't contact node %s" % node)

    runningon_source = instance.name in ins_l[source_node].payload
    runningon_target = instance.name in ins_l[target_node].payload

    if runningon_source and runningon_target:
      raise errors.OpExecError("Instance seems to be running on two nodes,"
                               " or the hypervisor is confused. You will have"
                               " to ensure manually that it runs only on one"
                               " and restart this operation.")

    if not (runningon_source or runningon_target):
      raise errors.OpExecError("Instance does not seem to be running at all."
                               " In this case, it's safer to repair by"
                               " running 'gnt-instance stop' to ensure disk"
                               " shutdown, and then restarting it.")

    if runningon_target:
      # the migration has actually succeeded, we need to update the config
      self.feedback_fn("* instance running on secondary node (%s),"
                       " updating config" % target_node)
      instance.primary_node = target_node
      self.cfg.Update(instance, self.feedback_fn)
      demoted_node = source_node
    else:
      self.feedback_fn("* instance confirmed to be running on its"
                       " primary node (%s)" % source_node)
      demoted_node = target_node

    self._EnsureSecondary(demoted_node)
    try:
      self._WaitUntilSync()
    except errors.OpExecError:
      # we ignore errors here, since if the device is standalone, it
      # won't be able to sync
      pass
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def _RevertDiskStatus(self):
    """Try to revert the disk status after a failed migration.

    """
    target_node = self.target_node
    try:
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()
    except errors.OpExecError, err:
      self.lu.LogWarning("Migration failed and I can't reconnect the"
                         " drives: error '%s'\n"
                         "Please look and recover the instance status" %
                         str(err))

  def _AbortMigration(self):
    """Call the hypervisor code to abort a started migration.

    """
    instance = self.instance
    target_node = self.target_node
    migration_info = self.migration_info

    abort_result = self.rpc.call_finalize_migration(target_node,
                                                    instance,
                                                    migration_info,
                                                    False)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on target node %s: %s",
                    target_node, abort_msg)
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.

  def _ExecMigration(self):
    """Migrate an instance.

    The migrate is done by:
      - change the disks into dual-master mode
      - wait until disks are fully synchronized again
      - migrate the instance
      - change disks on the new secondary node (the old primary) to secondary
      - wait until disks are fully synchronized
      - change disks into single-master mode

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    self.feedback_fn("* checking disk consistency between source and target")
    for dev in instance.disks:
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migrate." % dev.iv_name)

    # First get the migration information from the remote node
    result = self.rpc.call_migration_info(source_node, instance)
    msg = result.fail_msg
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (source_node, msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    self.migration_info = migration_info = result.payload

    # Then switch the disks to master/master mode
    self._EnsureSecondary(target_node)
    self._GoStandalone()
    self._GoReconnect(True)
    self._WaitUntilSync()

    self.feedback_fn("* preparing %s to accept the instance" % target_node)
    result = self.rpc.call_accept_instance(target_node,
                                           instance,
                                           migration_info,
                                           self.nodes_ip[target_node])

    msg = result.fail_msg
    if msg:
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Pre-migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* migrating instance to %s" % target_node)
    time.sleep(10)
    result = self.rpc.call_instance_migrate(source_node, instance,
                                            self.nodes_ip[target_node],
                                            self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))
    time.sleep(10)

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    result = self.rpc.call_finalize_migration(target_node,
                                              instance,
                                              migration_info,
                                              True)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed:"
                    " %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    self._EnsureSecondary(source_node)
    self._WaitUntilSync()
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def Exec(self, feedback_fn):
    """Perform the migration.

    """
    feedback_fn("Migrating instance %s" % self.instance.name)

    self.feedback_fn = feedback_fn

    self.source_node = self.instance.primary_node
    self.target_node = self.instance.secondary_nodes[0]
    self.all_nodes = [self.source_node, self.target_node]
    self.nodes_ip = {
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
      }

    if self.cleanup:
      return self._ExecCleanup()
    else:
      return self._ExecMigration()


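# Illustrative note on the helper below: creation recurses through the disk
# tree; 'force_create' is inherited from the caller and additionally forced to
# True for any device that reports CreateOnSecondary(), and a device is only
# built on this node once force_create is true for it. A typical call, as
# issued from _CreateDisks() further down, looks like:
#
#   f_create = (node == pnode)
#   _CreateBlockDev(lu, node, instance, disk, f_create, info, f_create)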
def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has
      CreateOnSecondary() attribute
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  if device.CreateOnSecondary():
    force_create = True

  if device.children:
    for child in device.children:
      _CreateBlockDev(lu, node, instance, child, force_create,
                      info, force_open)

  if not force_create:
    return

  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)


def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create a single block device on a given node.

  This will not recurse over children of the device, so they must be
  created in advance.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  lu.cfg.SetDiskID(device, node)
  result = lu.rpc.call_blockdev_create(node, device, device.size,
                                       instance.name, force_open, info)
  result.Raise("Can't create block device %s on"
               " node %s for instance %s" % (device, node, instance.name))
  if device.physical_id is None:
    device.physical_id = result.payload


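# Illustrative example for the helper below, assuming GenerateUniqueID returns
# a UUID-style string:
#
#   _GenerateUniqueNames(lu, [".disk0", ".disk1"])
#   => ["<unique-id>.disk0", "<unique-id>.disk1"]   (one fresh id per ext)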
def _GenerateUniqueNames(lu, exts):
  """Generate suitable LV names.

  This will generate one unique logical volume name for each of the
  given extensions.

  """
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
    results.append("%s%s" % (new_id, val))
  return results


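# Illustrative sketch of the device tree built below: a LD_DRBD8 disk of the
# requested size whose logical_id is (primary, secondary, port, p_minor,
# s_minor, shared_secret), with two LD_LV children in the given volume group:
# names[0] as the data LV (full size) and names[1] as a 128 MB metadata LV.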
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgname, names, iv_name,
                         p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  """
  port = lu.cfg.AllocatePort()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgname, names[0]))
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                          logical_id=(vgname, names[1]))
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                                      p_minor, s_minor,
                                      shared_secret),
                          children=[dev_data, dev_meta],
                          iv_name=iv_name)
  return drbd_dev


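# Illustrative note: disk_info below is a list of dicts with at least "size"
# and "mode" keys and an optional per-disk "vg" override, e.g. (hypothetical
# values) [{"size": 10240, "mode": "rw"}]; iv_names are always assigned as
# "disk/<index>" starting at base_index.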
def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index, feedback_fn):
  """Generate the entire disk layout for a given template type.

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []
  if template_name == constants.DT_DISKLESS:
    pass
  elif template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                      for i in range(disk_count)])
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      vg = disk.get("vg", vgname)
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
                              logical_id=(vg, names[idx]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk["mode"])
      disks.append(disk_dev)
  elif template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    names = []
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      vg = disk.get("vg", vgname)
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk["size"], vg, names[idx*2:idx*2+2],
                                      "disk/%d" % disk_index,
                                      minors[idx*2], minors[idx*2+1])
      disk_dev.mode = disk["mode"]
      disks.append(disk_dev)
  elif template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    _RequireFileStorage()

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk["mode"])
      disks.append(disk_dev)
  else:
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
  return disks


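# Illustrative example: for an instance named "instance1.example.com"
# (hypothetical name), the helper below returns
# "originstname+instance1.example.com", which is attached to the instance's
# block devices as the extra metadata (LVM tag) used during creation.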
def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name


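# Worked example (illustrative): if 1024 MiB out of 4096 MiB were written in
# 30 seconds, _CalcEta(30, 1024, 4096) returns (4096 - 1024) * (30 / 1024.0),
# i.e. 90 seconds remaining.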
def _CalcEta(time_taken, written, total_size):
  """Calculates the ETA based on size written and total size.

  @param time_taken: The time taken so far
  @param written: amount written so far
  @param total_size: The total size of data to be written
  @return: The remaining time in seconds

  """
  avg_time = time_taken / float(written)
  return (total_size - written) * avg_time


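# Illustrative note on the chunk size used below: it is
# min(MAX_WIPE_CHUNK, size * MIN_WIPE_CHUNK_PERCENT / 100). Assuming, for
# example, MIN_WIPE_CHUNK_PERCENT = 10 and MAX_WIPE_CHUNK = 1024 (MiB), a
# 20480 MiB disk would be wiped in chunks of min(1024, 2048) = 1024 MiB, with
# progress/ETA reported at most once a minute via _CalcEta.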
def _WipeDisks(lu, instance):
  """Wipes instance disks.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should wipe
  @return: the success of the wipe

  """
  node = instance.primary_node
  for idx, device in enumerate(instance.disks):
    lu.LogInfo("* Wiping disk %d", idx)
    logging.info("Wiping disk %d for instance %s", idx, instance.name)

    # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
    # MAX_WIPE_CHUNK at max
    wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
                          constants.MIN_WIPE_CHUNK_PERCENT)

    offset = 0
    size = device.size
    last_output = 0
    start_time = time.time()

    while offset < size:
      wipe_size = min(wipe_chunk_size, size - offset)
      result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
      result.Raise("Could not wipe disk %d at offset %d for size %d" %
                   (idx, offset, wipe_size))
      now = time.time()
      offset += wipe_size
      if now - last_output >= 60:
        eta = _CalcEta(now - start_time, offset, size)
        lu.LogInfo(" - done: %.1f%% ETA: %s" %
                   (offset / float(size) * 100, utils.FormatSeconds(eta)))
        last_output = now


def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation
  @rtype: boolean
  @return: the success of the creation

  """
  info = _GetInstanceInfoText(instance)
  if target_node is None:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes
  else:
    pnode = target_node
    all_nodes = [pnode]

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUSetInstanceParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
      continue
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    #HARDCODE
    for node in all_nodes:
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)


def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, instance.primary_node, result.fail_msg)
      all_result = False

  return all_result


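# Worked example (illustrative) for the helper below: with
# disks = [{"vg": "xenvg", "size": 1024}, {"vg": "xenvg", "size": 2048}]
# it returns {"xenvg": 3072} for the plain template and {"xenvg": 3328} for
# drbd8 (128 MB of metadata added per disk); diskless and file templates
# yield None.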
def _ComputeDiskSizePerVG(disk_template, disks):
  """Compute disk size requirements per volume group.

  """
  def _compute(disks, payload):
    """Universal algorithm

    """
    vgs = {}
    for disk in disks:
      vgs[disk["vg"]] = vgs.get(disk["vg"], 0) + disk["size"] + payload

    return vgs

  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: _compute(disks, 0),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: _compute(disks, 128),
    constants.DT_FILE: None,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" %  disk_template)

  return req_size_dict[disk_template]


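# Worked example (illustrative) for the non-VG-aware variant below: the same
# two disks of 1024 and 2048 MiB give 3072 for plain and 3072 + 2 * 128 = 3328
# for drbd8, again with None for the diskless and file templates.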
def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d["size"] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
    constants.DT_FILE: None,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" %  disk_template)

  return req_size_dict[disk_template]


def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
                                                  hvname,
                                                  hvparams)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)


def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS we should use
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  result = lu.rpc.call_os_validate(required, nodenames, osname,
                                   [constants.OS_VALIDATE_PARAMETERS],
                                   osparams)
  for node, nres in result.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    nres.Raise("OS Parameters validation failed on node %s" % node)
    if not nres.payload:
      lu.LogInfo("OS %s not found on node %s, validation skipped",
                 osname, node)


class LUCreateInstance(LogicalUnit):
6996
  """Create an instance.
6997

6998
  """
6999
  HPATH = "instance-add"
7000
  HTYPE = constants.HTYPE_INSTANCE
7001
  _OP_PARAMS = [
7002
    _PInstanceName,
7003
    ("mode", ht.NoDefault, ht.TElemOf(constants.INSTANCE_CREATE_MODES)),
7004
    ("start", True, ht.TBool),
7005
    ("wait_for_sync", True, ht.TBool),
7006
    ("ip_check", True, ht.TBool),
7007
    ("name_check", True, ht.TBool),
7008
    ("disks", ht.NoDefault, ht.TListOf(ht.TDict)),
7009
    ("nics", ht.NoDefault, ht.TListOf(ht.TDict)),
7010
    ("hvparams", ht.EmptyDict, ht.TDict),
7011
    ("beparams", ht.EmptyDict, ht.TDict),
7012
    ("osparams", ht.EmptyDict, ht.TDict),
7013
    ("no_install", None, ht.TMaybeBool),
7014
    ("os_type", None, ht.TMaybeString),
7015
    ("force_variant", False, ht.TBool),
7016
    ("source_handshake", None, ht.TOr(ht.TList, ht.TNone)),
7017
    ("source_x509_ca", None, ht.TMaybeString),
7018
    ("source_instance_name", None, ht.TMaybeString),
7019
    ("source_shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT,
7020
     ht.TPositiveInt),
7021
    ("src_node", None, ht.TMaybeString),
7022
    ("src_path", None, ht.TMaybeString),
7023
    ("pnode", None, ht.TMaybeString),
7024
    ("snode", None, ht.TMaybeString),
7025
    ("iallocator", None, ht.TMaybeString),
7026
    ("hypervisor", None, ht.TMaybeString),
7027
    ("disk_template", ht.NoDefault, _CheckDiskTemplate),
7028
    ("identify_defaults", False, ht.TBool),
7029
    ("file_driver", None, ht.TOr(ht.TNone, ht.TElemOf(constants.FILE_DRIVER))),
7030
    ("file_storage_dir", None, ht.TMaybeString),
7031
    ]
7032
  REQ_BGL = False
7033

    
7034
  def CheckArguments(self):
    """Check arguments.

    """
    # do not require name_check to ease forward/backward compatibility
    # for tools
    if self.op.no_install and self.op.start:
      self.LogInfo("No-installation mode selected, disabling startup")
      self.op.start = False
    # validate/normalize the instance name
    self.op.instance_name = \
      netutils.Hostname.GetNormalizedName(self.op.instance_name)

    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do ip check without a name check",
                                 errors.ECODE_INVAL)

    # check nics' parameter names
    for nic in self.op.nics:
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)

    # check disks: parameter names and consistent adopt/no-adopt strategy
    has_adopt = has_no_adopt = False
    for disk in self.op.disks:
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
      if "adopt" in disk:
        has_adopt = True
      else:
        has_no_adopt = True
    if has_adopt and has_no_adopt:
      raise errors.OpPrereqError("Either all disks are adopted or none is",
                                 errors.ECODE_INVAL)
    if has_adopt:
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
        raise errors.OpPrereqError("Disk adoption is not supported for the"
                                   " '%s' disk template" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)
      if self.op.iallocator is not None:
        raise errors.OpPrereqError("Disk adoption not allowed with an"
                                   " iallocator script", errors.ECODE_INVAL)
      if self.op.mode == constants.INSTANCE_IMPORT:
        raise errors.OpPrereqError("Disk adoption not allowed for"
                                   " instance import", errors.ECODE_INVAL)

    self.adopt_disks = has_adopt

    # instance name verification
    if self.op.name_check:
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
      self.op.instance_name = self.hostname1.name
      # used in CheckPrereq for ip ping check
      self.check_ip = self.hostname1.ip
    else:
      self.check_ip = None

    # file storage checks
    if (self.op.file_driver and
        not self.op.file_driver in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver, errors.ECODE_INVAL)

    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
      raise errors.OpPrereqError("File storage directory path not absolute",
                                 errors.ECODE_INVAL)

    ### Node/iallocator related checks
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")

    if self.op.pnode is not None:
      if self.op.disk_template in constants.DTS_NET_MIRROR:
        if self.op.snode is None:
          raise errors.OpPrereqError("The networked disk templates need"
                                     " a mirror node", errors.ECODE_INVAL)
      elif self.op.snode:
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
                        " template")
        self.op.snode = None

    self._cds = _GetClusterDomainSecret()

    if self.op.mode == constants.INSTANCE_IMPORT:
      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      # works again!
      self.op.force_variant = True

      if self.op.no_install:
        self.LogInfo("No-installation mode has no effect during import")

    elif self.op.mode == constants.INSTANCE_CREATE:
      if self.op.os_type is None:
        raise errors.OpPrereqError("No guest OS specified",
                                   errors.ECODE_INVAL)
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
                                   " installation" % self.op.os_type,
                                   errors.ECODE_STATE)
      if self.op.disk_template is None:
        raise errors.OpPrereqError("No disk template specified",
                                   errors.ECODE_INVAL)

    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      # Check handshake to ensure both clusters have the same domain secret
      src_handshake = self.op.source_handshake
      if not src_handshake:
        raise errors.OpPrereqError("Missing source handshake",
                                   errors.ECODE_INVAL)

      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
                                                           src_handshake)
      if errmsg:
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
                                   errors.ECODE_INVAL)

      # Load and check source CA
      self.source_x509_ca_pem = self.op.source_x509_ca
      if not self.source_x509_ca_pem:
        raise errors.OpPrereqError("Missing source X509 CA",
                                   errors.ECODE_INVAL)

      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
                                                    self._cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
                                   errors.ECODE_INVAL)

      self.source_x509_ca = cert

      src_instance_name = self.op.source_instance_name
      if not src_instance_name:
        raise errors.OpPrereqError("Missing source instance name",
                                   errors.ECODE_INVAL)

      self.source_instance_name = \
          netutils.GetHostname(name=src_instance_name).name

    else:
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
                                 self.op.mode, errors.ECODE_INVAL)

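  # Locking strategy implemented below: when an iallocator will pick the
  # nodes we cannot name them yet, so all node locks are acquired
  # (locking.ALL_SET); otherwise only the explicitly given primary/secondary
  # nodes (plus, for imports, the source node) are locked.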
  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    """
    self.needed_locks = {}

    instance_name = self.op.instance_name
    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name, errors.ECODE_EXISTS)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    if self.op.iallocator:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path

      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from an absolute"
                                     " path requires a source node option.",
                                     errors.ECODE_INVAL)
      else:
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          self.op.src_path = src_path = \
            utils.PathJoin(constants.EXPORT_DIR, src_path)

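  # Note that _RunAllocator reads state prepared in CheckPrereq (self.be_full,
  # self.disks, self.nics) and writes its decision back into self.op.pnode and
  # self.op.snode, so it is only meaningful once those fields are filled in.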
  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    nics = [n.ToDict() for n in self.nics]
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     tags=[],
                     os=self.op.os_type,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     mem_size=self.be_full[constants.BE_MEMORY],
                     disks=self.disks,
                     nics=nics,
                     hypervisor=self.op.hypervisor,
                     )

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.result),
                                  ial.required_nodes), errors.ECODE_FAULT)
    self.op.pnode = ial.result[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.result))
    if ial.required_nodes == 2:
      self.op.snode = ial.result[1]

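  # The dict built below becomes the hook scripts' environment; under
  # Ganeti's hook convention each key is exported with a "GANETI_" prefix,
  # so e.g. ADD_MODE is seen by hooks as GANETI_ADD_MODE.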
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      memory=self.be_full[constants.BE_MEMORY],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d["size"], d["mode"]) for d in self.disks],
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor_name=self.op.hypervisor,
    ))

    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
          self.secondaries)
    return env, nl, nl

  def _ReadExportInfo(self):
7304
    """Reads the export information from disk.
7305

7306
    It will override the opcode source node and path with the actual
7307
    information, if these two were not specified before.
7308

7309
    @return: the export information
7310

7311
    """
7312
    assert self.op.mode == constants.INSTANCE_IMPORT
7313

    
7314
    src_node = self.op.src_node
7315
    src_path = self.op.src_path
7316

    
7317
    if src_node is None:
7318
      locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
7319
      exp_list = self.rpc.call_export_list(locked_nodes)
7320
      found = False
7321
      for node in exp_list:
7322
        if exp_list[node].fail_msg:
7323
          continue
7324
        if src_path in exp_list[node].payload:
7325
          found = True
7326
          self.op.src_node = src_node = node
7327
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
7328
                                                       src_path)
7329
          break
7330
      if not found:
7331
        raise errors.OpPrereqError("No export found for relative path %s" %
7332
                                    src_path, errors.ECODE_INVAL)
7333

    
7334
    _CheckNodeOnline(self, src_node)
7335
    result = self.rpc.call_export_info(src_node, src_path)
7336
    result.Raise("No export or invalid export found in dir %s" % src_path)
7337

    
7338
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
7339
    if not export_info.has_section(constants.INISECT_EXP):
7340
      raise errors.ProgrammerError("Corrupted export config",
7341
                                   errors.ECODE_ENVIRON)
7342

    
7343
    ei_version = export_info.get(constants.INISECT_EXP, "version")
7344
    if (int(ei_version) != constants.EXPORT_VERSION):
7345
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
7346
                                 (ei_version, constants.EXPORT_VERSION),
7347
                                 errors.ECODE_ENVIRON)
7348
    return export_info
7349

    
7350
  def _ReadExportParams(self, einfo):
7351
    """Use export parameters as defaults.
7352

7353
    In case the opcode doesn't specify (as in override) some instance
7354
    parameters, then try to use them from the export information, if
7355
    that declares them.
7356

7357
    """
7358
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
7359

    
7360
    if self.op.disk_template is None:
7361
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
7362
        self.op.disk_template = einfo.get(constants.INISECT_INS,
7363
                                          "disk_template")
7364
      else:
7365
        raise errors.OpPrereqError("No disk template specified and the export"
7366
                                   " is missing the disk_template information",
7367
                                   errors.ECODE_INVAL)
7368

    
7369
    if not self.op.disks:
7370
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
7371
        disks = []
7372
        # TODO: import the disk iv_name too
7373
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
7374
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
7375
          disks.append({"size": disk_sz})
7376
        self.op.disks = disks
7377
      else:
7378
        raise errors.OpPrereqError("No disk info specified and the export"
7379
                                   " is missing the disk information",
7380
                                   errors.ECODE_INVAL)
7381

    
7382
    if (not self.op.nics and
7383
        einfo.has_option(constants.INISECT_INS, "nic_count")):
7384
      nics = []
7385
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
7386
        ndict = {}
7387
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
7388
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
7389
          ndict[name] = v
7390
        nics.append(ndict)
7391
      self.op.nics = nics
7392

    
7393
    if (self.op.hypervisor is None and
7394
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
7395
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
7396
    if einfo.has_section(constants.INISECT_HYP):
7397
      # use the export parameters but do not override the ones
7398
      # specified by the user
7399
      for name, value in einfo.items(constants.INISECT_HYP):
7400
        if name not in self.op.hvparams:
7401
          self.op.hvparams[name] = value
7402

    
7403
    if einfo.has_section(constants.INISECT_BEP):
7404
      # use the parameters, without overriding
7405
      for name, value in einfo.items(constants.INISECT_BEP):
7406
        if name not in self.op.beparams:
7407
          self.op.beparams[name] = value
7408
    else:
7409
      # try to read the parameters old style, from the main section
7410
      for name in constants.BES_PARAMETERS:
7411
        if (name not in self.op.beparams and
7412
            einfo.has_option(constants.INISECT_INS, name)):
7413
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
7414

    
7415
    if einfo.has_section(constants.INISECT_OSP):
7416
      # use the parameters, without overriding
7417
      for name, value in einfo.items(constants.INISECT_OSP):
7418
        if name not in self.op.osparams:
7419
          self.op.osparams[name] = value
7420

    
7421
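  # Only used when the opcode sets identify_defaults: any value equal to the
  # current cluster default is dropped again, presumably so that the new
  # instance keeps following cluster-wide defaults instead of pinning
  # today's values into its own parameters.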
  def _RevertToDefaults(self, cluster):
7422
    """Revert the instance parameters to the default values.
7423

7424
    """
7425
    # hvparams
7426
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
7427
    for name in self.op.hvparams.keys():
7428
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
7429
        del self.op.hvparams[name]
7430
    # beparams
7431
    be_defs = cluster.SimpleFillBE({})
7432
    for name in self.op.beparams.keys():
7433
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
7434
        del self.op.beparams[name]
7435
    # nic params
7436
    nic_defs = cluster.SimpleFillNIC({})
7437
    for nic in self.op.nics:
7438
      for name in constants.NICS_PARAMETERS:
7439
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
7440
          del nic[name]
7441
    # osparams
7442
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
7443
    for name in self.op.osparams.keys():
7444
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
7445
        del self.op.osparams[name]
7446

    
7447
  def CheckPrereq(self):
7448
    """Check prerequisites.
7449

7450
    """
7451
    if self.op.mode == constants.INSTANCE_IMPORT:
7452
      export_info = self._ReadExportInfo()
7453
      self._ReadExportParams(export_info)
7454

    
7455
    _CheckDiskTemplate(self.op.disk_template)
7456

    
7457
    if (not self.cfg.GetVGName() and
7458
        self.op.disk_template not in constants.DTS_NOT_LVM):
7459
      raise errors.OpPrereqError("Cluster does not support lvm-based"
7460
                                 " instances", errors.ECODE_STATE)
7461

    
7462
    if self.op.hypervisor is None:
7463
      self.op.hypervisor = self.cfg.GetHypervisorType()
7464

    
7465
    cluster = self.cfg.GetClusterInfo()
7466
    enabled_hvs = cluster.enabled_hypervisors
7467
    if self.op.hypervisor not in enabled_hvs:
7468
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
7469
                                 " cluster (%s)" % (self.op.hypervisor,
7470
                                  ",".join(enabled_hvs)),
7471
                                 errors.ECODE_STATE)
7472

    
7473
    # check hypervisor parameter syntax (locally)
7474
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7475
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
7476
                                      self.op.hvparams)
7477
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
7478
    hv_type.CheckParameterSyntax(filled_hvp)
7479
    self.hv_full = filled_hvp
7480
    # check that we don't specify global parameters on an instance
7481
    _CheckGlobalHvParams(self.op.hvparams)
7482

    
7483
    # fill and remember the beparams dict
7484
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7485
    self.be_full = cluster.SimpleFillBE(self.op.beparams)
7486

    
7487
    # build os parameters
7488
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
7489

    
7490
    # now that hvp/bep are in final format, let's reset to defaults,
7491
    # if told to do so
7492
    if self.op.identify_defaults:
7493
      self._RevertToDefaults(cluster)
7494

    
7495
    # NIC buildup
7496
    self.nics = []
7497
    for idx, nic in enumerate(self.op.nics):
7498
      nic_mode_req = nic.get("mode", None)
7499
      nic_mode = nic_mode_req
7500
      if nic_mode is None:
7501
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
7502

    
7503
      # in routed mode, for the first nic, the default ip is 'auto'
7504
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
7505
        default_ip_mode = constants.VALUE_AUTO
7506
      else:
7507
        default_ip_mode = constants.VALUE_NONE
7508

    
7509
      # ip validity checks
7510
      ip = nic.get("ip", default_ip_mode)
7511
      if ip is None or ip.lower() == constants.VALUE_NONE:
7512
        nic_ip = None
7513
      elif ip.lower() == constants.VALUE_AUTO:
7514
        if not self.op.name_check:
7515
          raise errors.OpPrereqError("IP address set to auto but name checks"
7516
                                     " have been skipped",
7517
                                     errors.ECODE_INVAL)
7518
        nic_ip = self.hostname1.ip
7519
      else:
7520
        if not netutils.IPAddress.IsValid(ip):
7521
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
7522
                                     errors.ECODE_INVAL)
7523
        nic_ip = ip
7524

    
7525
      # TODO: check the ip address for uniqueness
7526
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7527
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
7528
                                   errors.ECODE_INVAL)
7529

    
7530
      # MAC address verification
7531
      mac = nic.get("mac", constants.VALUE_AUTO)
7532
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7533
        mac = utils.NormalizeAndValidateMac(mac)
7534

    
7535
        try:
7536
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
7537
        except errors.ReservationError:
7538
          raise errors.OpPrereqError("MAC address %s already in use"
7539
                                     " in cluster" % mac,
7540
                                     errors.ECODE_NOTUNIQUE)
7541

    
7542
      # bridge verification
7543
      bridge = nic.get("bridge", None)
7544
      link = nic.get("link", None)
7545
      if bridge and link:
7546
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7547
                                   " at the same time", errors.ECODE_INVAL)
7548
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
7549
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
7550
                                   errors.ECODE_INVAL)
7551
      elif bridge:
7552
        link = bridge
7553

    
7554
      nicparams = {}
7555
      if nic_mode_req:
7556
        nicparams[constants.NIC_MODE] = nic_mode_req
7557
      if link:
7558
        nicparams[constants.NIC_LINK] = link
7559

    
7560
      check_params = cluster.SimpleFillNIC(nicparams)
7561
      objects.NIC.CheckParameterSyntax(check_params)
7562
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
7563

    
7564
    # disk checks/pre-build
7565
    self.disks = []
7566
    for disk in self.op.disks:
7567
      mode = disk.get("mode", constants.DISK_RDWR)
7568
      if mode not in constants.DISK_ACCESS_SET:
7569
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
7570
                                   mode, errors.ECODE_INVAL)
7571
      size = disk.get("size", None)
7572
      if size is None:
7573
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
7574
      try:
7575
        size = int(size)
7576
      except (TypeError, ValueError):
7577
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
7578
                                   errors.ECODE_INVAL)
7579
      vg = disk.get("vg", self.cfg.GetVGName())
7580
      new_disk = {"size": size, "mode": mode, "vg": vg}
7581
      if "adopt" in disk:
7582
        new_disk["adopt"] = disk["adopt"]
7583
      self.disks.append(new_disk)
7584

    
7585
    if self.op.mode == constants.INSTANCE_IMPORT:
7586

    
7587
      # Check that the new instance doesn't have fewer disks than the export
7588
      instance_disks = len(self.disks)
7589
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7590
      if instance_disks < export_disks:
7591
        raise errors.OpPrereqError("Not enough disks to import."
7592
                                   " (instance: %d, export: %d)" %
7593
                                   (instance_disks, export_disks),
7594
                                   errors.ECODE_INVAL)
7595

    
7596
      disk_images = []
7597
      for idx in range(export_disks):
7598
        option = 'disk%d_dump' % idx
7599
        if export_info.has_option(constants.INISECT_INS, option):
7600
          # FIXME: are the old os-es, disk sizes, etc. useful?
7601
          export_name = export_info.get(constants.INISECT_INS, option)
7602
          image = utils.PathJoin(self.op.src_path, export_name)
7603
          disk_images.append(image)
7604
        else:
7605
          disk_images.append(False)
7606

    
7607
      self.src_images = disk_images
7608

    
7609
      old_name = export_info.get(constants.INISECT_INS, 'name')
7610
      try:
7611
        exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
7612
      except (TypeError, ValueError), err:
7613
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
7614
                                   " an integer: %s" % str(err),
7615
                                   errors.ECODE_STATE)
7616
      if self.op.instance_name == old_name:
7617
        for idx, nic in enumerate(self.nics):
7618
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
7619
            nic_mac_ini = 'nic%d_mac' % idx
7620
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
7621

    
7622
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
7623

    
7624
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
7625
    if self.op.ip_check:
7626
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
7627
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
7628
                                   (self.check_ip, self.op.instance_name),
7629
                                   errors.ECODE_NOTUNIQUE)
7630

    
7631
    #### mac address generation
7632
    # By generating here the mac address both the allocator and the hooks get
7633
    # the real final mac address rather than the 'auto' or 'generate' value.
7634
    # There is a race condition between the generation and the instance object
7635
    # creation, which means that we know the mac is valid now, but we're not
7636
    # sure it will be when we actually add the instance. If things go bad
7637
    # adding the instance will abort because of a duplicate mac, and the
7638
    # creation job will fail.
7639
    for nic in self.nics:
7640
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7641
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
7642

    
7643
    #### allocator run
7644

    
7645
    if self.op.iallocator is not None:
7646
      self._RunAllocator()
7647

    
7648
    #### node related checks
7649

    
7650
    # check primary node
7651
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
7652
    assert self.pnode is not None, \
7653
      "Cannot retrieve locked node %s" % self.op.pnode
7654
    if pnode.offline:
7655
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
7656
                                 pnode.name, errors.ECODE_STATE)
7657
    if pnode.drained:
7658
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
7659
                                 pnode.name, errors.ECODE_STATE)
7660
    if not pnode.vm_capable:
7661
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
7662
                                 " '%s'" % pnode.name, errors.ECODE_STATE)
7663

    
7664
    self.secondaries = []
7665

    
7666
    # mirror node verification
7667
    if self.op.disk_template in constants.DTS_NET_MIRROR:
7668
      if self.op.snode == pnode.name:
7669
        raise errors.OpPrereqError("The secondary node cannot be the"
7670
                                   " primary node.", errors.ECODE_INVAL)
7671
      _CheckNodeOnline(self, self.op.snode)
7672
      _CheckNodeNotDrained(self, self.op.snode)
7673
      _CheckNodeVmCapable(self, self.op.snode)
7674
      self.secondaries.append(self.op.snode)
7675

    
7676
    nodenames = [pnode.name] + self.secondaries
7677

    
7678
    if not self.adopt_disks:
7679
      # Check lv size requirements, if not adopting
7680
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
7681
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
7682

    
7683
    else: # instead, we must check the adoption data
7684
      all_lvs = set([i["vg"] + "/" + i["adopt"] for i in self.disks])
7685
      if len(all_lvs) != len(self.disks):
7686
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
7687
                                   errors.ECODE_INVAL)
7688
      for lv_name in all_lvs:
7689
        try:
7690
          # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
          # to ReserveLV use the same syntax
7692
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7693
        except errors.ReservationError:
7694
          raise errors.OpPrereqError("LV named %s used by another instance" %
7695
                                     lv_name, errors.ECODE_NOTUNIQUE)
7696

    
7697
      vg_names = self.rpc.call_vg_list([pnode.name])
7698
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
7699

    
7700
      node_lvs = self.rpc.call_lv_list([pnode.name],
7701
                                       vg_names[pnode.name].payload.keys()
7702
                                      )[pnode.name]
7703
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7704
      node_lvs = node_lvs.payload
7705

    
7706
      delta = all_lvs.difference(node_lvs.keys())
7707
      if delta:
7708
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
7709
                                   utils.CommaJoin(delta),
7710
                                   errors.ECODE_INVAL)
7711
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
7712
      if online_lvs:
7713
        raise errors.OpPrereqError("Online logical volumes found, cannot"
7714
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
7715
                                   errors.ECODE_STATE)
7716
      # update the size of disk based on what is found
7717
      for dsk in self.disks:
7718
        dsk["size"] = int(float(node_lvs[dsk["vg"] + "/" + dsk["adopt"]][0]))
7719

    
7720
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
7721

    
7722
    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
7723
    # check OS parameters (remotely)
7724
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
7725

    
7726
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
7727

    
7728
    # memory check on primary node
7729
    if self.op.start:
7730
      _CheckNodeFreeMemory(self, self.pnode.name,
7731
                           "creating instance %s" % self.op.instance_name,
7732
                           self.be_full[constants.BE_MEMORY],
7733
                           self.op.hypervisor)
7734

    
7735
    self.dry_run_result = list(nodenames)
7736

    
7737
  def Exec(self, feedback_fn):
7738
    """Create and add the instance to the cluster.
7739

7740
    """
7741
    instance = self.op.instance_name
7742
    pnode_name = self.pnode.name
7743

    
7744
    ht_kind = self.op.hypervisor
7745
    if ht_kind in constants.HTS_REQ_PORT:
7746
      network_port = self.cfg.AllocatePort()
7747
    else:
7748
      network_port = None
7749

    
7750
    if constants.ENABLE_FILE_STORAGE:
7751
      # this is needed because os.path.join does not accept None arguments
7752
      if self.op.file_storage_dir is None:
7753
        string_file_storage_dir = ""
7754
      else:
7755
        string_file_storage_dir = self.op.file_storage_dir
7756

    
7757
      # build the full file storage dir path
7758
      file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
7759
                                        string_file_storage_dir, instance)
7760
    else:
7761
      file_storage_dir = ""
7762

    
7763
    disks = _GenerateDiskTemplate(self,
7764
                                  self.op.disk_template,
7765
                                  instance, pnode_name,
7766
                                  self.secondaries,
7767
                                  self.disks,
7768
                                  file_storage_dir,
7769
                                  self.op.file_driver,
7770
                                  0,
7771
                                  feedback_fn)
7772

    
7773
    iobj = objects.Instance(name=instance, os=self.op.os_type,
7774
                            primary_node=pnode_name,
7775
                            nics=self.nics, disks=disks,
7776
                            disk_template=self.op.disk_template,
7777
                            admin_up=False,
7778
                            network_port=network_port,
7779
                            beparams=self.op.beparams,
7780
                            hvparams=self.op.hvparams,
7781
                            hypervisor=self.op.hypervisor,
7782
                            osparams=self.op.osparams,
7783
                            )
7784

    
7785
    if self.adopt_disks:
7786
      # rename LVs to the newly-generated names; we need to construct
7787
      # 'fake' LV disks with the old data, plus the new unique_id
7788
      tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
7789
      rename_to = []
7790
      for t_dsk, a_dsk in zip(tmp_disks, self.disks):
7791
        rename_to.append(t_dsk.logical_id)
7792
        t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
7793
        self.cfg.SetDiskID(t_dsk, pnode_name)
7794
      result = self.rpc.call_blockdev_rename(pnode_name,
7795
                                             zip(tmp_disks, rename_to))
7796
      result.Raise("Failed to rename adopted LVs")
7797
    else:
7798
      feedback_fn("* creating instance disks...")
7799
      try:
7800
        _CreateDisks(self, iobj)
7801
      except errors.OpExecError:
7802
        self.LogWarning("Device creation failed, reverting...")
7803
        try:
7804
          _RemoveDisks(self, iobj)
7805
        finally:
7806
          self.cfg.ReleaseDRBDMinors(instance)
7807
          raise
7808

    
7809
      if self.cfg.GetClusterInfo().prealloc_wipe_disks:
7810
        feedback_fn("* wiping instance disks...")
7811
        try:
7812
          _WipeDisks(self, iobj)
7813
        except errors.OpExecError:
7814
          self.LogWarning("Device wiping failed, reverting...")
7815
          try:
7816
            _RemoveDisks(self, iobj)
7817
          finally:
7818
            self.cfg.ReleaseDRBDMinors(instance)
7819
            raise
7820

    
7821
    feedback_fn("adding instance %s to cluster config" % instance)
7822

    
7823
    self.cfg.AddInstance(iobj, self.proc.GetECId())
7824

    
7825
    # Declare that we don't want to remove the instance lock anymore, as we've
7826
    # added the instance to the config
7827
    del self.remove_locks[locking.LEVEL_INSTANCE]
7828
    # Unlock all the nodes
7829
    if self.op.mode == constants.INSTANCE_IMPORT:
7830
      nodes_keep = [self.op.src_node]
7831
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
7832
                       if node != self.op.src_node]
7833
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
7834
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
7835
    else:
7836
      self.context.glm.release(locking.LEVEL_NODE)
7837
      del self.acquired_locks[locking.LEVEL_NODE]
7838

    
7839
    if self.op.wait_for_sync:
7840
      disk_abort = not _WaitForSync(self, iobj)
7841
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
7842
      # make sure the disks are not degraded (still sync-ing is ok)
7843
      time.sleep(15)
7844
      feedback_fn("* checking mirrors status")
7845
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
7846
    else:
7847
      disk_abort = False
7848

    
7849
    if disk_abort:
7850
      _RemoveDisks(self, iobj)
7851
      self.cfg.RemoveInstance(iobj.name)
7852
      # Make sure the instance lock gets removed
7853
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7854
      raise errors.OpExecError("There are some degraded disks for"
7855
                               " this instance")
7856

    
7857
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7858
      if self.op.mode == constants.INSTANCE_CREATE:
7859
        if not self.op.no_install:
7860
          feedback_fn("* running the instance OS create scripts...")
7861
          # FIXME: pass debug option from opcode to backend
7862
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
7863
                                                 self.op.debug_level)
7864
          result.Raise("Could not add os for instance %s"
7865
                       " on node %s" % (instance, pnode_name))
7866

    
7867
      elif self.op.mode == constants.INSTANCE_IMPORT:
7868
        feedback_fn("* running the instance OS import scripts...")
7869

    
7870
        transfers = []
7871

    
7872
        for idx, image in enumerate(self.src_images):
7873
          if not image:
7874
            continue
7875

    
7876
          # FIXME: pass debug option from opcode to backend
7877
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7878
                                             constants.IEIO_FILE, (image, ),
7879
                                             constants.IEIO_SCRIPT,
7880
                                             (iobj.disks[idx], idx),
7881
                                             None)
7882
          transfers.append(dt)
7883

    
7884
        import_result = \
7885
          masterd.instance.TransferInstanceData(self, feedback_fn,
7886
                                                self.op.src_node, pnode_name,
7887
                                                self.pnode.secondary_ip,
7888
                                                iobj, transfers)
7889
        if not compat.all(import_result):
7890
          self.LogWarning("Some disks for instance %s on node %s were not"
7891
                          " imported successfully" % (instance, pnode_name))
7892

    
7893
      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7894
        feedback_fn("* preparing remote import...")
7895
        # The source cluster will stop the instance before attempting to make a
7896
        # connection. In some cases stopping an instance can take a long time,
7897
        # hence the shutdown timeout is added to the connection timeout.
7898
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
7899
                           self.op.source_shutdown_timeout)
7900
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7901

    
7902
        assert iobj.primary_node == self.pnode.name
7903
        disk_results = \
7904
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
7905
                                        self.source_x509_ca,
7906
                                        self._cds, timeouts)
7907
        if not compat.all(disk_results):
7908
          # TODO: Should the instance still be started, even if some disks
7909
          # failed to import (valid for local imports, too)?
7910
          self.LogWarning("Some disks for instance %s on node %s were not"
7911
                          " imported successfully" % (instance, pnode_name))
7912

    
7913
        # Run rename script on newly imported instance
7914
        assert iobj.name == instance
7915
        feedback_fn("Running rename script for %s" % instance)
7916
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7917
                                                   self.source_instance_name,
7918
                                                   self.op.debug_level)
7919
        if result.fail_msg:
7920
          self.LogWarning("Failed to run rename script for %s on node"
7921
                          " %s: %s" % (instance, pnode_name, result.fail_msg))
7922

    
7923
      else:
7924
        # also checked in the prereq part
7925
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
7926
                                     % self.op.mode)
7927

    
7928
    if self.op.start:
7929
      iobj.admin_up = True
7930
      self.cfg.Update(iobj, feedback_fn)
7931
      logging.info("Starting instance %s on node %s", instance, pnode_name)
7932
      feedback_fn("* starting instance...")
7933
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
7934
      result.Raise("Could not start instance")
7935

    
7936
    return list(iobj.all_nodes)
7937

    
7938

    
7939
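# Rough usage sketch for the creation LU above; the opcode class name is an
# assumption (OpCreateInstance in this era's opcodes.py) and should be
# checked against the tree.  Callers normally submit something like
#   opcodes.OpCreateInstance(instance_name="inst1.example.com",
#                            mode=constants.INSTANCE_CREATE,
#                            disk_template=constants.DT_DRBD8,
#                            disks=[{"size": 10240}], nics=[{}],
#                            os_type="debootstrap+default")
# through the job queue; LUs themselves are never instantiated directly.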
class LUConnectConsole(NoHooksLU):
7940
  """Connect to an instance's console.
7941

7942
  This is somewhat special in that it returns the command line that
7943
  you need to run on the master node in order to connect to the
7944
  console.
7945

7946
  """
7947
  _OP_PARAMS = [
7948
    _PInstanceName
7949
    ]
7950
  REQ_BGL = False
7951

    
7952
  def ExpandNames(self):
7953
    self._ExpandAndLockInstance()
7954

    
7955
  def CheckPrereq(self):
7956
    """Check prerequisites.
7957

7958
    This checks that the instance is in the cluster.
7959

7960
    """
7961
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7962
    assert self.instance is not None, \
7963
      "Cannot retrieve locked instance %s" % self.op.instance_name
7964
    _CheckNodeOnline(self, self.instance.primary_node)
7965

    
7966
  def Exec(self, feedback_fn):
7967
    """Connect to the console of an instance
7968

7969
    """
7970
    instance = self.instance
7971
    node = instance.primary_node
7972

    
7973
    node_insts = self.rpc.call_instance_list([node],
7974
                                             [instance.hypervisor])[node]
7975
    node_insts.Raise("Can't get node information from %s" % node)
7976

    
7977
    if instance.name not in node_insts.payload:
7978
      if instance.admin_up:
7979
        state = "ERROR_down"
7980
      else:
7981
        state = "ADMIN_down"
7982
      raise errors.OpExecError("Instance %s is not running (state %s)" %
7983
                               (instance.name, state))
7984

    
7985
    logging.debug("Connecting to console of %s on %s", instance.name, node)
7986

    
7987
    hyper = hypervisor.GetHypervisor(instance.hypervisor)
7988
    cluster = self.cfg.GetClusterInfo()
7989
    # beparams and hvparams are passed separately, to avoid editing the
7990
    # instance and then saving the defaults in the instance itself.
7991
    hvparams = cluster.FillHV(instance)
7992
    beparams = cluster.FillBE(instance)
7993
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
7994

    
7995
    # build ssh cmdline
7996
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
7997

    
7998

    
7999
class LUReplaceDisks(LogicalUnit):
8000
  """Replace the disks of an instance.
8001

8002
  """
8003
  HPATH = "mirrors-replace"
8004
  HTYPE = constants.HTYPE_INSTANCE
8005
  _OP_PARAMS = [
8006
    _PInstanceName,
8007
    ("mode", ht.NoDefault, ht.TElemOf(constants.REPLACE_MODES)),
8008
    ("disks", ht.EmptyList, ht.TListOf(ht.TPositiveInt)),
8009
    ("remote_node", None, ht.TMaybeString),
8010
    ("iallocator", None, ht.TMaybeString),
8011
    ("early_release", False, ht.TBool),
8012
    ]
8013
  REQ_BGL = False
8014

    
8015
  def CheckArguments(self):
8016
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
8017
                                  self.op.iallocator)
8018

    
8019
  def ExpandNames(self):
8020
    self._ExpandAndLockInstance()
8021

    
8022
    if self.op.iallocator is not None:
8023
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8024

    
8025
    elif self.op.remote_node is not None:
8026
      remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8027
      self.op.remote_node = remote_node
8028

    
8029
      # Warning: do not remove the locking of the new secondary here
8030
      # unless DRBD8.AddChildren is changed to work in parallel;
8031
      # currently it doesn't since parallel invocations of
8032
      # FindUnusedMinor will conflict
8033
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
8034
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
8035

    
8036
    else:
8037
      self.needed_locks[locking.LEVEL_NODE] = []
8038
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8039

    
8040
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
8041
                                   self.op.iallocator, self.op.remote_node,
8042
                                   self.op.disks, False, self.op.early_release)
8043

    
8044
    self.tasklets = [self.replacer]
8045

    
8046
  def DeclareLocks(self, level):
8047
    # If we're not already locking all nodes in the set we have to declare the
8048
    # instance's primary/secondary nodes.
8049
    if (level == locking.LEVEL_NODE and
8050
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
8051
      self._LockInstancesNodes()
8052

    
8053
  def BuildHooksEnv(self):
8054
    """Build hooks env.
8055

8056
    This runs on the master, the primary and all the secondaries.
8057

8058
    """
8059
    instance = self.replacer.instance
8060
    env = {
8061
      "MODE": self.op.mode,
8062
      "NEW_SECONDARY": self.op.remote_node,
8063
      "OLD_SECONDARY": instance.secondary_nodes[0],
8064
      }
8065
    env.update(_BuildInstanceHookEnvByObject(self, instance))
8066
    nl = [
8067
      self.cfg.GetMasterNode(),
8068
      instance.primary_node,
8069
      ]
8070
    if self.op.remote_node is not None:
8071
      nl.append(self.op.remote_node)
8072
    return env, nl, nl
8073

    
8074

    
8075
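# TLReplaceDisks is a Tasklet rather than an LU so that its CheckPrereq/Exec
# pair can be embedded in other operations; LUReplaceDisks above simply wires
# one instance into self.tasklets, and all locking stays with the calling LU
# (see the class docstring below).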
class TLReplaceDisks(Tasklet):
8076
  """Replaces disks for an instance.
8077

8078
  Note: Locking is not within the scope of this class.
8079

8080
  """
8081
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
8082
               disks, delay_iallocator, early_release):
8083
    """Initializes this class.
8084

8085
    """
8086
    Tasklet.__init__(self, lu)
8087

    
8088
    # Parameters
8089
    self.instance_name = instance_name
8090
    self.mode = mode
8091
    self.iallocator_name = iallocator_name
8092
    self.remote_node = remote_node
8093
    self.disks = disks
8094
    self.delay_iallocator = delay_iallocator
8095
    self.early_release = early_release
8096

    
8097
    # Runtime data
8098
    self.instance = None
8099
    self.new_node = None
8100
    self.target_node = None
8101
    self.other_node = None
8102
    self.remote_node_info = None
8103
    self.node_secondary_ip = None
8104

    
8105
  @staticmethod
8106
  def CheckArguments(mode, remote_node, iallocator):
8107
    """Helper function for users of this class.
8108

8109
    """
8110
    # check for valid parameter combination
8111
    if mode == constants.REPLACE_DISK_CHG:
8112
      if remote_node is None and iallocator is None:
8113
        raise errors.OpPrereqError("When changing the secondary either an"
8114
                                   " iallocator script must be used or the"
8115
                                   " new node given", errors.ECODE_INVAL)
8116

    
8117
      if remote_node is not None and iallocator is not None:
8118
        raise errors.OpPrereqError("Give either the iallocator or the new"
8119
                                   " secondary, not both", errors.ECODE_INVAL)
8120

    
8121
    elif remote_node is not None or iallocator is not None:
8122
      # Not replacing the secondary
8123
      raise errors.OpPrereqError("The iallocator and new node options can"
8124
                                 " only be used when changing the"
8125
                                 " secondary node", errors.ECODE_INVAL)
8126

    
8127
  @staticmethod
8128
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
8129
    """Compute a new secondary node using an IAllocator.
8130

8131
    """
8132
    ial = IAllocator(lu.cfg, lu.rpc,
8133
                     mode=constants.IALLOCATOR_MODE_RELOC,
8134
                     name=instance_name,
8135
                     relocate_from=relocate_from)
8136

    
8137
    ial.Run(iallocator_name)
8138

    
8139
    if not ial.success:
8140
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
8141
                                 " %s" % (iallocator_name, ial.info),
8142
                                 errors.ECODE_NORES)
8143

    
8144
    if len(ial.result) != ial.required_nodes:
8145
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8146
                                 " of nodes (%s), required %s" %
8147
                                 (iallocator_name,
8148
                                  len(ial.result), ial.required_nodes),
8149
                                 errors.ECODE_FAULT)
8150

    
8151
    remote_node_name = ial.result[0]
8152

    
8153
    lu.LogInfo("Selected new secondary for instance '%s': %s",
8154
               instance_name, remote_node_name)
8155

    
8156
    return remote_node_name
8157

    
8158
  def _FindFaultyDisks(self, node_name):
8159
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
8160
                                    node_name, True)
8161

    
8162
  def CheckPrereq(self):
8163
    """Check prerequisites.
8164

8165
    This checks that the instance is in the cluster.
8166

8167
    """
8168
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
8169
    assert instance is not None, \
8170
      "Cannot retrieve locked instance %s" % self.instance_name
8171

    
8172
    if instance.disk_template != constants.DT_DRBD8:
8173
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
8174
                                 " instances", errors.ECODE_INVAL)
8175

    
8176
    if len(instance.secondary_nodes) != 1:
8177
      raise errors.OpPrereqError("The instance has a strange layout,"
8178
                                 " expected one secondary but found %d" %
8179
                                 len(instance.secondary_nodes),
8180
                                 errors.ECODE_FAULT)
8181

    
8182
    if not self.delay_iallocator:
8183
      self._CheckPrereq2()
8184

    
8185
  def _CheckPrereq2(self):
8186
    """Check prerequisites, second part.
8187

8188
    This function should always be part of CheckPrereq. It was separated and is
8189
    now called from Exec because during node evacuation iallocator was only
8190
    called with an unmodified cluster model, not taking planned changes into
8191
    account.
8192

8193
    """
8194
    instance = self.instance
8195
    secondary_node = instance.secondary_nodes[0]
8196

    
8197
    if self.iallocator_name is None:
8198
      remote_node = self.remote_node
8199
    else:
8200
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
8201
                                       instance.name, instance.secondary_nodes)
8202

    
8203
    if remote_node is not None:
8204
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
8205
      assert self.remote_node_info is not None, \
8206
        "Cannot retrieve locked node %s" % remote_node
8207
    else:
8208
      self.remote_node_info = None
8209

    
8210
    if remote_node == self.instance.primary_node:
8211
      raise errors.OpPrereqError("The specified node is the primary node of"
8212
                                 " the instance.", errors.ECODE_INVAL)
8213

    
8214
    if remote_node == secondary_node:
8215
      raise errors.OpPrereqError("The specified node is already the"
8216
                                 " secondary node of the instance.",
8217
                                 errors.ECODE_INVAL)
8218

    
8219
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
8220
                                    constants.REPLACE_DISK_CHG):
8221
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
8222
                                 errors.ECODE_INVAL)
8223

    
8224
    if self.mode == constants.REPLACE_DISK_AUTO:
8225
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
8226
      faulty_secondary = self._FindFaultyDisks(secondary_node)
8227

    
8228
      if faulty_primary and faulty_secondary:
8229
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
8230
                                   " one node and can not be repaired"
8231
                                   " automatically" % self.instance_name,
8232
                                   errors.ECODE_STATE)
8233

    
8234
      if faulty_primary:
8235
        self.disks = faulty_primary
8236
        self.target_node = instance.primary_node
8237
        self.other_node = secondary_node
8238
        check_nodes = [self.target_node, self.other_node]
8239
      elif faulty_secondary:
8240
        self.disks = faulty_secondary
8241
        self.target_node = secondary_node
8242
        self.other_node = instance.primary_node
8243
        check_nodes = [self.target_node, self.other_node]
8244
      else:
8245
        self.disks = []
8246
        check_nodes = []
8247

    
8248
    else:
8249
      # Non-automatic modes
8250
      if self.mode == constants.REPLACE_DISK_PRI:
8251
        self.target_node = instance.primary_node
8252
        self.other_node = secondary_node
8253
        check_nodes = [self.target_node, self.other_node]
8254

    
8255
      elif self.mode == constants.REPLACE_DISK_SEC:
8256
        self.target_node = secondary_node
8257
        self.other_node = instance.primary_node
8258
        check_nodes = [self.target_node, self.other_node]
8259

    
8260
      elif self.mode == constants.REPLACE_DISK_CHG:
8261
        self.new_node = remote_node
8262
        self.other_node = instance.primary_node
8263
        self.target_node = secondary_node
8264
        check_nodes = [self.new_node, self.other_node]
8265

    
8266
        _CheckNodeNotDrained(self.lu, remote_node)
8267
        _CheckNodeVmCapable(self.lu, remote_node)
8268

    
8269
        old_node_info = self.cfg.GetNodeInfo(secondary_node)
8270
        assert old_node_info is not None
8271
        if old_node_info.offline and not self.early_release:
8272
          # doesn't make sense to delay the release
8273
          self.early_release = True
8274
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
8275
                          " early-release mode", secondary_node)
8276

    
8277
      else:
8278
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
8279
                                     self.mode)
8280

    
8281
      # If not specified all disks should be replaced
8282
      if not self.disks:
8283
        self.disks = range(len(self.instance.disks))
8284

    
8285
    for node in check_nodes:
8286
      _CheckNodeOnline(self.lu, node)
8287

    
8288
    # Check whether disks are valid
8289
    for disk_idx in self.disks:
8290
      instance.FindDisk(disk_idx)
8291

    
8292
    # Get secondary node IP addresses
8293
    node_2nd_ip = {}
8294

    
8295
    for node_name in [self.target_node, self.other_node, self.new_node]:
8296
      if node_name is not None:
8297
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
8298

    
8299
    self.node_secondary_ip = node_2nd_ip
8300

    
8301
  def Exec(self, feedback_fn):
8302
    """Execute disk replacement.
8303

8304
    This dispatches the disk replacement to the appropriate handler.
8305

8306
    """
8307
    if self.delay_iallocator:
8308
      self._CheckPrereq2()
8309

    
8310
    if not self.disks:
8311
      feedback_fn("No disks need replacement")
8312
      return
8313

    
8314
    feedback_fn("Replacing disk(s) %s for %s" %
8315
                (utils.CommaJoin(self.disks), self.instance.name))
8316

    
8317
    activate_disks = (not self.instance.admin_up)
8318

    
8319
    # Activate the instance disks if we're replacing them on a down instance
8320
    if activate_disks:
8321
      _StartInstanceDisks(self.lu, self.instance, True)
8322

    
8323
    try:
8324
      # Should we replace the secondary node?
8325
      if self.new_node is not None:
8326
        fn = self._ExecDrbd8Secondary
8327
      else:
8328
        fn = self._ExecDrbd8DiskOnly
8329

    
8330
      return fn(feedback_fn)
8331

    
8332
    finally:
8333
      # Deactivate the instance disks if we're replacing them on a
8334
      # down instance
8335
      if activate_disks:
8336
        _SafeShutdownInstanceDisks(self.lu, self.instance)
8337

    
8338
  def _CheckVolumeGroup(self, nodes):
8339
    self.lu.LogInfo("Checking volume groups")
8340

    
8341
    vgname = self.cfg.GetVGName()
8342

    
8343
    # Make sure volume group exists on all involved nodes
8344
    results = self.rpc.call_vg_list(nodes)
8345
    if not results:
8346
      raise errors.OpExecError("Can't list volume groups on the nodes")
8347

    
8348
    for node in nodes:
8349
      res = results[node]
8350
      res.Raise("Error checking node %s" % node)
8351
      if vgname not in res.payload:
8352
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
8353
                                 (vgname, node))
8354

    
8355
  def _CheckDisksExistence(self, nodes):
8356
    # Check disk existence
8357
    for idx, dev in enumerate(self.instance.disks):
8358
      if idx not in self.disks:
8359
        continue
8360

    
8361
      for node in nodes:
8362
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
8363
        self.cfg.SetDiskID(dev, node)
8364

    
8365
        result = self.rpc.call_blockdev_find(node, dev)
8366

    
8367
        msg = result.fail_msg
8368
        if msg or not result.payload:
8369
          if not msg:
8370
            msg = "disk not found"
8371
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
8372
                                   (idx, node, msg))
8373

    
8374
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
8375
    for idx, dev in enumerate(self.instance.disks):
8376
      if idx not in self.disks:
8377
        continue
8378

    
8379
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
8380
                      (idx, node_name))
8381

    
8382
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
8383
                                   ldisk=ldisk):
8384
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
8385
                                 " replace disks for instance %s" %
8386
                                 (node_name, self.instance.name))
8387

    
8388
  def _CreateNewStorage(self, node_name):
8389
    vgname = self.cfg.GetVGName()
8390
    iv_names = {}
8391

    
8392
    for idx, dev in enumerate(self.instance.disks):
8393
      if idx not in self.disks:
8394
        continue
8395

    
8396
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
8397

    
8398
      self.cfg.SetDiskID(dev, node_name)
8399

    
8400
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
8401
      names = _GenerateUniqueNames(self.lu, lv_names)
8402

    
8403
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
8404
                             logical_id=(vgname, names[0]))
8405
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
8406
                             logical_id=(vgname, names[1]))
8407

    
8408
      new_lvs = [lv_data, lv_meta]
8409
      old_lvs = dev.children
8410
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
8411

    
8412
      # we pass force_create=True to force the LVM creation
8413
      for new_lv in new_lvs:
8414
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
8415
                        _GetInstanceInfoText(self.instance), False)
8416

    
8417
    return iv_names
8418

    
8419
  def _CheckDevices(self, node_name, iv_names):
8420
    for name, (dev, _, _) in iv_names.iteritems():
8421
      self.cfg.SetDiskID(dev, node_name)
8422

    
8423
      result = self.rpc.call_blockdev_find(node_name, dev)
8424

    
8425
      msg = result.fail_msg
8426
      if msg or not result.payload:
8427
        if not msg:
8428
          msg = "disk not found"
8429
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
8430
                                 (name, msg))
8431

    
8432
      if result.payload.is_degraded:
8433
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
8434

    
8435
  def _RemoveOldStorage(self, node_name, iv_names):
8436
    for name, (_, old_lvs, _) in iv_names.iteritems():
8437
      self.lu.LogInfo("Remove logical volumes for %s" % name)
8438

    
8439
      for lv in old_lvs:
8440
        self.cfg.SetDiskID(lv, node_name)
8441

    
8442
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
8443
        if msg:
8444
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
8445
                             hint="remove unused LVs manually")
8446

    
8447
  def _ReleaseNodeLock(self, node_name):
8448
    """Releases the lock for a given node."""
8449
    self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
8450

    
8451
  def _ExecDrbd8DiskOnly(self, feedback_fn):
8452
    """Replace a disk on the primary or secondary for DRBD 8.
8453

8454
    The algorithm for replace is quite complicated:
8455

8456
      1. for each disk to be replaced:
8457

8458
        1. create new LVs on the target node with unique names
8459
        1. detach old LVs from the drbd device
8460
        1. rename old LVs to name_replaced.<time_t>
8461
        1. rename new LVs to old LVs
8462
        1. attach the new LVs (with the old names now) to the drbd device
8463

8464
      1. wait for sync across all devices
8465

8466
      1. for each modified disk:
8467

8468
        1. remove old LVs (which have the name name_replaces.<time_t>)
8469

8470
    Failures are not very well handled.
8471

8472
    """
8473
    steps_total = 6
8474

    
8475
    # Step: check device activation
8476
    self.lu.LogStep(1, steps_total, "Check device existence")
8477
    self._CheckDisksExistence([self.other_node, self.target_node])
8478
    self._CheckVolumeGroup([self.target_node, self.other_node])
8479

    
8480
    # Step: check other node consistency
8481
    self.lu.LogStep(2, steps_total, "Check peer consistency")
8482
    self._CheckDisksConsistency(self.other_node,
8483
                                self.other_node == self.instance.primary_node,
8484
                                False)
8485

    
8486
    # Step: create new storage
8487
    self.lu.LogStep(3, steps_total, "Allocate new storage")
8488
    iv_names = self._CreateNewStorage(self.target_node)
8489

    
8490
    # Step: for each lv, detach+rename*2+attach
8491
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8492
    for dev, old_lvs, new_lvs in iv_names.itervalues():
8493
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
8494

    
8495
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
8496
                                                     old_lvs)
8497
      result.Raise("Can't detach drbd from local storage on node"
8498
                   " %s for device %s" % (self.target_node, dev.iv_name))
8499
      #dev.children = []
8500
      #cfg.Update(instance)
8501

    
8502
      # ok, we created the new LVs, so now we know we have the needed
8503
      # storage; as such, we proceed on the target node to rename
8504
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
8505
      # using the assumption that logical_id == physical_id (which in
8506
      # turn is the unique_id on that node)
8507

    
8508
      # FIXME(iustin): use a better name for the replaced LVs
8509
      temp_suffix = int(time.time())
8510
      ren_fn = lambda d, suff: (d.physical_id[0],
8511
                                d.physical_id[1] + "_replaced-%s" % suff)
8512

    
8513
      # Build the rename list based on what LVs exist on the node
8514
      rename_old_to_new = []
8515
      for to_ren in old_lvs:
8516
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
8517
        if not result.fail_msg and result.payload:
8518
          # device exists
8519
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
8520

    
8521
      self.lu.LogInfo("Renaming the old LVs on the target node")
8522
      result = self.rpc.call_blockdev_rename(self.target_node,
8523
                                             rename_old_to_new)
8524
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
8525

    
8526
      # Now we rename the new LVs to the old LVs
8527
      self.lu.LogInfo("Renaming the new LVs on the target node")
8528
      rename_new_to_old = [(new, old.physical_id)
8529
                           for old, new in zip(old_lvs, new_lvs)]
8530
      result = self.rpc.call_blockdev_rename(self.target_node,
8531
                                             rename_new_to_old)
8532
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
8533

    
8534
      for old, new in zip(old_lvs, new_lvs):
8535
        new.logical_id = old.logical_id
8536
        self.cfg.SetDiskID(new, self.target_node)
8537

    
8538
      for disk in old_lvs:
8539
        disk.logical_id = ren_fn(disk, temp_suffix)
8540
        self.cfg.SetDiskID(disk, self.target_node)
8541

    
8542
      # Now that the new lvs have the old name, we can add them to the device
8543
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
8544
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
8545
                                                  new_lvs)
8546
      msg = result.fail_msg
8547
      if msg:
8548
        for new_lv in new_lvs:
8549
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
8550
                                               new_lv).fail_msg
8551
          if msg2:
8552
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
8553
                               hint=("cleanup manually the unused logical"
8554
                                     "volumes"))
8555
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
8556

    
8557
      dev.children = new_lvs
8558

    
8559
      self.cfg.Update(self.instance, feedback_fn)
8560

    
8561
    cstep = 5
8562
    if self.early_release:
8563
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
8564
      cstep += 1
8565
      self._RemoveOldStorage(self.target_node, iv_names)
8566
      # WARNING: we release both node locks here, do not do other RPCs
8567
      # than WaitForSync to the primary node
8568
      self._ReleaseNodeLock([self.target_node, self.other_node])
8569

    
8570
    # Wait for sync
8571
    # This can fail as the old devices are degraded and _WaitForSync
8572
    # does a combined result over all disks, so we don't check its return value
8573
    self.lu.LogStep(cstep, steps_total, "Sync devices")
8574
    cstep += 1
8575
    _WaitForSync(self.lu, self.instance)
8576

    
8577
    # Check all devices manually
8578
    self._CheckDevices(self.instance.primary_node, iv_names)
8579

    
8580
    # Step: remove old storage
8581
    if not self.early_release:
8582
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
8583
      cstep += 1
8584
      self._RemoveOldStorage(self.target_node, iv_names)
8585

    
8586
  def _ExecDrbd8Secondary(self, feedback_fn):
8587
    """Replace the secondary node for DRBD 8.
8588

8589
    The algorithm for replace is quite complicated:
8590
      - for all disks of the instance:
8591
        - create new LVs on the new node with same names
8592
        - shutdown the drbd device on the old secondary
8593
        - disconnect the drbd network on the primary
8594
        - create the drbd device on the new secondary
8595
        - network attach the drbd on the primary, using an artifice:
8596
          the drbd code for Attach() will connect to the network if it
8597
          finds a device which is connected to the good local disks but
8598
          not network enabled
8599
      - wait for sync across all devices
8600
      - remove all disks from the old secondary
8601

8602
    Failures are not very well handled.
8603

8604
    """
8605
    steps_total = 6
8606

    
8607
    # Step: check device activation
8608
    self.lu.LogStep(1, steps_total, "Check device existence")
8609
    self._CheckDisksExistence([self.instance.primary_node])
8610
    self._CheckVolumeGroup([self.instance.primary_node])
8611

    
8612
    # Step: check other node consistency
8613
    self.lu.LogStep(2, steps_total, "Check peer consistency")
8614
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
8615

    
8616
    # Step: create new storage
8617
    self.lu.LogStep(3, steps_total, "Allocate new storage")
8618
    for idx, dev in enumerate(self.instance.disks):
8619
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
8620
                      (self.new_node, idx))
8621
      # we pass force_create=True to force LVM creation
8622
      for new_lv in dev.children:
8623
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
8624
                        _GetInstanceInfoText(self.instance), False)
8625

    
8626
    # Step 4: dbrd minors and drbd setups changes
8627
    # after this, we must manually remove the drbd minors on both the
8628
    # error and the success paths
8629
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8630
    minors = self.cfg.AllocateDRBDMinor([self.new_node
8631
                                         for dev in self.instance.disks],
8632
                                        self.instance.name)
8633
    logging.debug("Allocated minors %r", minors)
8634

    
8635
    iv_names = {}
8636
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
8637
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
8638
                      (self.new_node, idx))
8639
      # create new devices on new_node; note that we create two IDs:
8640
      # one without port, so the drbd will be activated without
8641
      # networking information on the new node at this stage, and one
8642
      # with network, for the latter activation in step 4
8643
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
8644
      if self.instance.primary_node == o_node1:
8645
        p_minor = o_minor1
8646
      else:
8647
        assert self.instance.primary_node == o_node2, "Three-node instance?"
8648
        p_minor = o_minor2
8649

    
8650
      new_alone_id = (self.instance.primary_node, self.new_node, None,
8651
                      p_minor, new_minor, o_secret)
8652
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
8653
                    p_minor, new_minor, o_secret)
8654

    
8655
      iv_names[idx] = (dev, dev.children, new_net_id)
8656
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
8657
                    new_net_id)
8658
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
8659
                              logical_id=new_alone_id,
8660
                              children=dev.children,
8661
                              size=dev.size)
8662
      try:
8663
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
8664
                              _GetInstanceInfoText(self.instance), False)
8665
      except errors.GenericError:
8666
        self.cfg.ReleaseDRBDMinors(self.instance.name)
8667
        raise
8668

    
8669
    # We have new devices, shutdown the drbd on the old secondary
8670
    for idx, dev in enumerate(self.instance.disks):
8671
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
8672
      self.cfg.SetDiskID(dev, self.target_node)
8673
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
8674
      if msg:
8675
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
8676
                           "node: %s" % (idx, msg),
8677
                           hint=("Please cleanup this device manually as"
8678
                                 " soon as possible"))
8679

    
8680
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
8681
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
8682
                                               self.node_secondary_ip,
8683
                                               self.instance.disks)\
8684
                                              [self.instance.primary_node]
8685

    
8686
    msg = result.fail_msg
8687
    if msg:
8688
      # detaches didn't succeed (unlikely)
8689
      self.cfg.ReleaseDRBDMinors(self.instance.name)
8690
      raise errors.OpExecError("Can't detach the disks from the network on"
8691
                               " old node: %s" % (msg,))
8692

    
8693
    # if we managed to detach at least one, we update all the disks of
8694
    # the instance to point to the new secondary
8695
    self.lu.LogInfo("Updating instance configuration")
8696
    for dev, _, new_logical_id in iv_names.itervalues():
8697
      dev.logical_id = new_logical_id
8698
      self.cfg.SetDiskID(dev, self.instance.primary_node)
8699

    
8700
    self.cfg.Update(self.instance, feedback_fn)
8701

    
8702
    # and now perform the drbd attach
8703
    self.lu.LogInfo("Attaching primary drbds to new secondary"
8704
                    " (standalone => connected)")
8705
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8706
                                            self.new_node],
8707
                                           self.node_secondary_ip,
8708
                                           self.instance.disks,
8709
                                           self.instance.name,
8710
                                           False)
8711
    for to_node, to_result in result.items():
8712
      msg = to_result.fail_msg
8713
      if msg:
8714
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8715
                           to_node, msg,
8716
                           hint=("please do a gnt-instance info to see the"
8717
                                 " status of disks"))
8718
    cstep = 5
8719
    if self.early_release:
8720
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
8721
      cstep += 1
8722
      self._RemoveOldStorage(self.target_node, iv_names)
8723
      # WARNING: we release all node locks here, do not do other RPCs
8724
      # than WaitForSync to the primary node
8725
      self._ReleaseNodeLock([self.instance.primary_node,
8726
                             self.target_node,
8727
                             self.new_node])
8728

    
8729
    # Wait for sync
8730
    # This can fail as the old devices are degraded and _WaitForSync
8731
    # does a combined result over all disks, so we don't check its return value
8732
    self.lu.LogStep(cstep, steps_total, "Sync devices")
8733
    cstep += 1
8734
    _WaitForSync(self.lu, self.instance)
8735

    
8736
    # Check all devices manually
8737
    self._CheckDevices(self.instance.primary_node, iv_names)
8738

    
8739
    # Step: remove old storage
8740
    if not self.early_release:
8741
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
8742
      self._RemoveOldStorage(self.target_node, iv_names)
8743

    
8744

    
8745
class LURepairNodeStorage(NoHooksLU):
8746
  """Repairs the volume group on a node.
8747

8748
  """
8749
  _OP_PARAMS = [
8750
    _PNodeName,
8751
    ("storage_type", ht.NoDefault, _CheckStorageType),
8752
    ("name", ht.NoDefault, ht.TNonEmptyString),
8753
    ("ignore_consistency", False, ht.TBool),
8754
    ]
8755
  REQ_BGL = False
8756

    
8757
  def CheckArguments(self):
8758
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8759

    
8760
    storage_type = self.op.storage_type
8761

    
8762
    if (constants.SO_FIX_CONSISTENCY not in
8763
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8764
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
8765
                                 " repaired" % storage_type,
8766
                                 errors.ECODE_INVAL)
8767

    
8768
  def ExpandNames(self):
8769
    self.needed_locks = {
8770
      locking.LEVEL_NODE: [self.op.node_name],
8771
      }
8772

    
8773
  def _CheckFaultyDisks(self, instance, node_name):
8774
    """Ensure faulty disks abort the opcode or at least warn."""
8775
    try:
8776
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8777
                                  node_name, True):
8778
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8779
                                   " node '%s'" % (instance.name, node_name),
8780
                                   errors.ECODE_STATE)
8781
    except errors.OpPrereqError, err:
8782
      if self.op.ignore_consistency:
8783
        self.proc.LogWarning(str(err.args[0]))
8784
      else:
8785
        raise
8786

    
8787
  def CheckPrereq(self):
8788
    """Check prerequisites.
8789

8790
    """
8791
    # Check whether any instance on this node has faulty disks
8792
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8793
      if not inst.admin_up:
8794
        continue
8795
      check_nodes = set(inst.all_nodes)
8796
      check_nodes.discard(self.op.node_name)
8797
      for inst_node_name in check_nodes:
8798
        self._CheckFaultyDisks(inst, inst_node_name)
8799

    
8800
  def Exec(self, feedback_fn):
8801
    feedback_fn("Repairing storage unit '%s' on %s ..." %
8802
                (self.op.name, self.op.node_name))
8803

    
8804
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8805
    result = self.rpc.call_storage_execute(self.op.node_name,
8806
                                           self.op.storage_type, st_args,
8807
                                           self.op.name,
8808
                                           constants.SO_FIX_CONSISTENCY)
8809
    result.Raise("Failed to repair storage unit '%s' on %s" %
8810
                 (self.op.name, self.op.node_name))
8811

    
8812

    
8813
class LUNodeEvacuationStrategy(NoHooksLU):
8814
  """Computes the node evacuation strategy.
8815

8816
  """
8817
  _OP_PARAMS = [
8818
    ("nodes", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
8819
    ("remote_node", None, ht.TMaybeString),
8820
    ("iallocator", None, ht.TMaybeString),
8821
    ]
8822
  REQ_BGL = False
8823

    
8824
  def CheckArguments(self):
8825
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
8826

    
8827
  def ExpandNames(self):
8828
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
8829
    self.needed_locks = locks = {}
8830
    if self.op.remote_node is None:
8831
      locks[locking.LEVEL_NODE] = locking.ALL_SET
8832
    else:
8833
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8834
      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
8835

    
8836
  def Exec(self, feedback_fn):
8837
    if self.op.remote_node is not None:
8838
      instances = []
8839
      for node in self.op.nodes:
8840
        instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
8841
      result = []
8842
      for i in instances:
8843
        if i.primary_node == self.op.remote_node:
8844
          raise errors.OpPrereqError("Node %s is the primary node of"
8845
                                     " instance %s, cannot use it as"
8846
                                     " secondary" %
8847
                                     (self.op.remote_node, i.name),
8848
                                     errors.ECODE_INVAL)
8849
        result.append([i.name, self.op.remote_node])
8850
    else:
8851
      ial = IAllocator(self.cfg, self.rpc,
8852
                       mode=constants.IALLOCATOR_MODE_MEVAC,
8853
                       evac_nodes=self.op.nodes)
8854
      ial.Run(self.op.iallocator, validate=True)
8855
      if not ial.success:
8856
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
8857
                                 errors.ECODE_NORES)
8858
      result = ial.result
8859
    return result
8860

    
8861

    
8862
class LUGrowDisk(LogicalUnit):
8863
  """Grow a disk of an instance.
8864

8865
  """
8866
  HPATH = "disk-grow"
8867
  HTYPE = constants.HTYPE_INSTANCE
8868
  _OP_PARAMS = [
8869
    _PInstanceName,
8870
    ("disk", ht.NoDefault, ht.TInt),
8871
    ("amount", ht.NoDefault, ht.TInt),
8872
    ("wait_for_sync", True, ht.TBool),
8873
    ]
8874
  REQ_BGL = False
8875

    
8876
  def ExpandNames(self):
8877
    self._ExpandAndLockInstance()
8878
    self.needed_locks[locking.LEVEL_NODE] = []
8879
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8880

    
8881
  def DeclareLocks(self, level):
8882
    if level == locking.LEVEL_NODE:
8883
      self._LockInstancesNodes()
8884

    
8885
  def BuildHooksEnv(self):
8886
    """Build hooks env.
8887

8888
    This runs on the master, the primary and all the secondaries.
8889

8890
    """
8891
    env = {
8892
      "DISK": self.op.disk,
8893
      "AMOUNT": self.op.amount,
8894
      }
8895
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8896
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8897
    return env, nl, nl
8898

    
8899
  def CheckPrereq(self):
8900
    """Check prerequisites.
8901

8902
    This checks that the instance is in the cluster.
8903

8904
    """
8905
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8906
    assert instance is not None, \
8907
      "Cannot retrieve locked instance %s" % self.op.instance_name
8908
    nodenames = list(instance.all_nodes)
8909
    for node in nodenames:
8910
      _CheckNodeOnline(self, node)
8911

    
8912
    self.instance = instance
8913

    
8914
    if instance.disk_template not in constants.DTS_GROWABLE:
8915
      raise errors.OpPrereqError("Instance's disk layout does not support"
8916
                                 " growing.", errors.ECODE_INVAL)
8917

    
8918
    self.disk = instance.FindDisk(self.op.disk)
8919

    
8920
    if instance.disk_template != constants.DT_FILE:
8921
      # TODO: check the free disk space for file, when that feature
8922
      # will be supported
8923
      _CheckNodesFreeDiskPerVG(self, nodenames,
8924
                               {self.disk.physical_id[0]: self.op.amount})
8925

    
8926
  def Exec(self, feedback_fn):
8927
    """Execute disk grow.
8928

8929
    """
8930
    instance = self.instance
8931
    disk = self.disk
8932

    
8933
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8934
    if not disks_ok:
8935
      raise errors.OpExecError("Cannot activate block device to grow")
8936

    
8937
    for node in instance.all_nodes:
8938
      self.cfg.SetDiskID(disk, node)
8939
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8940
      result.Raise("Grow request failed to node %s" % node)
8941

    
8942
      # TODO: Rewrite code to work properly
8943
      # DRBD goes into sync mode for a short amount of time after executing the
8944
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8945
      # calling "resize" in sync mode fails. Sleeping for a short amount of
8946
      # time is a work-around.
8947
      time.sleep(5)
8948

    
8949
    disk.RecordGrow(self.op.amount)
8950
    self.cfg.Update(instance, feedback_fn)
8951
    if self.op.wait_for_sync:
8952
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
8953
      if disk_abort:
8954
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8955
                             " status.\nPlease check the instance.")
8956
      if not instance.admin_up:
8957
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8958
    elif not instance.admin_up:
8959
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
8960
                           " not supposed to be running because no wait for"
8961
                           " sync mode was requested.")
8962

    
8963

    
8964
class LUQueryInstanceData(NoHooksLU):
8965
  """Query runtime instance data.
8966

8967
  """
8968
  _OP_PARAMS = [
8969
    ("instances", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
8970
    ("static", False, ht.TBool),
8971
    ]
8972
  REQ_BGL = False
8973

    
8974
  def ExpandNames(self):
8975
    self.needed_locks = {}
8976
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
8977

    
8978
    if self.op.instances:
8979
      self.wanted_names = []
8980
      for name in self.op.instances:
8981
        full_name = _ExpandInstanceName(self.cfg, name)
8982
        self.wanted_names.append(full_name)
8983
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8984
    else:
8985
      self.wanted_names = None
8986
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8987

    
8988
    self.needed_locks[locking.LEVEL_NODE] = []
8989
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8990

    
8991
  def DeclareLocks(self, level):
8992
    if level == locking.LEVEL_NODE:
8993
      self._LockInstancesNodes()
8994

    
8995
  def CheckPrereq(self):
8996
    """Check prerequisites.
8997

8998
    This only checks the optional instance list against the existing names.
8999

9000
    """
9001
    if self.wanted_names is None:
9002
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
9003

    
9004
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
9005
                             in self.wanted_names]
9006

    
9007
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
9008
    """Returns the status of a block device
9009

9010
    """
9011
    if self.op.static or not node:
9012
      return None
9013

    
9014
    self.cfg.SetDiskID(dev, node)
9015

    
9016
    result = self.rpc.call_blockdev_find(node, dev)
9017
    if result.offline:
9018
      return None
9019

    
9020
    result.Raise("Can't compute disk status for %s" % instance_name)
9021

    
9022
    status = result.payload
9023
    if status is None:
9024
      return None
9025

    
9026
    return (status.dev_path, status.major, status.minor,
9027
            status.sync_percent, status.estimated_time,
9028
            status.is_degraded, status.ldisk_status)
9029

    
9030
  def _ComputeDiskStatus(self, instance, snode, dev):
9031
    """Compute block device status.
9032

9033
    """
9034
    if dev.dev_type in constants.LDS_DRBD:
9035
      # we change the snode then (otherwise we use the one passed in)
9036
      if dev.logical_id[0] == instance.primary_node:
9037
        snode = dev.logical_id[1]
9038
      else:
9039
        snode = dev.logical_id[0]
9040

    
9041
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
9042
                                              instance.name, dev)
9043
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
9044

    
9045
    if dev.children:
9046
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
9047
                      for child in dev.children]
9048
    else:
9049
      dev_children = []
9050

    
9051
    data = {
9052
      "iv_name": dev.iv_name,
9053
      "dev_type": dev.dev_type,
9054
      "logical_id": dev.logical_id,
9055
      "physical_id": dev.physical_id,
9056
      "pstatus": dev_pstatus,
9057
      "sstatus": dev_sstatus,
9058
      "children": dev_children,
9059
      "mode": dev.mode,
9060
      "size": dev.size,
9061
      }
9062

    
9063
    return data
9064

    
9065
  def Exec(self, feedback_fn):
9066
    """Gather and return data"""
9067
    result = {}
9068

    
9069
    cluster = self.cfg.GetClusterInfo()
9070

    
9071
    for instance in self.wanted_instances:
9072
      if not self.op.static:
9073
        remote_info = self.rpc.call_instance_info(instance.primary_node,
9074
                                                  instance.name,
9075
                                                  instance.hypervisor)
9076
        remote_info.Raise("Error checking node %s" % instance.primary_node)
9077
        remote_info = remote_info.payload
9078
        if remote_info and "state" in remote_info:
9079
          remote_state = "up"
9080
        else:
9081
          remote_state = "down"
9082
      else:
9083
        remote_state = None
9084
      if instance.admin_up:
9085
        config_state = "up"
9086
      else:
9087
        config_state = "down"
9088

    
9089
      disks = [self._ComputeDiskStatus(instance, None, device)
9090
               for device in instance.disks]
9091

    
9092
      idict = {
9093
        "name": instance.name,
9094
        "config_state": config_state,
9095
        "run_state": remote_state,
9096
        "pnode": instance.primary_node,
9097
        "snodes": instance.secondary_nodes,
9098
        "os": instance.os,
9099
        # this happens to be the same format used for hooks
9100
        "nics": _NICListToTuple(self, instance.nics),
9101
        "disk_template": instance.disk_template,
9102
        "disks": disks,
9103
        "hypervisor": instance.hypervisor,
9104
        "network_port": instance.network_port,
9105
        "hv_instance": instance.hvparams,
9106
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
9107
        "be_instance": instance.beparams,
9108
        "be_actual": cluster.FillBE(instance),
9109
        "os_instance": instance.osparams,
9110
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
9111
        "serial_no": instance.serial_no,
9112
        "mtime": instance.mtime,
9113
        "ctime": instance.ctime,
9114
        "uuid": instance.uuid,
9115
        }
9116

    
9117
      result[instance.name] = idict
9118

    
9119
    return result
9120

    
9121

    
9122
class LUSetInstanceParams(LogicalUnit):
9123
  """Modifies an instances's parameters.
9124

9125
  """
9126
  HPATH = "instance-modify"
9127
  HTYPE = constants.HTYPE_INSTANCE
9128
  _OP_PARAMS = [
9129
    _PInstanceName,
9130
    ("nics", ht.EmptyList, ht.TList),
9131
    ("disks", ht.EmptyList, ht.TList),
9132
    ("beparams", ht.EmptyDict, ht.TDict),
9133
    ("hvparams", ht.EmptyDict, ht.TDict),
9134
    ("disk_template", None, ht.TMaybeString),
9135
    ("remote_node", None, ht.TMaybeString),
9136
    ("os_name", None, ht.TMaybeString),
9137
    ("force_variant", False, ht.TBool),
9138
    ("osparams", None, ht.TOr(ht.TDict, ht.TNone)),
9139
    _PForce,
9140
    ]
9141
  REQ_BGL = False
9142

    
9143
  def CheckArguments(self):
9144
    if not (self.op.nics or self.op.disks or self.op.disk_template or
9145
            self.op.hvparams or self.op.beparams or self.op.os_name):
9146
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
9147

    
9148
    if self.op.hvparams:
9149
      _CheckGlobalHvParams(self.op.hvparams)
9150

    
9151
    # Disk validation
9152
    disk_addremove = 0
9153
    for disk_op, disk_dict in self.op.disks:
9154
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
9155
      if disk_op == constants.DDM_REMOVE:
9156
        disk_addremove += 1
9157
        continue
9158
      elif disk_op == constants.DDM_ADD:
9159
        disk_addremove += 1
9160
      else:
9161
        if not isinstance(disk_op, int):
9162
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
9163
        if not isinstance(disk_dict, dict):
9164
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
9165
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9166

    
9167
      if disk_op == constants.DDM_ADD:
9168
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
9169
        if mode not in constants.DISK_ACCESS_SET:
9170
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
9171
                                     errors.ECODE_INVAL)
9172
        size = disk_dict.get('size', None)
9173
        if size is None:
9174
          raise errors.OpPrereqError("Required disk parameter size missing",
9175
                                     errors.ECODE_INVAL)
9176
        try:
9177
          size = int(size)
9178
        except (TypeError, ValueError), err:
9179
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
9180
                                     str(err), errors.ECODE_INVAL)
9181
        disk_dict['size'] = size
9182
      else:
9183
        # modification of disk
9184
        if 'size' in disk_dict:
9185
          raise errors.OpPrereqError("Disk size change not possible, use"
9186
                                     " grow-disk", errors.ECODE_INVAL)
9187

    
9188
    if disk_addremove > 1:
9189
      raise errors.OpPrereqError("Only one disk add or remove operation"
9190
                                 " supported at a time", errors.ECODE_INVAL)
9191

    
9192
    if self.op.disks and self.op.disk_template is not None:
9193
      raise errors.OpPrereqError("Disk template conversion and other disk"
9194
                                 " changes not supported at the same time",
9195
                                 errors.ECODE_INVAL)
9196

    
9197
    if self.op.disk_template:
9198
      _CheckDiskTemplate(self.op.disk_template)
9199
      if (self.op.disk_template in constants.DTS_NET_MIRROR and
9200
          self.op.remote_node is None):
9201
        raise errors.OpPrereqError("Changing the disk template to a mirrored"
9202
                                   " one requires specifying a secondary node",
9203
                                   errors.ECODE_INVAL)
9204

    
9205
    # NIC validation
9206
    nic_addremove = 0
9207
    for nic_op, nic_dict in self.op.nics:
9208
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
9209
      if nic_op == constants.DDM_REMOVE:
9210
        nic_addremove += 1
9211
        continue
9212
      elif nic_op == constants.DDM_ADD:
9213
        nic_addremove += 1
9214
      else:
9215
        if not isinstance(nic_op, int):
9216
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
9217
        if not isinstance(nic_dict, dict):
9218
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
9219
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9220

    
9221
      # nic_dict should be a dict
9222
      nic_ip = nic_dict.get('ip', None)
9223
      if nic_ip is not None:
9224
        if nic_ip.lower() == constants.VALUE_NONE:
9225
          nic_dict['ip'] = None
9226
        else:
9227
          if not netutils.IPAddress.IsValid(nic_ip):
9228
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
9229
                                       errors.ECODE_INVAL)
9230

    
9231
      nic_bridge = nic_dict.get('bridge', None)
9232
      nic_link = nic_dict.get('link', None)
9233
      if nic_bridge and nic_link:
9234
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
9235
                                   " at the same time", errors.ECODE_INVAL)
9236
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
9237
        nic_dict['bridge'] = None
9238
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
9239
        nic_dict['link'] = None
9240

    
9241
      if nic_op == constants.DDM_ADD:
9242
        nic_mac = nic_dict.get('mac', None)
9243
        if nic_mac is None:
9244
          nic_dict['mac'] = constants.VALUE_AUTO
9245

    
9246
      if 'mac' in nic_dict:
9247
        nic_mac = nic_dict['mac']
9248
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9249
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
9250

    
9251
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
9252
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
9253
                                     " modifying an existing nic",
9254
                                     errors.ECODE_INVAL)
9255

    
9256
    if nic_addremove > 1:
9257
      raise errors.OpPrereqError("Only one NIC add or remove operation"
9258
                                 " supported at a time", errors.ECODE_INVAL)
9259

    
9260
  def ExpandNames(self):
9261
    self._ExpandAndLockInstance()
9262
    self.needed_locks[locking.LEVEL_NODE] = []
9263
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9264

    
9265
  def DeclareLocks(self, level):
9266
    if level == locking.LEVEL_NODE:
9267
      self._LockInstancesNodes()
9268
      if self.op.disk_template and self.op.remote_node:
9269
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9270
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
9271

    
9272
  def BuildHooksEnv(self):
9273
    """Build hooks env.
9274

9275
    This runs on the master, primary and secondaries.
9276

9277
    """
9278
    args = dict()
9279
    if constants.BE_MEMORY in self.be_new:
9280
      args['memory'] = self.be_new[constants.BE_MEMORY]
9281
    if constants.BE_VCPUS in self.be_new:
9282
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
9283
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
9284
    # information at all.
9285
    if self.op.nics:
9286
      args['nics'] = []
9287
      nic_override = dict(self.op.nics)
9288
      for idx, nic in enumerate(self.instance.nics):
9289
        if idx in nic_override:
9290
          this_nic_override = nic_override[idx]
9291
        else:
9292
          this_nic_override = {}
9293
        if 'ip' in this_nic_override:
9294
          ip = this_nic_override['ip']
9295
        else:
9296
          ip = nic.ip
9297
        if 'mac' in this_nic_override:
9298
          mac = this_nic_override['mac']
9299
        else:
9300
          mac = nic.mac
9301
        if idx in self.nic_pnew:
9302
          nicparams = self.nic_pnew[idx]
9303
        else:
9304
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
9305
        mode = nicparams[constants.NIC_MODE]
9306
        link = nicparams[constants.NIC_LINK]
9307
        args['nics'].append((ip, mac, mode, link))
9308
      if constants.DDM_ADD in nic_override:
9309
        ip = nic_override[constants.DDM_ADD].get('ip', None)
9310
        mac = nic_override[constants.DDM_ADD]['mac']
9311
        nicparams = self.nic_pnew[constants.DDM_ADD]
9312
        mode = nicparams[constants.NIC_MODE]
9313
        link = nicparams[constants.NIC_LINK]
9314
        args['nics'].append((ip, mac, mode, link))
9315
      elif constants.DDM_REMOVE in nic_override:
9316
        del args['nics'][-1]
9317

    
9318
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
9319
    if self.op.disk_template:
9320
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
9321
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9322
    return env, nl, nl
9323

    
9324
  def CheckPrereq(self):
9325
    """Check prerequisites.
9326

9327
    This only checks the instance list against the existing names.
9328

9329
    """
9330
    # checking the new params on the primary/secondary nodes
9331

    
9332
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9333
    cluster = self.cluster = self.cfg.GetClusterInfo()
9334
    assert self.instance is not None, \
9335
      "Cannot retrieve locked instance %s" % self.op.instance_name
9336
    pnode = instance.primary_node
9337
    nodelist = list(instance.all_nodes)
9338

    
9339
    # OS change
9340
    if self.op.os_name and not self.op.force:
9341
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
9342
                      self.op.force_variant)
9343
      instance_os = self.op.os_name
9344
    else:
9345
      instance_os = instance.os
9346

    
9347
    if self.op.disk_template:
9348
      if instance.disk_template == self.op.disk_template:
9349
        raise errors.OpPrereqError("Instance already has disk template %s" %
9350
                                   instance.disk_template, errors.ECODE_INVAL)
9351

    
9352
      if (instance.disk_template,
9353
          self.op.disk_template) not in self._DISK_CONVERSIONS:
9354
        raise errors.OpPrereqError("Unsupported disk template conversion from"
9355
                                   " %s to %s" % (instance.disk_template,
9356
                                                  self.op.disk_template),
9357
                                   errors.ECODE_INVAL)
9358
      _CheckInstanceDown(self, instance, "cannot change disk template")
9359
      if self.op.disk_template in constants.DTS_NET_MIRROR:
9360
        if self.op.remote_node == pnode:
9361
          raise errors.OpPrereqError("Given new secondary node %s is the same"
9362
                                     " as the primary node of the instance" %
9363
                                     self.op.remote_node, errors.ECODE_STATE)
9364
        _CheckNodeOnline(self, self.op.remote_node)
9365
        _CheckNodeNotDrained(self, self.op.remote_node)
9366
        # FIXME: here we assume that the old instance type is DT_PLAIN
9367
        assert instance.disk_template == constants.DT_PLAIN
9368
        disks = [{"size": d.size, "vg": d.logical_id[0]}
9369
                 for d in instance.disks]
9370
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
9371
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
9372

    
9373
    # hvparams processing
9374
    if self.op.hvparams:
9375
      hv_type = instance.hypervisor
9376
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
9377
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
9378
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
9379

    
9380
      # local check
9381
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
9382
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
9383
      self.hv_new = hv_new # the new actual values
9384
      self.hv_inst = i_hvdict # the new dict (without defaults)
9385
    else:
9386
      self.hv_new = self.hv_inst = {}
9387

    
9388
    # beparams processing
9389
    if self.op.beparams:
9390
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
9391
                                   use_none=True)
9392
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
9393
      be_new = cluster.SimpleFillBE(i_bedict)
9394
      self.be_new = be_new # the new actual values
9395
      self.be_inst = i_bedict # the new dict (without defaults)
9396
    else:
9397
      self.be_new = self.be_inst = {}
9398

    
9399
    # osparams processing
9400
    if self.op.osparams:
9401
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
9402
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
9403
      self.os_inst = i_osdict # the new dict (without defaults)
9404
    else:
9405
      self.os_inst = {}
9406

    
9407
    self.warn = []
9408

    
9409
    if constants.BE_MEMORY in self.op.beparams and not self.op.force:
9410
      mem_check_list = [pnode]
9411
      if be_new[constants.BE_AUTO_BALANCE]:
9412
        # either we changed auto_balance to yes or it was from before
9413
        mem_check_list.extend(instance.secondary_nodes)
9414
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
9415
                                                  instance.hypervisor)
9416
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
9417
                                         instance.hypervisor)
9418
      pninfo = nodeinfo[pnode]
9419
      msg = pninfo.fail_msg
9420
      if msg:
9421
        # Assume the primary node is unreachable and go ahead
9422
        self.warn.append("Can't get info from primary node %s: %s" %
9423
                         (pnode,  msg))
9424
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
9425
        self.warn.append("Node data from primary node %s doesn't contain"
9426
                         " free memory information" % pnode)
9427
      elif instance_info.fail_msg:
9428
        self.warn.append("Can't get instance runtime information: %s" %
9429
                        instance_info.fail_msg)
9430
      else:
9431
        if instance_info.payload:
9432
          current_mem = int(instance_info.payload['memory'])
9433
        else:
9434
          # Assume instance not running
9435
          # (there is a slight race condition here, but it's not very probable,
9436
          # and we have no other way to check)
9437
          current_mem = 0
9438
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
9439
                    pninfo.payload['memory_free'])
9440
        if miss_mem > 0:
9441
          raise errors.OpPrereqError("This change will prevent the instance"
9442
                                     " from starting, due to %d MB of memory"
9443
                                     " missing on its primary node" % miss_mem,
9444
                                     errors.ECODE_NORES)
9445

    
9446
      if be_new[constants.BE_AUTO_BALANCE]:
9447
        for node, nres in nodeinfo.items():
9448
          if node not in instance.secondary_nodes:
9449
            continue
9450
          msg = nres.fail_msg
9451
          if msg:
9452
            self.warn.append("Can't get info from secondary node %s: %s" %
9453
                             (node, msg))
9454
          elif not isinstance(nres.payload.get('memory_free', None), int):
9455
            self.warn.append("Secondary node %s didn't return free"
9456
                             " memory information" % node)
9457
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
9458
            self.warn.append("Not enough memory to failover instance to"
9459
                             " secondary node %s" % node)
9460

    
9461
    # NIC processing
9462
    self.nic_pnew = {}
9463
    self.nic_pinst = {}
9464
    for nic_op, nic_dict in self.op.nics:
9465
      if nic_op == constants.DDM_REMOVE:
9466
        if not instance.nics:
9467
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
9468
                                     errors.ECODE_INVAL)
9469
        continue
9470
      if nic_op != constants.DDM_ADD:
9471
        # an existing nic
9472
        if not instance.nics:
9473
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
9474
                                     " no NICs" % nic_op,
9475
                                     errors.ECODE_INVAL)
9476
        if nic_op < 0 or nic_op >= len(instance.nics):
9477
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
9478
                                     " are 0 to %d" %
9479
                                     (nic_op, len(instance.nics) - 1),
9480
                                     errors.ECODE_INVAL)
9481
        old_nic_params = instance.nics[nic_op].nicparams
9482
        old_nic_ip = instance.nics[nic_op].ip
9483
      else:
9484
        old_nic_params = {}
9485
        old_nic_ip = None
9486

    
9487
      update_params_dict = dict([(key, nic_dict[key])
9488
                                 for key in constants.NICS_PARAMETERS
9489
                                 if key in nic_dict])
9490

    
9491
      if 'bridge' in nic_dict:
9492
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
9493

    
9494
      new_nic_params = _GetUpdatedParams(old_nic_params,
9495
                                         update_params_dict)
9496
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
9497
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
9498
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
9499
      self.nic_pinst[nic_op] = new_nic_params
9500
      self.nic_pnew[nic_op] = new_filled_nic_params
9501
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
9502

    
9503
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
9504
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
9505
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
9506
        if msg:
9507
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
9508
          if self.op.force:
9509
            self.warn.append(msg)
9510
          else:
9511
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
9512
      if new_nic_mode == constants.NIC_MODE_ROUTED:
9513
        if 'ip' in nic_dict:
9514
          nic_ip = nic_dict['ip']
9515
        else:
9516
          nic_ip = old_nic_ip
9517
        if nic_ip is None:
9518
          raise errors.OpPrereqError('Cannot set the nic ip to None'
9519
                                     ' on a routed nic', errors.ECODE_INVAL)
9520
      if 'mac' in nic_dict:
9521
        nic_mac = nic_dict['mac']
9522
        if nic_mac is None:
9523
          raise errors.OpPrereqError('Cannot set the nic mac to None',
9524
                                     errors.ECODE_INVAL)
9525
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9526
          # otherwise generate the mac
9527
          nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
9528
        else:
9529
          # or validate/reserve the current one
9530
          try:
9531
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
9532
          except errors.ReservationError:
9533
            raise errors.OpPrereqError("MAC address %s already in use"
9534
                                       " in cluster" % nic_mac,
9535
                                       errors.ECODE_NOTUNIQUE)
9536

    
9537
    # DISK processing
9538
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
9539
      raise errors.OpPrereqError("Disk operations not supported for"
9540
                                 " diskless instances",
9541
                                 errors.ECODE_INVAL)
9542
    for disk_op, _ in self.op.disks:
9543
      if disk_op == constants.DDM_REMOVE:
9544
        if len(instance.disks) == 1:
9545
          raise errors.OpPrereqError("Cannot remove the last disk of"
9546
                                     " an instance", errors.ECODE_INVAL)
9547
        _CheckInstanceDown(self, instance, "cannot remove disks")
9548

    
9549
      if (disk_op == constants.DDM_ADD and
9550
          len(instance.nics) >= constants.MAX_DISKS):
9551
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
9552
                                   " add more" % constants.MAX_DISKS,
9553
                                   errors.ECODE_STATE)
9554
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
9555
        # an existing disk
9556
        if disk_op < 0 or disk_op >= len(instance.disks):
9557
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
9558
                                     " are 0 to %d" %
9559
                                     (disk_op, len(instance.disks)),
9560
                                     errors.ECODE_INVAL)
9561

    
9562
    return
9563

    
9564
  def _ConvertPlainToDrbd(self, feedback_fn):
9565
    """Converts an instance from plain to drbd.
9566

9567
    """
9568
    feedback_fn("Converting template to drbd")
9569
    instance = self.instance
9570
    pnode = instance.primary_node
9571
    snode = self.op.remote_node
9572

    
9573
    # create a fake disk info for _GenerateDiskTemplate
9574
    disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
9575
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
9576
                                      instance.name, pnode, [snode],
9577
                                      disk_info, None, None, 0, feedback_fn)
9578
    info = _GetInstanceInfoText(instance)
9579
    feedback_fn("Creating aditional volumes...")
9580
    # first, create the missing data and meta devices
9581
    for disk in new_disks:
9582
      # unfortunately this is... not too nice
9583
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
9584
                            info, True)
9585
      for child in disk.children:
9586
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
9587
    # at this stage, all new LVs have been created, we can rename the
9588
    # old ones
9589
    feedback_fn("Renaming original volumes...")
9590
    rename_list = [(o, n.children[0].logical_id)
9591
                   for (o, n) in zip(instance.disks, new_disks)]
9592
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
9593
    result.Raise("Failed to rename original LVs")
9594

    
9595
    feedback_fn("Initializing DRBD devices...")
9596
    # all child devices are in place, we can now create the DRBD devices
9597
    for disk in new_disks:
9598
      for node in [pnode, snode]:
9599
        f_create = node == pnode
9600
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
9601

    
9602
    # at this point, the instance has been modified
9603
    instance.disk_template = constants.DT_DRBD8
9604
    instance.disks = new_disks
9605
    self.cfg.Update(instance, feedback_fn)
9606

    
9607
    # disks are created, waiting for sync
9608
    disk_abort = not _WaitForSync(self, instance)
9609
    if disk_abort:
9610
      raise errors.OpExecError("There are some degraded disks for"
9611
                               " this instance, please cleanup manually")
9612

    
9613
  def _ConvertDrbdToPlain(self, feedback_fn):
9614
    """Converts an instance from drbd to plain.
9615

9616
    """
9617
    instance = self.instance
9618
    assert len(instance.secondary_nodes) == 1
9619
    pnode = instance.primary_node
9620
    snode = instance.secondary_nodes[0]
9621
    feedback_fn("Converting template to plain")
9622

    
9623
    old_disks = instance.disks
9624
    new_disks = [d.children[0] for d in old_disks]
9625

    
9626
    # copy over size and mode
9627
    for parent, child in zip(old_disks, new_disks):
9628
      child.size = parent.size
9629
      child.mode = parent.mode
9630

    
9631
    # update instance structure
9632
    instance.disks = new_disks
9633
    instance.disk_template = constants.DT_PLAIN
9634
    self.cfg.Update(instance, feedback_fn)
9635

    
9636
    feedback_fn("Removing volumes on the secondary node...")
9637
    for disk in old_disks:
9638
      self.cfg.SetDiskID(disk, snode)
9639
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
9640
      if msg:
9641
        self.LogWarning("Could not remove block device %s on node %s,"
9642
                        " continuing anyway: %s", disk.iv_name, snode, msg)
9643

    
9644
    feedback_fn("Removing unneeded volumes on the primary node...")
9645
    for idx, disk in enumerate(old_disks):
9646
      meta = disk.children[1]
9647
      self.cfg.SetDiskID(meta, pnode)
9648
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
9649
      if msg:
9650
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
9651
                        " continuing anyway: %s", idx, pnode, msg)
9652

    
9653
  def Exec(self, feedback_fn):
9654
    """Modifies an instance.
9655

9656
    All parameters take effect only at the next restart of the instance.
9657

9658
    """
9659
    # Process here the warnings from CheckPrereq, as we don't have a
9660
    # feedback_fn there.
9661
    for warn in self.warn:
9662
      feedback_fn("WARNING: %s" % warn)
9663

    
9664
    result = []
9665
    instance = self.instance
9666
    # disk changes
9667
    for disk_op, disk_dict in self.op.disks:
9668
      if disk_op == constants.DDM_REMOVE:
9669
        # remove the last disk
9670
        device = instance.disks.pop()
9671
        device_idx = len(instance.disks)
9672
        for node, disk in device.ComputeNodeTree(instance.primary_node):
9673
          self.cfg.SetDiskID(disk, node)
9674
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
9675
          if msg:
9676
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
9677
                            " continuing anyway", device_idx, node, msg)
9678
        result.append(("disk/%d" % device_idx, "remove"))
9679
      elif disk_op == constants.DDM_ADD:
9680
        # add a new disk
9681
        if instance.disk_template == constants.DT_FILE:
9682
          file_driver, file_path = instance.disks[0].logical_id
9683
          file_path = os.path.dirname(file_path)
9684
        else:
9685
          file_driver = file_path = None
9686
        disk_idx_base = len(instance.disks)
9687
        new_disk = _GenerateDiskTemplate(self,
9688
                                         instance.disk_template,
9689
                                         instance.name, instance.primary_node,
9690
                                         instance.secondary_nodes,
9691
                                         [disk_dict],
9692
                                         file_path,
9693
                                         file_driver,
9694
                                         disk_idx_base, feedback_fn)[0]
9695
        instance.disks.append(new_disk)
9696
        info = _GetInstanceInfoText(instance)
9697

    
9698
        logging.info("Creating volume %s for instance %s",
9699
                     new_disk.iv_name, instance.name)
9700
        # Note: this needs to be kept in sync with _CreateDisks
9701
        #HARDCODE
9702
        for node in instance.all_nodes:
9703
          f_create = node == instance.primary_node
9704
          try:
9705
            _CreateBlockDev(self, node, instance, new_disk,
9706
                            f_create, info, f_create)
9707
          except errors.OpExecError, err:
9708
            self.LogWarning("Failed to create volume %s (%s) on"
9709
                            " node %s: %s",
9710
                            new_disk.iv_name, new_disk, node, err)
9711
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
9712
                       (new_disk.size, new_disk.mode)))
9713
      else:
9714
        # change a given disk
9715
        instance.disks[disk_op].mode = disk_dict['mode']
9716
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
9717

    
9718
    if self.op.disk_template:
9719
      r_shut = _ShutdownInstanceDisks(self, instance)
9720
      if not r_shut:
9721
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
9722
                                 " proceed with disk template conversion")
9723
      mode = (instance.disk_template, self.op.disk_template)
9724
      try:
9725
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
9726
      except:
9727
        self.cfg.ReleaseDRBDMinors(instance.name)
9728
        raise
9729
      result.append(("disk_template", self.op.disk_template))
9730

    
9731
    # NIC changes
9732
    for nic_op, nic_dict in self.op.nics:
9733
      if nic_op == constants.DDM_REMOVE:
9734
        # remove the last nic
9735
        del instance.nics[-1]
9736
        result.append(("nic.%d" % len(instance.nics), "remove"))
9737
      elif nic_op == constants.DDM_ADD:
9738
        # mac and bridge should be set by now
9739
        mac = nic_dict['mac']
9740
        ip = nic_dict.get('ip', None)
9741
        nicparams = self.nic_pinst[constants.DDM_ADD]
9742
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9743
        instance.nics.append(new_nic)
9744
        result.append(("nic.%d" % (len(instance.nics) - 1),
9745
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
9746
                       (new_nic.mac, new_nic.ip,
9747
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
9748
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
9749
                       )))
9750
      else:
9751
        for key in 'mac', 'ip':
9752
          if key in nic_dict:
9753
            setattr(instance.nics[nic_op], key, nic_dict[key])
9754
        if nic_op in self.nic_pinst:
9755
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9756
        for key, val in nic_dict.iteritems():
9757
          result.append(("nic.%s/%d" % (key, nic_op), val))
9758

    
9759
    # hvparams changes
9760
    if self.op.hvparams:
9761
      instance.hvparams = self.hv_inst
9762
      for key, val in self.op.hvparams.iteritems():
9763
        result.append(("hv/%s" % key, val))
9764

    
9765
    # beparams changes
9766
    if self.op.beparams:
9767
      instance.beparams = self.be_inst
9768
      for key, val in self.op.beparams.iteritems():
9769
        result.append(("be/%s" % key, val))
9770

    
9771
    # OS change
9772
    if self.op.os_name:
9773
      instance.os = self.op.os_name
9774

    
9775
    # osparams changes
9776
    if self.op.osparams:
9777
      instance.osparams = self.os_inst
9778
      for key, val in self.op.osparams.iteritems():
9779
        result.append(("os/%s" % key, val))
9780

    
9781
    self.cfg.Update(instance, feedback_fn)
9782

    
9783
    return result
9784

    
9785
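  # Maps (current disk template, requested disk template) to the conversion
  # routine; Exec() looks the pair up here and calls it as
  # routine(self, feedback_fn) when a disk_template change is requested.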
  _DISK_CONVERSIONS = {
9786
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
9787
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
9788
    }
9789

    
9790

    
9791
class LUQueryExports(NoHooksLU):
9792
  """Query the exports list
9793

9794
  """
9795
  _OP_PARAMS = [
9796
    ("nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
9797
    ("use_locking", False, ht.TBool),
9798
    ]
9799
  REQ_BGL = False
9800

    
9801
  def ExpandNames(self):
9802
    self.needed_locks = {}
9803
    self.share_locks[locking.LEVEL_NODE] = 1
9804
    if not self.op.nodes:
9805
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9806
    else:
9807
      self.needed_locks[locking.LEVEL_NODE] = \
9808
        _GetWantedNodes(self, self.op.nodes)
9809

    
9810
  def Exec(self, feedback_fn):
9811
    """Compute the list of all the exported system images.
9812

9813
    @rtype: dict
9814
    @return: a dictionary with the structure node->(export-list)
9815
        where export-list is a list of the instances exported on
9816
        that node.
9817

9818
    """
9819
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
9820
    rpcresult = self.rpc.call_export_list(self.nodes)
9821
    result = {}
9822
    for node in rpcresult:
9823
      if rpcresult[node].fail_msg:
9824
        result[node] = False
9825
      else:
9826
        result[node] = rpcresult[node].payload
9827

    
9828
    return result
9829

    
9830

    
9831
class LUPrepareExport(NoHooksLU):
9832
  """Prepares an instance for an export and returns useful information.
9833

9834
  """
9835
  _OP_PARAMS = [
9836
    _PInstanceName,
9837
    ("mode", ht.NoDefault, ht.TElemOf(constants.EXPORT_MODES)),
9838
    ]
9839
  REQ_BGL = False
9840

    
9841
  def ExpandNames(self):
9842
    self._ExpandAndLockInstance()
9843

    
9844
  def CheckPrereq(self):
9845
    """Check prerequisites.
9846

9847
    """
9848
    instance_name = self.op.instance_name
9849

    
9850
    self.instance = self.cfg.GetInstanceInfo(instance_name)
9851
    assert self.instance is not None, \
9852
          "Cannot retrieve locked instance %s" % self.op.instance_name
9853
    _CheckNodeOnline(self, self.instance.primary_node)
9854

    
9855
    self._cds = _GetClusterDomainSecret()
9856

    
9857
  def Exec(self, feedback_fn):
9858
    """Prepares an instance for an export.
9859

9860
    """
9861
    instance = self.instance
9862

    
9863
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
9864
      salt = utils.GenerateSecret(8)
9865

    
9866
      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
9867
      result = self.rpc.call_x509_cert_create(instance.primary_node,
9868
                                              constants.RIE_CERT_VALIDITY)
9869
      result.Raise("Can't create X509 key and certificate on %s" % result.node)
9870

    
9871
      (name, cert_pem) = result.payload
9872

    
9873
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
9874
                                             cert_pem)
9875

    
9876
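      # For remote exports, return the handshake message together with the
      # X509 key name (HMAC-signed with the cluster domain secret) and the
      # signed CA certificate generated above.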
      return {
9877
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
9878
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
9879
                          salt),
9880
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
9881
        }
9882

    
9883
    return None
9884

    
9885

    
9886
class LUExportInstance(LogicalUnit):
9887
  """Export an instance to an image in the cluster.
9888

9889
  """
9890
  HPATH = "instance-export"
9891
  HTYPE = constants.HTYPE_INSTANCE
9892
  _OP_PARAMS = [
9893
    _PInstanceName,
9894
    ("target_node", ht.NoDefault, ht.TOr(ht.TNonEmptyString, ht.TList)),
9895
    ("shutdown", True, ht.TBool),
9896
    _PShutdownTimeout,
9897
    ("remove_instance", False, ht.TBool),
9898
    ("ignore_remove_failures", False, ht.TBool),
9899
    ("mode", constants.EXPORT_MODE_LOCAL, ht.TElemOf(constants.EXPORT_MODES)),
9900
    ("x509_key_name", None, ht.TOr(ht.TList, ht.TNone)),
9901
    ("destination_x509_ca", None, ht.TMaybeString),
9902
    ]
9903
  REQ_BGL = False
9904

    
9905
  def CheckArguments(self):
9906
    """Check the arguments.
9907

9908
    """
9909
    self.x509_key_name = self.op.x509_key_name
9910
    self.dest_x509_ca_pem = self.op.destination_x509_ca
9911

    
9912
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
9913
      if not self.x509_key_name:
9914
        raise errors.OpPrereqError("Missing X509 key name for encryption",
9915
                                   errors.ECODE_INVAL)
9916

    
9917
      if not self.dest_x509_ca_pem:
9918
        raise errors.OpPrereqError("Missing destination X509 CA",
9919
                                   errors.ECODE_INVAL)
9920

    
9921
  def ExpandNames(self):
9922
    self._ExpandAndLockInstance()
9923

    
9924
    # Lock all nodes for local exports
9925
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9926
      # FIXME: lock only instance primary and destination node
9927
      #
9928
      # Sad but true, for now we have to lock all nodes, as we don't know where
9929
      # the previous export might be, and in this LU we search for it and
9930
      # remove it from its current node. In the future we could fix this by:
9931
      #  - making a tasklet to search (share-lock all), then create the
9932
      #    new one, then one to remove, after
9933
      #  - removing the removal operation altogether
9934
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9935

    
9936
  def DeclareLocks(self, level):
9937
    """Last minute lock declaration."""
9938
    # All nodes are locked anyway, so nothing to do here.
9939

    
9940
  def BuildHooksEnv(self):
9941
    """Build hooks env.
9942

9943
    This will run on the master, primary node and target node.
9944

9945
    """
9946
    env = {
9947
      "EXPORT_MODE": self.op.mode,
9948
      "EXPORT_NODE": self.op.target_node,
9949
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9950
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9951
      # TODO: Generic function for boolean env variables
9952
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
9953
      }
9954

    
9955
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9956

    
9957
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9958

    
9959
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9960
      nl.append(self.op.target_node)
9961

    
9962
    return env, nl, nl
9963

    
9964
  def CheckPrereq(self):
9965
    """Check prerequisites.
9966

9967
    This checks that the instance and node names are valid.
9968

9969
    """
9970
    instance_name = self.op.instance_name
9971

    
9972
    self.instance = self.cfg.GetInstanceInfo(instance_name)
9973
    assert self.instance is not None, \
9974
          "Cannot retrieve locked instance %s" % self.op.instance_name
9975
    _CheckNodeOnline(self, self.instance.primary_node)
9976

    
9977
    if (self.op.remove_instance and self.instance.admin_up and
9978
        not self.op.shutdown):
9979
      raise errors.OpPrereqError("Can not remove instance without shutting it"
9980
                                 " down before")
9981

    
9982
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9983
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9984
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9985
      assert self.dst_node is not None
9986

    
9987
      _CheckNodeOnline(self, self.dst_node.name)
9988
      _CheckNodeNotDrained(self, self.dst_node.name)
9989

    
9990
      self._cds = None
9991
      self.dest_disk_info = None
9992
      self.dest_x509_ca = None
9993

    
9994
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9995
      self.dst_node = None
9996

    
9997
      if len(self.op.target_node) != len(self.instance.disks):
9998
        raise errors.OpPrereqError(("Received destination information for %s"
9999
                                    " disks, but instance %s has %s disks") %
10000
                                   (len(self.op.target_node), instance_name,
10001
                                    len(self.instance.disks)),
10002
                                   errors.ECODE_INVAL)
10003

    
10004
      cds = _GetClusterDomainSecret()
10005

    
10006
      # Check X509 key name
10007
      try:
10008
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
10009
      except (TypeError, ValueError), err:
10010
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
10011

    
10012
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
10013
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
10014
                                   errors.ECODE_INVAL)
10015

    
10016
      # Load and verify CA
10017
      try:
10018
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
10019
      except OpenSSL.crypto.Error, err:
10020
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
10021
                                   (err, ), errors.ECODE_INVAL)
10022

    
10023
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
10024
      if errcode is not None:
10025
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
10026
                                   (msg, ), errors.ECODE_INVAL)
10027

    
10028
      self.dest_x509_ca = cert
10029

    
10030
      # Verify target information
10031
      disk_info = []
10032
      for idx, disk_data in enumerate(self.op.target_node):
10033
        try:
10034
          (host, port, magic) = \
10035
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
10036
        except errors.GenericError, err:
10037
          raise errors.OpPrereqError("Target info for disk %s: %s" %
10038
                                     (idx, err), errors.ECODE_INVAL)
10039

    
10040
        disk_info.append((host, port, magic))
10041

    
10042
      assert len(disk_info) == len(self.op.target_node)
10043
      self.dest_disk_info = disk_info
10044

    
10045
    else:
10046
      raise errors.ProgrammerError("Unhandled export mode %r" %
10047
                                   self.op.mode)
10048

    
10049
    # instance disk type verification
10050
    # TODO: Implement export support for file-based disks
10051
    for disk in self.instance.disks:
10052
      if disk.dev_type == constants.LD_FILE:
10053
        raise errors.OpPrereqError("Export not supported for instances with"
10054
                                   " file-based disks", errors.ECODE_INVAL)
10055

    
10056
  def _CleanupExports(self, feedback_fn):
10057
    """Removes exports of current instance from all other nodes.
10058

10059
    If an instance in a cluster with nodes A..D was exported to node C, its
10060
    exports will be removed from the nodes A, B and D.
10061

10062
    """
10063
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
10064

    
10065
    nodelist = self.cfg.GetNodeList()
10066
    nodelist.remove(self.dst_node.name)
10067

    
10068
    # on one-node clusters nodelist will be empty after the removal
10069
    # if we proceed, the backup would be removed because OpQueryExports
10070
    # substitutes an empty list with the full cluster node list.
10071
    iname = self.instance.name
10072
    if nodelist:
10073
      feedback_fn("Removing old exports for instance %s" % iname)
10074
      exportlist = self.rpc.call_export_list(nodelist)
10075
      for node in exportlist:
10076
        if exportlist[node].fail_msg:
10077
          continue
10078
        if iname in exportlist[node].payload:
10079
          msg = self.rpc.call_export_remove(node, iname).fail_msg
10080
          if msg:
10081
            self.LogWarning("Could not remove older export for instance %s"
10082
                            " on node %s: %s", iname, node, msg)
10083

    
10084
  def Exec(self, feedback_fn):
10085
    """Export an instance to an image in the cluster.
10086

10087
    """
10088
    assert self.op.mode in constants.EXPORT_MODES
10089

    
10090
    instance = self.instance
10091
    src_node = instance.primary_node
10092

    
10093
    if self.op.shutdown:
10094
      # shutdown the instance, but not the disks
10095
      feedback_fn("Shutting down instance %s" % instance.name)
10096
      result = self.rpc.call_instance_shutdown(src_node, instance,
10097
                                               self.op.shutdown_timeout)
10098
      # TODO: Maybe ignore failures if ignore_remove_failures is set
10099
      result.Raise("Could not shutdown instance %s on"
10100
                   " node %s" % (instance.name, src_node))
10101

    
10102
    # set the disks ID correctly since call_instance_start needs the
10103
    # correct drbd minor to create the symlinks
10104
    for disk in instance.disks:
10105
      self.cfg.SetDiskID(disk, src_node)
10106

    
10107
    activate_disks = (not instance.admin_up)
10108

    
10109
    if activate_disks:
10110
      # Activate the instance disks if we're exporting a stopped instance
10111
      feedback_fn("Activating disks for %s" % instance.name)
10112
      _StartInstanceDisks(self, instance, None)
10113

    
10114
    try:
10115
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
10116
                                                     instance)
10117

    
10118
      helper.CreateSnapshots()
10119
      try:
10120
        if (self.op.shutdown and instance.admin_up and
10121
            not self.op.remove_instance):
10122
          assert not activate_disks
10123
          feedback_fn("Starting instance %s" % instance.name)
10124
          result = self.rpc.call_instance_start(src_node, instance, None, None)
10125
          msg = result.fail_msg
10126
          if msg:
10127
            feedback_fn("Failed to start instance: %s" % msg)
10128
            _ShutdownInstanceDisks(self, instance)
10129
            raise errors.OpExecError("Could not start instance: %s" % msg)
10130

    
10131
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
10132
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
10133
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10134
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
10135
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10136

    
10137
          (key_name, _, _) = self.x509_key_name
10138

    
10139
          dest_ca_pem = \
10140
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
10141
                                            self.dest_x509_ca)
10142

    
10143
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
10144
                                                     key_name, dest_ca_pem,
10145
                                                     timeouts)
10146
      finally:
10147
        helper.Cleanup()
10148

    
10149
      # Check for backwards compatibility
10150
      assert len(dresults) == len(instance.disks)
10151
      assert compat.all(isinstance(i, bool) for i in dresults), \
10152
             "Not all results are boolean: %r" % dresults
10153

    
10154
    finally:
10155
      if activate_disks:
10156
        feedback_fn("Deactivating disks for %s" % instance.name)
10157
        _ShutdownInstanceDisks(self, instance)
10158

    
10159
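    # If anything failed, build a readable summary (finalization and/or
    # individual disk exports) and abort the operation.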
    if not (compat.all(dresults) and fin_resu):
10160
      failures = []
10161
      if not fin_resu:
10162
        failures.append("export finalization")
10163
      if not compat.all(dresults):
10164
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
10165
                               if not dsk)
10166
        failures.append("disk export: disk(s) %s" % fdsk)
10167

    
10168
      raise errors.OpExecError("Export failed, errors in %s" %
10169
                               utils.CommaJoin(failures))
10170

    
10171
    # At this point the export was successful; we can clean up and finish
10172

    
10173
    # Remove instance if requested
10174
    if self.op.remove_instance:
10175
      feedback_fn("Removing instance %s" % instance.name)
10176
      _RemoveInstance(self, feedback_fn, instance,
10177
                      self.op.ignore_remove_failures)
10178

    
10179
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
10180
      self._CleanupExports(feedback_fn)
10181

    
10182
    return fin_resu, dresults
10183

    
10184

    
10185
class LURemoveExport(NoHooksLU):
10186
  """Remove exports related to the named instance.
10187

10188
  """
10189
  _OP_PARAMS = [
10190
    _PInstanceName,
10191
    ]
10192
  REQ_BGL = False
10193

    
10194
  def ExpandNames(self):
10195
    self.needed_locks = {}
10196
    # We need all nodes to be locked in order for RemoveExport to work, but we
10197
    # don't need to lock the instance itself, as nothing will happen to it (and
10198
    # we can remove exports also for a removed instance)
10199
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10200

    
10201
  def Exec(self, feedback_fn):
10202
    """Remove any export.
10203

10204
    """
10205
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
10206
    # If the instance was not found we'll try with the name that was passed in.
10207
    # This will only work if it was an FQDN, though.
10208
    fqdn_warn = False
10209
    if not instance_name:
10210
      fqdn_warn = True
10211
      instance_name = self.op.instance_name
10212

    
10213
    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
10214
    exportlist = self.rpc.call_export_list(locked_nodes)
10215
    found = False
10216
    for node in exportlist:
10217
      msg = exportlist[node].fail_msg
10218
      if msg:
10219
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
10220
        continue
10221
      if instance_name in exportlist[node].payload:
10222
        found = True
10223
        result = self.rpc.call_export_remove(node, instance_name)
10224
        msg = result.fail_msg
10225
        if msg:
10226
          logging.error("Could not remove export for instance %s"
10227
                        " on node %s: %s", instance_name, node, msg)
10228

    
10229
    if fqdn_warn and not found:
10230
      feedback_fn("Export not found. If trying to remove an export belonging"
10231
                  " to a deleted instance please use its Fully Qualified"
10232
                  " Domain Name.")
10233

    
10234

    
10235
class LUAddGroup(LogicalUnit):
10236
  """Logical unit for creating node groups.
10237

10238
  """
10239
  HPATH = "group-add"
10240
  HTYPE = constants.HTYPE_GROUP
10241

    
10242
  _OP_PARAMS = [
10243
    _PGroupName,
10244
    ("ndparams", None, ht.TOr(ht.TDict, ht.TNone)),
10245
    ("alloc_policy", None, ht.TOr(ht.TNone,
10246
                                  ht.TElemOf(constants.VALID_ALLOC_POLICIES))),
10247
    ]
10248

    
10249
  REQ_BGL = False
10250

    
10251
  def ExpandNames(self):
10252
    # We need the new group's UUID here so that we can create and acquire the
10253
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
10254
    # that it should not check whether the UUID exists in the configuration.
10255
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
10256
    self.needed_locks = {}
10257
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
10258

    
10259
  def CheckPrereq(self):
10260
    """Check prerequisites.
10261

10262
    This checks that the given group name is not an existing node group
10263
    already.
10264

10265
    """
10266
    try:
10267
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10268
    except errors.OpPrereqError:
10269
      pass
10270
    else:
10271
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
10272
                                 " node group (UUID: %s)" %
10273
                                 (self.op.group_name, existing_uuid),
10274
                                 errors.ECODE_EXISTS)
10275

    
10276
    if self.op.ndparams:
10277
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
10278

    
10279
  def BuildHooksEnv(self):
10280
    """Build hooks env.
10281

10282
    """
10283
    env = {
10284
      "GROUP_NAME": self.op.group_name,
10285
      }
10286
    mn = self.cfg.GetMasterNode()
10287
    return env, [mn], [mn]
10288

    
10289
  def Exec(self, feedback_fn):
10290
    """Add the node group to the cluster.
10291

10292
    """
10293
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
10294
                                  uuid=self.group_uuid,
10295
                                  alloc_policy=self.op.alloc_policy,
10296
                                  ndparams=self.op.ndparams)
10297

    
10298
    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
10299
    del self.remove_locks[locking.LEVEL_NODEGROUP]
10300

    
10301

    
10302
class LUQueryGroups(NoHooksLU):
10303
  """Logical unit for querying node groups.
10304

10305
  """
10306
  # pylint: disable-msg=W0142
10307
  _OP_PARAMS = [
10308
    _POutputFields,
10309
    ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
10310
    ]
10311

    
10312
  REQ_BGL = False
10313

    
10314
  _FIELDS_DYNAMIC = utils.FieldSet()
10315

    
10316
  _SIMPLE_FIELDS = ["name", "uuid", "alloc_policy",
10317
                    "ctime", "mtime", "serial_no"]
10318

    
10319
  _FIELDS_STATIC = utils.FieldSet(
10320
      "node_cnt", "node_list", "pinst_cnt", "pinst_list", *_SIMPLE_FIELDS)
10321

    
10322
  def CheckArguments(self):
10323
    _CheckOutputFields(static=self._FIELDS_STATIC,
10324
                       dynamic=self._FIELDS_DYNAMIC,
10325
                       selected=self.op.output_fields)
10326

    
10327
  def ExpandNames(self):
10328
    self.needed_locks = {}
10329

    
10330
  def Exec(self, feedback_fn):
10331
    """Computes the list of groups and their attributes.
10332

10333
    """
10334
    all_groups = self.cfg.GetAllNodeGroupsInfo()
10335
    name_to_uuid = dict((g.name, g.uuid) for g in all_groups.values())
10336

    
10337
    if not self.op.names:
10338
      sorted_names = utils.NiceSort(name_to_uuid.keys())
10339
      my_groups = [name_to_uuid[n] for n in sorted_names]
10340
    else:
10341
      # Accept names to be either names or UUIDs.
10342
      all_uuid = frozenset(all_groups.keys())
10343
      my_groups = []
10344
      missing = []
10345

    
10346
      for name in self.op.names:
10347
        if name in all_uuid:
10348
          my_groups.append(name)
10349
        elif name in name_to_uuid:
10350
          my_groups.append(name_to_uuid[name])
10351
        else:
10352
          missing.append(name)
10353

    
10354
      if missing:
10355
        raise errors.OpPrereqError("Some groups do not exist: %s" % missing,
10356
                                   errors.ECODE_NOENT)
10357

    
10358
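    # Figure out whether the requested output fields need node and/or
    # instance membership information at all.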
    do_nodes = bool(frozenset(["node_cnt", "node_list"]).
10359
                    intersection(self.op.output_fields))
10360

    
10361
    do_instances = bool(frozenset(["pinst_cnt", "pinst_list"]).
10362
                        intersection(self.op.output_fields))
10363

    
10364
    # We need to map group->[nodes], and group->[instances]. The former is
10365
    # directly attainable, but the latter we have to do through instance->node,
10366
    # hence we need to process nodes even if we only need instance information.
10367
    if do_nodes or do_instances:
10368
      all_nodes = self.cfg.GetAllNodesInfo()
10369
      group_to_nodes = dict((all_groups[name].uuid, []) for name in my_groups)
10370
      node_to_group = {}
10371

    
10372
      for node in all_nodes.values():
10373
        if node.group in group_to_nodes:
10374
          group_to_nodes[node.group].append(node.name)
10375
          node_to_group[node.name] = node.group
10376

    
10377
      if do_instances:
10378
        all_instances = self.cfg.GetAllInstancesInfo()
10379
        group_to_instances = dict((all_groups[name].uuid, [])
10380
                                  for name in my_groups)
10381
        for instance in all_instances.values():
10382
          node = instance.primary_node
10383
          if node in node_to_group:
10384
            group_to_instances[node_to_group[node]].append(instance.name)
10385

    
10386
    output = []
10387

    
10388
    for uuid in my_groups:
10389
      group = all_groups[uuid]
10390
      group_output = []
10391

    
10392
      for field in self.op.output_fields:
10393
        if field in self._SIMPLE_FIELDS:
10394
          val = getattr(group, field)
10395
        elif field == "node_list":
10396
          val = utils.NiceSort(group_to_nodes[group.uuid])
10397
        elif field == "node_cnt":
10398
          val = len(group_to_nodes[group.uuid])
10399
        elif field == "pinst_list":
10400
          val = utils.NiceSort(group_to_instances[group.uuid])
10401
        elif field == "pinst_cnt":
10402
          val = len(group_to_instances[group.uuid])
10403
        else:
10404
          raise errors.ParameterError(field)
10405
        group_output.append(val)
10406
      output.append(group_output)
10407

    
10408
    return output
10409

    
10410

    
10411
class LUSetGroupParams(LogicalUnit):
10412
  """Modifies the parameters of a node group.
10413

10414
  """
10415
  HPATH = "group-modify"
10416
  HTYPE = constants.HTYPE_GROUP
10417

    
10418
  _OP_PARAMS = [
10419
    _PGroupName,
10420
    ("ndparams", None, ht.TOr(ht.TDict, ht.TNone)),
10421
    ("alloc_policy", None, ht.TOr(ht.TNone,
10422
                                  ht.TElemOf(constants.VALID_ALLOC_POLICIES))),
10423
    ]
10424

    
10425
  REQ_BGL = False
10426

    
10427
  def CheckArguments(self):
10428
    all_changes = [
10429
      self.op.ndparams,
10430
      self.op.alloc_policy,
10431
      ]
10432

    
10433
    if all_changes.count(None) == len(all_changes):
10434
      raise errors.OpPrereqError("Please pass at least one modification",
10435
                                 errors.ECODE_INVAL)
10436

    
10437
  def ExpandNames(self):
10438
    # This raises errors.OpPrereqError on its own:
10439
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10440

    
10441
    self.needed_locks = {
10442
      locking.LEVEL_NODEGROUP: [self.group_uuid],
10443
      }
10444

    
10445
  def CheckPrereq(self):
10446
    """Check prerequisites.
10447

10448
    """
10449
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
10450

    
10451
    if self.group is None:
10452
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10453
                               (self.op.group_name, self.group_uuid))
10454

    
10455
    if self.op.ndparams:
10456
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
10457
      self.new_ndparams = self.group.SimpleFillND(self.op.ndparams)
10458

    
10459
  def BuildHooksEnv(self):
10460
    """Build hooks env.
10461

10462
    """
10463
    env = {
10464
      "GROUP_NAME": self.op.group_name,
10465
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
10466
      }
10467
    mn = self.cfg.GetMasterNode()
10468
    return env, [mn], [mn]
10469

    
10470
  def Exec(self, feedback_fn):
10471
    """Modifies the node group.
10472

10473
    """
10474
    result = []
10475

    
10476
    if self.op.ndparams:
10477
      self.group.ndparams = self.new_ndparams
10478
      result.append(("ndparams", str(self.group.ndparams)))
10479

    
10480
    if self.op.alloc_policy:
10481
      self.group.alloc_policy = self.op.alloc_policy
10482

    
10483
    self.cfg.Update(self.group, feedback_fn)
10484
    return result
10485

    
10486

    
10487

    
10488
class LURemoveGroup(LogicalUnit):
10489
  HPATH = "group-remove"
10490
  HTYPE = constants.HTYPE_GROUP
10491

    
10492
  _OP_PARAMS = [
10493
    _PGroupName,
10494
    ]
10495

    
10496
  REQ_BGL = False
10497

    
10498
  def ExpandNames(self):
10499
    # This raises errors.OpPrereqError on its own:
10500
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10501
    self.needed_locks = {
10502
      locking.LEVEL_NODEGROUP: [self.group_uuid],
10503
      }
10504

    
10505
  def CheckPrereq(self):
10506
    """Check prerequisites.
10507

10508
    This checks that the given group name exists as a node group, that it is
10509
    empty (i.e., contains no nodes), and that it is not the last group of the
10510
    cluster.
10511

10512
    """
10513
    # Verify that the group is empty.
10514
    group_nodes = [node.name
10515
                   for node in self.cfg.GetAllNodesInfo().values()
10516
                   if node.group == self.group_uuid]
10517

    
10518
    if group_nodes:
10519
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
10520
                                 " nodes: %s" %
10521
                                 (self.op.group_name,
10522
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
10523
                                 errors.ECODE_STATE)
10524

    
10525
    # Verify the cluster would not be left group-less.
10526
    if len(self.cfg.GetNodeGroupList()) == 1:
10527
      raise errors.OpPrereqError("Group '%s' is the last group in the cluster,"
10528
                                 " which cannot be left without at least one"
10529
                                 " group" % self.op.group_name,
10530
                                 errors.ECODE_STATE)
10531

    
10532
  def BuildHooksEnv(self):
10533
    """Build hooks env.
10534

10535
    """
10536
    env = {
10537
      "GROUP_NAME": self.op.group_name,
10538
      }
10539
    mn = self.cfg.GetMasterNode()
10540
    return env, [mn], [mn]
10541

    
10542
  def Exec(self, feedback_fn):
10543
    """Remove the node group.
10544

10545
    """
10546
    try:
10547
      self.cfg.RemoveNodeGroup(self.group_uuid)
10548
    except errors.ConfigurationError:
10549
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
10550
                               (self.op.group_name, self.group_uuid))
10551

    
10552
    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
10553

    
10554

    
10555
class LURenameGroup(LogicalUnit):
10556
  HPATH = "group-rename"
10557
  HTYPE = constants.HTYPE_GROUP
10558

    
10559
  _OP_PARAMS = [
10560
    ("old_name", ht.NoDefault, ht.TNonEmptyString),
10561
    ("new_name", ht.NoDefault, ht.TNonEmptyString),
10562
    ]
10563

    
10564
  REQ_BGL = False
10565

    
10566
  def ExpandNames(self):
10567
    # This raises errors.OpPrereqError on its own:
10568
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.old_name)
10569

    
10570
    self.needed_locks = {
10571
      locking.LEVEL_NODEGROUP: [self.group_uuid],
10572
      }
10573

    
10574
  def CheckPrereq(self):
10575
    """Check prerequisites.
10576

10577
    This checks that the given old_name exists as a node group, and that
10578
    new_name doesn't.
10579

10580
    """
10581
    try:
10582
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
10583
    except errors.OpPrereqError:
10584
      pass
10585
    else:
10586
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
10587
                                 " node group (UUID: %s)" %
10588
                                 (self.op.new_name, new_name_uuid),
10589
                                 errors.ECODE_EXISTS)
10590

    
10591
  def BuildHooksEnv(self):
10592
    """Build hooks env.
10593

10594
    """
10595
    env = {
10596
      "OLD_NAME": self.op.old_name,
10597
      "NEW_NAME": self.op.new_name,
10598
      }
10599

    
10600
    mn = self.cfg.GetMasterNode()
10601
    all_nodes = self.cfg.GetAllNodesInfo()
10602
    run_nodes = [mn]
10603
    all_nodes.pop(mn, None)
10604

    
10605
    for node in all_nodes.values():
10606
      if node.group == self.group_uuid:
10607
        run_nodes.append(node.name)
10608

    
10609
    return env, run_nodes, run_nodes
10610

    
10611
  def Exec(self, feedback_fn):
10612
    """Rename the node group.
10613

10614
    """
10615
    group = self.cfg.GetNodeGroup(self.group_uuid)
10616

    
10617
    if group is None:
10618
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10619
                               (self.op.old_name, self.group_uuid))
10620

    
10621
    group.name = self.op.new_name
10622
    self.cfg.Update(group, feedback_fn)
10623

    
10624
    return self.op.new_name
10625

    
10626

    
10627
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
10628
  """Generic tags LU.
10629

10630
  This is an abstract class which is the parent of all the other tags LUs.
10631

10632
  """
10633

    
10634
  def ExpandNames(self):
10635
    self.needed_locks = {}
10636
    if self.op.kind == constants.TAG_NODE:
10637
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
10638
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
10639
    elif self.op.kind == constants.TAG_INSTANCE:
10640
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
10641
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
10642

    
10643
    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
10644
    # not possible to acquire the BGL based on opcode parameters)
10645

    
10646
  def CheckPrereq(self):
10647
    """Check prerequisites.
10648

10649
    """
10650
    if self.op.kind == constants.TAG_CLUSTER:
10651
      self.target = self.cfg.GetClusterInfo()
10652
    elif self.op.kind == constants.TAG_NODE:
10653
      self.target = self.cfg.GetNodeInfo(self.op.name)
10654
    elif self.op.kind == constants.TAG_INSTANCE:
10655
      self.target = self.cfg.GetInstanceInfo(self.op.name)
10656
    else:
10657
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
10658
                                 str(self.op.kind), errors.ECODE_INVAL)
10659

    
10660

    
10661
class LUGetTags(TagsLU):
10662
  """Returns the tags of a given object.
10663

10664
  """
10665
  _OP_PARAMS = [
10666
    ("kind", ht.NoDefault, ht.TElemOf(constants.VALID_TAG_TYPES)),
10667
    # Name is only meaningful for nodes and instances
10668
    ("name", ht.NoDefault, ht.TMaybeString),
10669
    ]
10670
  REQ_BGL = False
10671

    
10672
  def ExpandNames(self):
10673
    TagsLU.ExpandNames(self)
10674

    
10675
    # Share locks as this is only a read operation
10676
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
10677

    
10678
  def Exec(self, feedback_fn):
10679
    """Returns the tag list.
10680

10681
    """
10682
    return list(self.target.GetTags())
10683

    
10684

    
10685
class LUSearchTags(NoHooksLU):
10686
  """Searches the tags for a given pattern.
10687

10688
  """
10689
  _OP_PARAMS = [
10690
    ("pattern", ht.NoDefault, ht.TNonEmptyString),
10691
    ]
10692
  REQ_BGL = False
10693

    
10694
  def ExpandNames(self):
10695
    self.needed_locks = {}
10696

    
10697
  def CheckPrereq(self):
10698
    """Check prerequisites.
10699

10700
    This checks the pattern passed for validity by compiling it.
10701

10702
    """
10703
    try:
10704
      self.re = re.compile(self.op.pattern)
10705
    except re.error, err:
10706
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
10707
                                 (self.op.pattern, err), errors.ECODE_INVAL)
10708

    
10709
  def Exec(self, feedback_fn):
10710
    """Returns the tag list.
10711

10712
    """
10713
    cfg = self.cfg
10714
    tgts = [("/cluster", cfg.GetClusterInfo())]
10715
    ilist = cfg.GetAllInstancesInfo().values()
10716
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
10717
    nlist = cfg.GetAllNodesInfo().values()
10718
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
10719
    results = []
10720
    for path, target in tgts:
10721
      for tag in target.GetTags():
10722
        if self.re.search(tag):
10723
          results.append((path, tag))
10724
    return results
10725

    
10726

    
10727
class LUAddTags(TagsLU):
10728
  """Sets a tag on a given object.
10729

10730
  """
10731
  _OP_PARAMS = [
10732
    ("kind", ht.NoDefault, ht.TElemOf(constants.VALID_TAG_TYPES)),
10733
    # Name is only meaningful for nodes and instances
10734
    ("name", ht.NoDefault, ht.TMaybeString),
10735
    ("tags", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
10736
    ]
10737
  REQ_BGL = False
10738

    
10739
  def CheckPrereq(self):
10740
    """Check prerequisites.
10741

10742
    This checks the type and length of the tag name and value.
10743

10744
    """
10745
    TagsLU.CheckPrereq(self)
10746
    for tag in self.op.tags:
10747
      objects.TaggableObject.ValidateTag(tag)
10748

    
10749
  def Exec(self, feedback_fn):
10750
    """Sets the tag.
10751

10752
    """
10753
    try:
10754
      for tag in self.op.tags:
10755
        self.target.AddTag(tag)
10756
    except errors.TagError, err:
10757
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
10758
    self.cfg.Update(self.target, feedback_fn)
10759

    
10760

    
10761
class LUDelTags(TagsLU):
10762
  """Delete a list of tags from a given object.
10763

10764
  """
10765
  _OP_PARAMS = [
10766
    ("kind", ht.NoDefault, ht.TElemOf(constants.VALID_TAG_TYPES)),
10767
    # Name is only meaningful for nodes and instances
10768
    ("name", ht.NoDefault, ht.TMaybeString),
10769
    ("tags", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
10770
    ]
10771
  REQ_BGL = False
10772

    
10773
  def CheckPrereq(self):
10774
    """Check prerequisites.
10775

10776
    This checks that we have the given tag.
10777

10778
    """
10779
    TagsLU.CheckPrereq(self)
10780
    for tag in self.op.tags:
10781
      objects.TaggableObject.ValidateTag(tag)
10782
    del_tags = frozenset(self.op.tags)
10783
    cur_tags = self.target.GetTags()
10784

    
10785
    diff_tags = del_tags - cur_tags
10786
    if diff_tags:
10787
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
10788
      raise errors.OpPrereqError("Tag(s) %s not found" %
10789
                                 (utils.CommaJoin(diff_names), ),
10790
                                 errors.ECODE_NOENT)
10791

    
10792
  def Exec(self, feedback_fn):
10793
    """Remove the tag from the object.
10794

10795
    """
10796
    for tag in self.op.tags:
10797
      self.target.RemoveTag(tag)
10798
    self.cfg.Update(self.target, feedback_fn)
10799

    
10800

    
10801
class LUTestDelay(NoHooksLU):
10802
  """Sleep for a specified amount of time.
10803

10804
  This LU sleeps on the master and/or nodes for a specified amount of
10805
  time.
10806

10807
  """
10808
  _OP_PARAMS = [
10809
    ("duration", ht.NoDefault, ht.TFloat),
10810
    ("on_master", True, ht.TBool),
10811
    ("on_nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
10812
    ("repeat", 0, ht.TPositiveInt)
10813
    ]
10814
  REQ_BGL = False
10815

    
10816
  def ExpandNames(self):
10817
    """Expand names and set required locks.
10818

10819
    This expands the node list, if any.
10820

10821
    """
10822
    self.needed_locks = {}
10823
    if self.op.on_nodes:
10824
      # _GetWantedNodes can be used here, but is not always appropriate to use
10825
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
10826
      # more information.
10827
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
10828
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
10829

    
10830
  def _TestDelay(self):
10831
    """Do the actual sleep.
10832

10833
    """
10834
    if self.op.on_master:
10835
      if not utils.TestDelay(self.op.duration):
10836
        raise errors.OpExecError("Error during master delay test")
10837
    if self.op.on_nodes:
10838
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
10839
      for node, node_result in result.items():
10840
        node_result.Raise("Failure during rpc call to node %s" % node)
10841

    
10842
  def Exec(self, feedback_fn):
10843
    """Execute the test delay opcode, with the wanted repetitions.
10844

10845
    """
10846
    if self.op.repeat == 0:
10847
      self._TestDelay()
10848
    else:
10849
      top_value = self.op.repeat - 1
10850
      for i in range(self.op.repeat):
10851
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
10852
        self._TestDelay()
10853

    
10854

    
10855
class LUTestJobqueue(NoHooksLU):
10856
  """Utility LU to test some aspects of the job queue.
10857

10858
  """
10859
  _OP_PARAMS = [
10860
    ("notify_waitlock", False, ht.TBool),
10861
    ("notify_exec", False, ht.TBool),
10862
    ("log_messages", ht.EmptyList, ht.TListOf(ht.TString)),
10863
    ("fail", False, ht.TBool),
10864
    ]
10865
  REQ_BGL = False
10866

    
10867
  # Must be lower than default timeout for WaitForJobChange to see whether it
10868
  # notices changed jobs
10869
  _CLIENT_CONNECT_TIMEOUT = 20.0
10870
  _CLIENT_CONFIRM_TIMEOUT = 60.0
10871

    
10872
  @classmethod
10873
  def _NotifyUsingSocket(cls, cb, errcls):
10874
    """Opens a Unix socket and waits for another program to connect.
10875

10876
    @type cb: callable
10877
    @param cb: Callback to send socket name to client
10878
    @type errcls: class
10879
    @param errcls: Exception class to use for errors
10880

10881
    """
10882
    # Using a temporary directory as there's no easy way to create temporary
10883
    # sockets without writing a custom loop around tempfile.mktemp and
10884
    # socket.bind
10885
    tmpdir = tempfile.mkdtemp()
10886
    try:
10887
      tmpsock = utils.PathJoin(tmpdir, "sock")
10888

    
10889
      logging.debug("Creating temporary socket at %s", tmpsock)
10890
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
10891
      try:
10892
        sock.bind(tmpsock)
10893
        sock.listen(1)
10894

    
10895
        # Send details to client
10896
        cb(tmpsock)
10897

    
10898
        # Wait for client to connect before continuing
10899
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
10900
        try:
10901
          (conn, _) = sock.accept()
10902
        except socket.error, err:
10903
          raise errcls("Client didn't connect in time (%s)" % err)
10904
      finally:
10905
        sock.close()
10906
    finally:
10907
      # Remove as soon as client is connected
10908
      shutil.rmtree(tmpdir)
10909

    
10910
    # Wait for client to close
10911
    try:
10912
      try:
10913
        # pylint: disable-msg=E1101
10914
        # Instance of '_socketobject' has no ... member
10915
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
10916
        conn.recv(1)
10917
      except socket.error, err:
10918
        raise errcls("Client failed to confirm notification (%s)" % err)
10919
    finally:
10920
      conn.close()
10921

    
10922
  def _SendNotification(self, test, arg, sockname):
10923
    """Sends a notification to the client.
10924

10925
    @type test: string
10926
    @param test: Test name
10927
    @param arg: Test argument (depends on test)
10928
    @type sockname: string
10929
    @param sockname: Socket path
10930

10931
    """
10932
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
10933

    
10934
  def _Notify(self, prereq, test, arg):
10935
    """Notifies the client of a test.
10936

10937
    @type prereq: bool
10938
    @param prereq: Whether this is a prereq-phase test
10939
    @type test: string
10940
    @param test: Test name
10941
    @param arg: Test argument (depends on test)
10942

10943
    """
10944
    if prereq:
10945
      errcls = errors.OpPrereqError
10946
    else:
10947
      errcls = errors.OpExecError
10948

    
10949
    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
10950
                                                  test, arg),
10951
                                   errcls)
10952

    
10953
  def CheckArguments(self):
10954
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
10955
    self.expandnames_calls = 0
10956

    
10957
  def ExpandNames(self):
10958
    checkargs_calls = getattr(self, "checkargs_calls", 0)
10959
    if checkargs_calls < 1:
10960
      raise errors.ProgrammerError("CheckArguments was not called")
10961

    
10962
    self.expandnames_calls += 1
10963

    
10964
    if self.op.notify_waitlock:
10965
      self._Notify(True, constants.JQT_EXPANDNAMES, None)
10966

    
10967
    self.LogInfo("Expanding names")
10968

    
10969
    # Get lock on master node (just to get a lock, not for a particular reason)
10970
    self.needed_locks = {
10971
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
10972
      }
10973

    
10974
  def Exec(self, feedback_fn):
10975
    if self.expandnames_calls < 1:
10976
      raise errors.ProgrammerError("ExpandNames was not called")
10977

    
10978
    if self.op.notify_exec:
10979
      self._Notify(False, constants.JQT_EXEC, None)
10980

    
10981
    self.LogInfo("Executing")
10982

    
10983
    if self.op.log_messages:
10984
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
10985
      for idx, msg in enumerate(self.op.log_messages):
10986
        self.LogInfo("Sending log message %s", idx + 1)
10987
        feedback_fn(constants.JQT_MSGPREFIX + msg)
10988
        # Report how many test messages have been sent
10989
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)
10990

    
10991
    if self.op.fail:
10992
      raise errors.OpExecError("Opcode failure was requested")
10993

    
10994
    return True
10995

    
10996

    
10997
class IAllocator(object):
10998
  """IAllocator framework.
10999

11000
  An IAllocator instance has four sets of attributes:
11001
    - cfg that is needed to query the cluster
11002
    - input data (all members of the _KEYS class attribute are required)
11003
    - four buffer attributes (in|out_data|text), that represent the
11004
      input (to the external script) in text and data structure format,
11005
      and the output from it, again in two formats
11006
    - the result variables from the script (success, info, nodes) for
11007
      easy usage
11008

11009
  """
11010
  # pylint: disable-msg=R0902
11011
  # lots of instance attributes
11012
  _ALLO_KEYS = [
11013
    "name", "mem_size", "disks", "disk_template",
11014
    "os", "tags", "nics", "vcpus", "hypervisor",
11015
    ]
11016
  _RELO_KEYS = [
11017
    "name", "relocate_from",
11018
    ]
11019
  _EVAC_KEYS = [
11020
    "evac_nodes",
11021
    ]
11022

    
11023
  def __init__(self, cfg, rpc, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.mem_size = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.evac_nodes = None
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      keyset = self._ALLO_KEYS
      fn = self._AddNewInstance
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      keyset = self._RELO_KEYS
      fn = self._AddRelocateInstance
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      keyset = self._EVAC_KEYS
      fn = self._AddEvacuateNodes
    else:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)
    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(fn)

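  # A minimal usage sketch (the instance, node and allocator names are
  # made-up examples, not taken from this module): a relocation request
  # needs exactly the _RELO_KEYS arguments, e.g.:
  #
  #   ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_RELOC,
  #                    name="instance1.example.com",
  #                    relocate_from=["node2.example.com"])
  #   ial.Run("hail")  # "hail" is just an example allocator name
  #
  # Any extra or missing keyword for the selected mode raises
  # ProgrammerError before _BuildInputData is called.
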
  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_list = cfg.GetNodeList()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)

    data["nodegroups"] = self._ComputeNodeGroupData(cfg)

    data["nodes"] = self._ComputeNodeData(cfg, node_data, node_iinfo, i_list)

    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)

    self.in_data = data

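  # For reference, the dictionary assembled above has roughly this shape
  # (an illustrative sketch, not an exhaustive schema):
  #
  #   {
  #     "version": constants.IALLOCATOR_VERSION,
  #     "cluster_name": "...",
  #     "cluster_tags": [...],
  #     "enabled_hypervisors": [...],
  #     "nodegroups": {group_uuid: {...}},
  #     "nodes": {node_name: {...}},
  #     "instances": {instance_name: {...}},
  #   }
  #
  # The "request" key is added later by _BuildInputData.
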
  @staticmethod
  def _ComputeNodeGroupData(cfg):
    """Compute node groups data.

    """
    ng = {}
    for guuid, gdata in cfg.GetAllNodeGroupsInfo().items():
      ng[guuid] = {
        "name": gdata.name,
        "alloc_policy": gdata.alloc_policy,
        }
    return ng

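  # Example of the mapping returned above (illustrative values):
  #   {"group-uuid-1": {"name": "default", "alloc_policy": "preferred"}}
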
  @staticmethod
  def _ComputeNodeData(cfg, node_data, node_iinfo, i_list):
    """Compute global node data.

    """
    node_results = {}
    for nname, nresult in node_data.items():
      # first fill in static (config-based) values
      ninfo = cfg.GetNodeInfo(nname)
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        "group": ninfo.group,
        "master_capable": ninfo.master_capable,
        "vm_capable": ninfo.vm_capable,
        }

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr.update(pnr_dyn)

      node_results[nname] = pnr

    return node_results

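  # A worked example of the free-memory adjustment above (numbers are made
  # up): if an instance is configured with 1024 MiB of backend memory but
  # the hypervisor currently reports it using only 900 MiB, then
  # i_mem_diff = 1024 - 900 = 124 and 124 MiB are subtracted from the
  # node's 'memory_free', so the allocator sees the node as if the instance
  # were using its full configured size.  Instances not reported by the
  # hypervisor at all (i_used_mem = 0) get their whole configured size
  # reserved this way.
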
  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                   }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data

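  # Each instance ends up as a dictionary along these lines (an
  # illustrative sketch with made-up values):
  #   {"admin_up": True, "vcpus": 2, "memory": 1024, "os": "debian-image",
  #    "nodes": ["node1", "node2"], "nics": [...], "tags": [],
  #    "disks": [{"size": 10240, "mode": "w"}], "disk_template": "drbd",
  #    "hypervisor": "xen-pvm", "disk_space_total": ...}
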
  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    return request

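  # An allocation request built here looks roughly like this (illustrative,
  # made-up values; the "type" key is filled in by _BuildInputData):
  #   {"name": "inst1", "disk_template": "drbd", "tags": [],
  #    "os": "debian-image", "vcpus": 1, "memory": 1024,
  #    "disks": [{"size": 10240, "mode": "w"}], "disk_space_total": ...,
  #    "nics": [...], "required_nodes": 2}
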
  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

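  # A relocation request is much smaller; it only names the instance, the
  # disk space it needs and the node(s) it is moving away from, e.g.
  # (illustrative values):
  #   {"name": "inst1", "disk_space_total": ..., "required_nodes": 1,
  #    "relocate_from": ["node2"]}
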
  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {
      "evac_nodes": self.evac_nodes
      }
    return request

  def _BuildInputData(self, fn):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

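  # The external script is expected to answer with a JSON document roughly
  # of the form (illustrative):
  #   {"success": true, "info": "...", "result": [...]}
  # where "result" carries the allocator's answer (for allocations, the
  # chosen node names).  _ValidateResult below also accepts the legacy
  # "nodes" key and renames it to "result".
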
  def _ValidateResult(self):
    """Process the allocator results.

    This will process and, if successful, save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["result"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
                               " is not a list")
    self.out_data = rdict


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  _OP_PARAMS = [
    ("direction", ht.NoDefault,
     ht.TElemOf(constants.VALID_IALLOCATOR_DIRECTIONS)),
    ("mode", ht.NoDefault, ht.TElemOf(constants.VALID_IALLOCATOR_MODES)),
    ("name", ht.NoDefault, ht.TNonEmptyString),
    ("nics", ht.NoDefault, ht.TOr(ht.TNone, ht.TListOf(
      ht.TDictOf(ht.TElemOf(["mac", "ip", "bridge"]),
               ht.TOr(ht.TNone, ht.TNonEmptyString))))),
    ("disks", ht.NoDefault, ht.TOr(ht.TNone, ht.TList)),
    ("hypervisor", None, ht.TMaybeString),
    ("allocator", None, ht.TMaybeString),
    ("tags", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
    ("mem_size", None, ht.TOr(ht.TNone, ht.TPositiveInt)),
    ("vcpus", None, ht.TOr(ht.TNone, ht.TPositiveInt)),
    ("os", None, ht.TMaybeString),
    ("disk_template", None, ht.TMaybeString),
    ("evac_nodes", None, ht.TOr(ht.TNone, ht.TListOf(ht.TNonEmptyString))),
    ]

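  # Each _OP_PARAMS entry above is a (name, default, check) tuple:
  # ht.NoDefault marks a mandatory parameter, while the third element is
  # the type check applied to the supplied value.  For example,
  # ("vcpus", None, ht.TOr(ht.TNone, ht.TPositiveInt)) describes an
  # optional parameter that, when given, must satisfy ht.TPositiveInt.
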
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    of the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result