#
#

# Copyright (C) 2006, 2007, 2008, 2009, 2010 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable-msg=W0201,C0302

# W0201 since most LU attributes are defined in CheckPrereq or similar
# functions

# C0302: since we have waaaay too many lines in this module

import os
import os.path
import time
import re
import platform
import logging
import copy
import OpenSSL
import socket
import tempfile
import shutil
import itertools

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import ht
from ganeti import query
from ganeti import qlang
from ganeti import opcodes

import ganeti.masterd.instance # pylint: disable-msg=W0611


def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]


# End types
class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)
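
  A minimal illustrative sketch of a subclass (the LU name and hook path
  below are made up for this example and do not exist in this module)::

    class LUDoNothing(LogicalUnit):
      HPATH = "do-nothing"
      HTYPE = constants.HTYPE_CLUSTER
      REQ_BGL = False

      def ExpandNames(self):
        self.needed_locks = {}

      def BuildHooksEnv(self):
        return {"OP_TARGET": self.cfg.GetClusterName()}, [], []

      def CheckPrereq(self):
        pass

      def Exec(self, feedback_fn):
        feedback_fn("nothing to do")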

  """
  HPATH = None
  HTYPE = None
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    self.__ssh = None
    # logging
    self.Log = processor.Log # pylint: disable-msg=C0103
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # The new kind-of-type-system
    op_id = self.op.OP_ID
    for attr_name, aval, test in self.op.GetAllParams():
      if not hasattr(op, attr_name):
        if aval == ht.NoDefault:
          raise errors.OpPrereqError("Required parameter '%s.%s' missing" %
                                     (op_id, attr_name), errors.ECODE_INVAL)
        else:
          if callable(aval):
            dval = aval()
          else:
            dval = aval
          setattr(self.op, attr_name, dval)
      attr_val = getattr(op, attr_name)
      if test == ht.NoType:
        # no tests here
        continue
      if not callable(test):
        raise errors.ProgrammerError("Validation for parameter '%s.%s' failed,"
                                     " given type is not a proper type (%s)" %
                                     (op_id, attr_name, test))
      if not test(attr_val):
        logging.error("OpCode %s, parameter %s, has invalid type %s/value %s",
                      self.op.OP_ID, attr_name, type(attr_val), attr_val)
        raise errors.OpPrereqError("Parameter '%s.%s' fails validation" %
                                   (op_id, attr_name), errors.ECODE_INVAL)

    self.CheckArguments()

  def __GetSSH(self):
    """Returns the SshRunner object

    """
    if not self.__ssh:
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
    return self.__ssh

  ssh = property(fget=__GetSSH)

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods no longer need to worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      pass

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    This method should return a three-element tuple consisting of: a dict
    containing the environment that will be used for running the
    specific hook for this LU, a list of node names on which the hook
    should run before the execution, and a list of node names on which
    the hook should run after the execution.

    The keys of the dict must not have the 'GANETI_' prefix, as this will
    be handled in the hooks runner. Also note additional keys will be
    added by the hooks runner. If the LU doesn't define any
    environment, an empty dict (and not None) should be returned.

    An empty list (and not None) should be returned when there are no nodes.

    Note that if the HPATH for a LU class is None, this function will
    not be called.
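
    A minimal illustrative example of a valid return value (modelled on the
    cluster LUs defined later in this module)::

      env = {"OP_TARGET": self.cfg.GetClusterName()}
      return env, [], [self.cfg.GetMasterNode()]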

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks.  By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the "unused argument" and "could
    # be a function" pylint warnings
    # pylint: disable-msg=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.
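
    Typical usage from a derived LU (illustrative sketch only)::

      def ExpandNames(self):
        self._ExpandAndLockInstance()
        # declare any additional locks here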

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primary or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]


class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This just raises an error.

    """
    assert False, "BuildHooksEnv called for NoHooksLUs"


class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec
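
  A minimal illustrative sketch (the tasklet name below is made up for this
  example)::

    class _DemoTasklet(Tasklet):
      def CheckPrereq(self):
        pass

      def Exec(self, feedback_fn):
        feedback_fn("executing demo tasklet")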

  """
  def __init__(self, lu):
    self.lu = lu

    # Shortcuts
    self.cfg = lu.cfg
    self.rpc = lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    pass

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError


class _QueryBase:
  """Base for query utility classes.

  """
  #: Attribute holding field definitions
  FIELDS = None

  def __init__(self, names, fields, use_locking):
    """Initializes this class.

    """
    self.names = names
    self.use_locking = use_locking

    self.query = query.Query(self.FIELDS, fields)
    self.requested_data = self.query.RequestedData()

    self.do_locking = None
    self.wanted = None

  def _GetNames(self, lu, all_names, lock_level):
    """Helper function to determine names asked for in the query.

    """
    if self.do_locking:
      names = lu.acquired_locks[lock_level]
    else:
      names = all_names

    if self.wanted == locking.ALL_SET:
      assert not self.names
      # caller didn't specify names, so ordering is not important
      return utils.NiceSort(names)

    # caller specified names and we must keep the same order
    assert self.names
    assert not self.do_locking or lu.acquired_locks[lock_level]

    missing = set(self.wanted).difference(names)
    if missing:
      raise errors.OpExecError("Some items were removed before retrieving"
                               " their data: %s" % missing)

    # Return expanded names
    return self.wanted

  @classmethod
  def FieldsQuery(cls, fields):
    """Returns list of available fields.

    @return: List of L{objects.QueryFieldDefinition}

    """
    return query.QueryFields(cls.FIELDS, fields)

  def ExpandNames(self, lu):
    """Expand names for this query.

    See L{LogicalUnit.ExpandNames}.

    """
    raise NotImplementedError()

  def DeclareLocks(self, lu, level):
    """Declare locks for this query.

    See L{LogicalUnit.DeclareLocks}.

    """
    raise NotImplementedError()

  def _GetQueryData(self, lu):
    """Collects all data for this query.

    @return: Query data object

    """
    raise NotImplementedError()

  def NewStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return query.GetQueryResponse(self.query, self._GetQueryData(lu))

  def OldStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return self.query.OldStyleQuery(self._GetQueryData(lu))


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary
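
  Illustrative example (values made up for this sketch)::

    _GetUpdatedParams({"a": 1, "b": 2},
                      {"b": constants.VALUE_DEFAULT, "c": 3})
    # returns {"a": 1, "c": 3}: "b" is reset (removed), "c" is added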

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceDown(lu, instance, reason):
  """Ensure that an instance is not running."""
  if instance.admin_up:
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
                               (instance.name, reason), errors.ECODE_STATE)

  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
              prereq=True, ecode=errors.ECODE_ENVIRON)

  if instance.name in ins_l.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, reason), errors.ECODE_STATE)


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name):
  """Builds instance related env variables for hooks

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUQueryInstanceData.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.admin_up,
    'memory': bep[constants.BE_MEMORY],
    'vcpus': bep[constants.BE_VCPUS],
    'nics': _NICListToTuple(lu, instance.nics),
    'disk_template': instance.disk_template,
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
    'bep': bep,
    'hvp': hvp,
    'hypervisor_name': instance.hypervisor,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max by one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  if not os_obj.supported_variants:
    return
  variant = objects.OS.GetVariant(name)
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """

  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both iallocator and node.",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found."
                                 " Please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator.")
1115

    
1116

    
1117
class LUPostInitCluster(LogicalUnit):
1118
  """Logical unit for running hooks after cluster initialization.
1119

1120
  """
1121
  HPATH = "cluster-init"
1122
  HTYPE = constants.HTYPE_CLUSTER
1123

    
1124
  def BuildHooksEnv(self):
1125
    """Build hooks env.
1126

1127
    """
1128
    env = {"OP_TARGET": self.cfg.GetClusterName()}
1129
    mn = self.cfg.GetMasterNode()
1130
    return env, [], [mn]
1131

    
1132
  def Exec(self, feedback_fn):
1133
    """Nothing to do.
1134

1135
    """
1136
    return True
1137

    
1138

    
1139
class LUDestroyCluster(LogicalUnit):
1140
  """Logical unit for destroying the cluster.
1141

1142
  """
1143
  HPATH = "cluster-destroy"
1144
  HTYPE = constants.HTYPE_CLUSTER
1145

    
1146
  def BuildHooksEnv(self):
1147
    """Build hooks env.
1148

1149
    """
1150
    env = {"OP_TARGET": self.cfg.GetClusterName()}
1151
    return env, [], []
1152

    
1153
  def CheckPrereq(self):
1154
    """Check prerequisites.
1155

1156
    This checks whether the cluster is empty.
1157

1158
    Any errors are signaled by raising errors.OpPrereqError.
1159

1160
    """
1161
    master = self.cfg.GetMasterNode()
1162

    
1163
    nodelist = self.cfg.GetNodeList()
1164
    if len(nodelist) != 1 or nodelist[0] != master:
1165
      raise errors.OpPrereqError("There are still %d node(s) in"
1166
                                 " this cluster." % (len(nodelist) - 1),
1167
                                 errors.ECODE_INVAL)
1168
    instancelist = self.cfg.GetInstanceList()
1169
    if instancelist:
1170
      raise errors.OpPrereqError("There are still %d instance(s) in"
1171
                                 " this cluster." % len(instancelist),
1172
                                 errors.ECODE_INVAL)
1173

    
1174
  def Exec(self, feedback_fn):
1175
    """Destroys the cluster.
1176

1177
    """
1178
    master = self.cfg.GetMasterNode()
1179

    
1180
    # Run post hooks on master node before it's removed
1181
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
1182
    try:
1183
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
1184
    except:
1185
      # pylint: disable-msg=W0702
1186
      self.LogWarning("Errors occurred running hooks on %s" % master)
1187

    
1188
    result = self.rpc.call_node_stop_master(master, False)
1189
    result.Raise("Could not disable the master role")
1190

    
1191
    return master
1192

    
1193

    
1194
def _VerifyCertificate(filename):
1195
  """Verifies a certificate for LUVerifyCluster.
1196

1197
  @type filename: string
1198
  @param filename: Path to PEM file
1199

1200
  """
1201
  try:
1202
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1203
                                           utils.ReadFile(filename))
1204
  except Exception, err: # pylint: disable-msg=W0703
1205
    return (LUVerifyCluster.ETYPE_ERROR,
1206
            "Failed to load X509 certificate %s: %s" % (filename, err))
1207

    
1208
  (errcode, msg) = \
1209
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1210
                                constants.SSL_CERT_EXPIRATION_ERROR)
1211

    
1212
  if msg:
1213
    fnamemsg = "While verifying %s: %s" % (filename, msg)
1214
  else:
1215
    fnamemsg = None
1216

    
1217
  if errcode is None:
1218
    return (None, fnamemsg)
1219
  elif errcode == utils.CERT_WARNING:
1220
    return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
1221
  elif errcode == utils.CERT_ERROR:
1222
    return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)
1223

    
1224
  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1225

    
1226

    
1227
class LUVerifyCluster(LogicalUnit):
1228
  """Verifies the cluster status.
1229

1230
  """
1231
  HPATH = "cluster-verify"
1232
  HTYPE = constants.HTYPE_CLUSTER
1233
  REQ_BGL = False
1234

    
1235
  TCLUSTER = "cluster"
1236
  TNODE = "node"
1237
  TINSTANCE = "instance"
1238

    
1239
  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1240
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1241
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1242
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1243
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1244
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1245
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1246
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1247
  ENODEDRBD = (TNODE, "ENODEDRBD")
1248
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1249
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1250
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
1251
  ENODEHV = (TNODE, "ENODEHV")
1252
  ENODELVM = (TNODE, "ENODELVM")
1253
  ENODEN1 = (TNODE, "ENODEN1")
1254
  ENODENET = (TNODE, "ENODENET")
1255
  ENODEOS = (TNODE, "ENODEOS")
1256
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1257
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1258
  ENODERPC = (TNODE, "ENODERPC")
1259
  ENODESSH = (TNODE, "ENODESSH")
1260
  ENODEVERSION = (TNODE, "ENODEVERSION")
1261
  ENODESETUP = (TNODE, "ENODESETUP")
1262
  ENODETIME = (TNODE, "ENODETIME")
1263

    
1264
  ETYPE_FIELD = "code"
1265
  ETYPE_ERROR = "ERROR"
1266
  ETYPE_WARNING = "WARNING"
1267

    
1268
  _HOOKS_INDENT_RE = re.compile("^", re.M)
1269

    
1270
  class NodeImage(object):
1271
    """A class representing the logical and physical status of a node.
1272

1273
    @type name: string
1274
    @ivar name: the node name to which this object refers
1275
    @ivar volumes: a structure as returned from
1276
        L{ganeti.backend.GetVolumeList} (runtime)
1277
    @ivar instances: a list of running instances (runtime)
1278
    @ivar pinst: list of configured primary instances (config)
1279
    @ivar sinst: list of configured secondary instances (config)
1280
    @ivar sbp: diction of {secondary-node: list of instances} of all peers
1281
        of this node (config)
1282
    @ivar mfree: free memory, as reported by hypervisor (runtime)
1283
    @ivar dfree: free disk, as reported by the node (runtime)
1284
    @ivar offline: the offline status (config)
1285
    @type rpc_fail: boolean
1286
    @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1287
        not whether the individual keys were correct) (runtime)
1288
    @type lvm_fail: boolean
1289
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1290
    @type hyp_fail: boolean
1291
    @ivar hyp_fail: whether the RPC call didn't return the instance list
1292
    @type ghost: boolean
1293
    @ivar ghost: whether this is a known node or not (config)
1294
    @type os_fail: boolean
1295
    @ivar os_fail: whether the RPC call didn't return valid OS data
1296
    @type oslist: list
1297
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1298
    @type vm_capable: boolean
1299
    @ivar vm_capable: whether the node can host instances
1300

1301
    """
1302
    def __init__(self, offline=False, name=None, vm_capable=True):
1303
      self.name = name
1304
      self.volumes = {}
1305
      self.instances = []
1306
      self.pinst = []
1307
      self.sinst = []
1308
      self.sbp = {}
1309
      self.mfree = 0
1310
      self.dfree = 0
1311
      self.offline = offline
1312
      self.vm_capable = vm_capable
1313
      self.rpc_fail = False
1314
      self.lvm_fail = False
1315
      self.hyp_fail = False
1316
      self.ghost = False
1317
      self.os_fail = False
1318
      self.oslist = {}
1319

    
1320
  def ExpandNames(self):
1321
    self.needed_locks = {
1322
      locking.LEVEL_NODE: locking.ALL_SET,
1323
      locking.LEVEL_INSTANCE: locking.ALL_SET,
1324
    }
1325
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1326

    
1327
  def _Error(self, ecode, item, msg, *args, **kwargs):
1328
    """Format an error message.
1329

1330
    Based on the opcode's error_codes parameter, either format a
1331
    parseable error code, or a simpler error string.
1332

1333
    This must be called only from Exec and functions called from Exec.
1334

1335
    """
1336
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1337
    itype, etxt = ecode
1338
    # first complete the msg
1339
    if args:
1340
      msg = msg % args
1341
    # then format the whole message
1342
    if self.op.error_codes:
1343
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1344
    else:
1345
      if item:
1346
        item = " " + item
1347
      else:
1348
        item = ""
1349
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1350
    # and finally report it via the feedback_fn
1351
    self._feedback_fn("  - %s" % msg)
1352

    
1353
  def _ErrorIf(self, cond, *args, **kwargs):
1354
    """Log an error message if the passed condition is True.
1355

1356
    """
1357
    cond = bool(cond) or self.op.debug_simulate_errors
1358
    if cond:
1359
      self._Error(*args, **kwargs)
1360
    # do not mark the operation as failed for WARN cases only
1361
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1362
      self.bad = self.bad or cond
1363

    
1364
  def _VerifyNode(self, ninfo, nresult):
1365
    """Perform some basic validation on data returned from a node.
1366

1367
      - check the result data structure is well formed and has all the
1368
        mandatory fields
1369
      - check ganeti version
1370

1371
    @type ninfo: L{objects.Node}
1372
    @param ninfo: the node to check
1373
    @param nresult: the results from the node
1374
    @rtype: boolean
1375
    @return: whether overall this call was successful (and we can expect
1376
         reasonable values in the respose)
1377

1378
    """
1379
    node = ninfo.name
1380
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1381

    
1382
    # main result, nresult should be a non-empty dict
1383
    test = not nresult or not isinstance(nresult, dict)
1384
    _ErrorIf(test, self.ENODERPC, node,
1385
                  "unable to verify node: no data returned")
1386
    if test:
1387
      return False
1388

    
1389
    # compares ganeti version
1390
    local_version = constants.PROTOCOL_VERSION
1391
    remote_version = nresult.get("version", None)
1392
    test = not (remote_version and
1393
                isinstance(remote_version, (list, tuple)) and
1394
                len(remote_version) == 2)
1395
    _ErrorIf(test, self.ENODERPC, node,
1396
             "connection to node returned invalid data")
1397
    if test:
1398
      return False
1399

    
1400
    test = local_version != remote_version[0]
1401
    _ErrorIf(test, self.ENODEVERSION, node,
1402
             "incompatible protocol versions: master %s,"
1403
             " node %s", local_version, remote_version[0])
1404
    if test:
1405
      return False
1406

    
1407
    # node seems compatible, we can actually try to look into its results
1408

    
1409
    # full package version
1410
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1411
                  self.ENODEVERSION, node,
1412
                  "software version mismatch: master %s, node %s",
1413
                  constants.RELEASE_VERSION, remote_version[1],
1414
                  code=self.ETYPE_WARNING)
1415

    
1416
    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1417
    if ninfo.vm_capable and isinstance(hyp_result, dict):
1418
      for hv_name, hv_result in hyp_result.iteritems():
1419
        test = hv_result is not None
1420
        _ErrorIf(test, self.ENODEHV, node,
1421
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1422

    
1423
    test = nresult.get(constants.NV_NODESETUP,
1424
                           ["Missing NODESETUP results"])
1425
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1426
             "; ".join(test))
1427

    
1428
    return True
1429

    
1430
  def _VerifyNodeTime(self, ninfo, nresult,
1431
                      nvinfo_starttime, nvinfo_endtime):
1432
    """Check the node time.
1433

1434
    @type ninfo: L{objects.Node}
1435
    @param ninfo: the node to check
1436
    @param nresult: the remote results for the node
1437
    @param nvinfo_starttime: the start time of the RPC call
1438
    @param nvinfo_endtime: the end time of the RPC call
1439

1440
    """
1441
    node = ninfo.name
1442
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1443

    
1444
    ntime = nresult.get(constants.NV_TIME, None)
1445
    try:
1446
      ntime_merged = utils.MergeTime(ntime)
1447
    except (ValueError, TypeError):
1448
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1449
      return
1450

    
1451
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1452
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1453
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1454
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1455
    else:
1456
      ntime_diff = None
1457

    
1458
    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1459
             "Node time diverges by at least %s from master node time",
1460
             ntime_diff)
1461

    
1462
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1463
    """Check the node time.
1464

1465
    @type ninfo: L{objects.Node}
1466
    @param ninfo: the node to check
1467
    @param nresult: the remote results for the node
1468
    @param vg_name: the configured VG name
1469

1470
    """
1471
    if vg_name is None:
1472
      return
1473

    
1474
    node = ninfo.name
1475
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1476

    
1477
    # checks vg existence and size > 20G
1478
    vglist = nresult.get(constants.NV_VGLIST, None)
1479
    test = not vglist
1480
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1481
    if not test:
1482
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1483
                                            constants.MIN_VG_SIZE)
1484
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1485

    
1486
    # check pv names
1487
    pvlist = nresult.get(constants.NV_PVLIST, None)
1488
    test = pvlist is None
1489
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1490
    if not test:
1491
      # check that ':' is not present in PV names, since it's a
1492
      # special character for lvcreate (denotes the range of PEs to
1493
      # use on the PV)
1494
      for _, pvname, owner_vg in pvlist:
1495
        test = ":" in pvname
1496
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1497
                 " '%s' of VG '%s'", pvname, owner_vg)
1498

    
1499
  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])

    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node master IP reachability data")
    if not test:
      if not nresult[constants.NV_MASTERIP]:
        if node == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
        else:
          msg = "cannot reach the master IP"
        _ErrorIf(True, self.ENODENET, node, msg)

  def _VerifyInstance(self, instance, instanceconfig, node_image,
                      diskstatus):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      pri_img = node_image[node_current]
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    for node, n_img in node_image.items():
      if node != node_current:
        test = instance in n_img.instances
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
                 "instance should not run on node %s", node)

    diskdata = [(nname, success, status, idx)
                for (nname, disks) in diskstatus.items()
                for idx, (success, status) in enumerate(disks)]
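    # For illustration (hypothetical node/status names): a diskstatus of
    #   {"node1": [(True, st0), (False, "timeout")]}
    # flattens to
    #   [("node1", True, st0, 0), ("node1", False, "timeout", 1)]
    # so each entry carries the node, the RPC success flag, the payload and
    # the disk index.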

    for nname, success, bdev_status, idx in diskdata:
      _ErrorIf(instanceconfig.admin_up and not success,
               self.EINSTANCEFAULTYDISK, instance,
               "couldn't retrieve status for disk/%s on %s: %s",
               idx, nname, bdev_status)
      _ErrorIf((instanceconfig.admin_up and success and
                bdev_status.ldisk_status == constants.LDS_FAULTY),
               self.EINSTANCEFAULTYDISK, instance,
               "disk/%s on %s is faulty", idx, nname)

  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    @type reserved: L{ganeti.utils.FieldSet}
    @param reserved: a FieldSet of reserved volume names

    """
    for node, n_img in node_image.items():
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # skip non-healthy nodes
        continue
      for volume in n_img.volumes:
        test = ((node not in node_vol_should or
                volume not in node_vol_should[node]) and
                not reserved.Matches(volume))
        self._ErrorIf(test, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

  def _VerifyOrphanInstances(self, instancelist, node_image):
    """Verify the list of running instances.

    This checks what instances are running but unknown to the cluster.

    """
    for node, n_img in node_image.items():
      for o_inst in n_img.instances:
        test = o_inst not in instancelist
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
                      "instance %s on node %s should not exist", o_inst, node)

  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    """
    for node, n_img in node_image.items():
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
      for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate instance failovers"
                      " should node %s fail", prinode)

  def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
                       master_files):
    """Verifies and computes the node required file checksums.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param file_list: required list of files
    @param local_cksum: dictionary of local files and their checksums
    @param master_files: list of files that only masters should have

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_cksum = nresult.get(constants.NV_FILELIST, None)
    test = not isinstance(remote_cksum, dict)
    _ErrorIf(test, self.ENODEFILECHECK, node,
             "node hasn't returned file checksum data")
    if test:
      return

    for file_name in file_list:
      node_is_mc = ninfo.master_candidate
      must_have = (file_name not in master_files) or node_is_mc
      # missing
      test1 = file_name not in remote_cksum
      # invalid checksum
      test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
      # existing and good
      test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
      _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
               "file '%s' missing", file_name)
      _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
               "file '%s' has wrong checksum", file_name)
      # not candidate and this is not a must-have file
      _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist on non master"
               " candidates (and the file is outdated)", file_name)
      # all good, except non-master/non-must have combination
      _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist"
               " on non master candidates", file_name)

  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
                      drbd_map):
    """Verifies the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_helper: the configured DRBD usermode helper
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    if drbd_helper:
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
      test = (helper_result is None)
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
               "no drbd usermode helper returned")
      if helper_result:
        status, payload = helper_result
        test = not status
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "drbd usermode helper check unsuccessful: %s", payload)
        test = status and (payload != drbd_helper)
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "wrong drbd usermode helper: %s", payload)

    # compute the DRBD minors
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, self.ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
      # ghost instance should not be running, but otherwise we
      # don't give double warnings (both ghost instance and
      # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name, instance.admin_up)

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, self.ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return

    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, self.ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, self.ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)

  def _UpdateNodeOS(self, ninfo, nresult, nimg):
    """Builds the node OS structures.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_os = nresult.get(constants.NV_OSLIST, None)
    test = (not isinstance(remote_os, list) or
            not compat.all(isinstance(v, list) and len(v) == 7
                           for v in remote_os))

    _ErrorIf(test, self.ENODEOS, node,
             "node hasn't returned valid OS data")

    nimg.os_fail = test

    if test:
      return

    os_dict = {}

    for (name, os_path, status, diagnose,
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:

      if name not in os_dict:
        os_dict[name] = []

      # parameters is a list of lists instead of list of tuples due to
      # JSON lacking a real tuple type, fix it:
      parameters = [tuple(v) for v in parameters]
      os_dict[name].append((os_path, status, diagnose,
                            set(variants), set(parameters), set(api_ver)))

    nimg.oslist = os_dict
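    # Resulting shape (illustrative values): nimg.oslist maps each OS name to
    # a list with one entry per path it was found under, e.g.
    #   {"debootstrap": [("/srv/ganeti/os/debootstrap", True, "",
    #                     set(["default"]), set(), set([20]))]}
    # where the first entry shadows any later ones.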

  def _VerifyNodeOS(self, ninfo, nimg, base):
    """Verifies the node OS list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nimg: the node image object
    @param base: the 'template' node we match against (e.g. from the master)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"

    for os_name, os_data in nimg.oslist.items():
      assert os_data, "Empty OS status for OS %s?!" % os_name
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
      _ErrorIf(not f_status, self.ENODEOS, node,
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
               "OS '%s' has multiple entries (first one shadows the rest): %s",
               os_name, utils.CommaJoin([v[0] for v in os_data]))
      # this will be caught in backend too
      _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
               and not f_var, self.ENODEOS, node,
               "OS %s with API at least %d does not declare any variant",
               os_name, constants.OS_API_V15)
      # comparisons with the 'base' image
      test = os_name not in base.oslist
      _ErrorIf(test, self.ENODEOS, node,
               "Extra OS %s not present on reference node (%s)",
               os_name, base.name)
      if test:
        continue
      assert base.oslist[os_name], "Base node has empty OS status?"
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
      if not b_status:
        # base OS is invalid, skipping
        continue
      for kind, a, b in [("API version", f_api, b_api),
                         ("variants list", f_var, b_var),
                         ("parameters", f_param, b_param)]:
        _ErrorIf(a != b, self.ENODEOS, node,
                 "OS %s %s differs from reference node %s: %s vs. %s",
                 kind, os_name, base.name,
                 utils.CommaJoin(a), utils.CommaJoin(b))

    # check any missing OSes
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
    _ErrorIf(missing, self.ENODEOS, node,
             "OSes present on reference node %s but missing on this node: %s",
             base.name, utils.CommaJoin(missing))

  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
    """Verifies and updates the node volume data.

    This function will update a L{NodeImage}'s internal structures
    with data from the remote call.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    if vg_name is None:
      pass
    elif isinstance(lvdata, basestring):
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
    else:
      nimg.volumes = lvdata
      nimg.lvm_fail = False

  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
    """Verifies and updates the node instance list.

    If the listing was successful, then updates this node's instance
    list. Otherwise, it marks the RPC call as failed for the instance
    list key.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    idata = nresult.get(constants.NV_INSTANCELIST, None)
    test = not isinstance(idata, list)
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
    if test:
      nimg.hyp_fail = True
    else:
      nimg.instances = idata

  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
    """Verifies and computes a node information map

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # try to read free memory (from the hypervisor)
    hv_info = nresult.get(constants.NV_HVINFO, None)
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
    if not test:
      try:
        nimg.mfree = int(hv_info["memory_free"])
      except (ValueError, TypeError):
        _ErrorIf(True, self.ENODERPC, node,
                 "node returned invalid nodeinfo, check hypervisor")

    # FIXME: devise a free space model for file based instances as well
    if vg_name is not None:
      test = (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST])
      _ErrorIf(test, self.ENODELVM, node,
               "node didn't return data for the volume group '%s'"
               " - it is either missing or broken", vg_name)
      if not test:
        try:
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
        except (ValueError, TypeError):
          _ErrorIf(True, self.ENODERPC, node,
                   "node returned invalid LVM info, check LVM status")

  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
    """Gets per-disk status information for all instances.

    @type nodelist: list of strings
    @param nodelist: Node names
    @type node_image: dict of (name, L{objects.Node})
    @param node_image: Node objects
    @type instanceinfo: dict of (name, L{objects.Instance})
    @param instanceinfo: Instance objects
    @rtype: {instance: {node: [(success, payload)]}}
    @return: a dictionary of per-instance dictionaries with nodes as
        keys and disk information as values; the disk information is a
        list of tuples (success, payload)

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    node_disks = {}
    node_disks_devonly = {}
    diskless_instances = set()
    diskless = constants.DT_DISKLESS

    for nname in nodelist:
      node_instances = list(itertools.chain(node_image[nname].pinst,
                                            node_image[nname].sinst))
      diskless_instances.update(inst for inst in node_instances
                                if instanceinfo[inst].disk_template == diskless)
      disks = [(inst, disk)
               for inst in node_instances
               for disk in instanceinfo[inst].disks]

      if not disks:
        # No need to collect data
        continue

      node_disks[nname] = disks

      # Creating copies as SetDiskID below will modify the objects and that can
      # lead to incorrect data returned from nodes
      devonly = [dev.Copy() for (_, dev) in disks]

      for dev in devonly:
        self.cfg.SetDiskID(dev, nname)

      node_disks_devonly[nname] = devonly

    assert len(node_disks) == len(node_disks_devonly)

    # Collect data from all nodes with disks
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
                                                          node_disks_devonly)

    assert len(result) == len(node_disks)

    instdisk = {}

    for (nname, nres) in result.items():
      disks = node_disks[nname]

      if nres.offline:
        # No data from this node
        data = len(disks) * [(False, "node offline")]
      else:
        msg = nres.fail_msg
        _ErrorIf(msg, self.ENODERPC, nname,
                 "while getting disk information: %s", msg)
        if msg:
          # No data from this node
          data = len(disks) * [(False, msg)]
        else:
          data = []
          for idx, i in enumerate(nres.payload):
            if isinstance(i, (tuple, list)) and len(i) == 2:
              data.append(i)
            else:
              logging.warning("Invalid result from node %s, entry %d: %s",
                              nname, idx, i)
              data.append((False, "Invalid result from the remote node"))

      for ((inst, _), status) in zip(disks, data):
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)

    # Add empty entries for diskless instances.
    for inst in diskless_instances:
      assert inst not in instdisk
      instdisk[inst] = {}

    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
                      compat.all(isinstance(s, (tuple, list)) and
                                 len(s) == 2 for s in statuses)
                      for inst, nnames in instdisk.items()
                      for nname, statuses in nnames.items())
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"

    return instdisk

  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks run only in the post phase; if they fail, their
    output is logged in the verify output and the verification fails.

    """
    all_nodes = self.cfg.GetNodeList()
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
      }
    for node in self.cfg.GetAllNodesInfo().values():
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())

    return env, [], all_nodes

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    self.bad = False
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn
    feedback_fn("* Verifying global settings")
    for msg in self.cfg.VerifyConfig():
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)

    # Check the cluster certificates
    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)

    vg_name = self.cfg.GetVGName()
    drbd_helper = self.cfg.GetDRBDHelper()
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
    cluster = self.cfg.GetClusterInfo()
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
                        for iname in instancelist)
    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = 0 # Count of offline nodes
    n_drained = 0 # Count of nodes being drained
    node_vol_should = {}

    # FIXME: verify OS list
    # do local checksums
    master_files = [constants.CLUSTER_CONF_FILE]
    master_node = self.master_node = self.cfg.GetMasterNode()
    master_ip = self.cfg.GetMasterIP()

    file_names = ssconf.SimpleStore().GetFileList()
    file_names.extend(constants.ALL_CERT_FILES)
    file_names.extend(master_files)
    if cluster.modify_etc_hosts:
      file_names.append(constants.ETC_HOSTS)

    local_checksums = utils.FingerprintFiles(file_names)

    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
    node_verify_param = {
      constants.NV_FILELIST: file_names,
      constants.NV_NODELIST: [node.name for node in nodeinfo
                              if not node.offline],
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
                                  node.secondary_ip) for node in nodeinfo
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
      constants.NV_MASTERIP: (master_node, master_ip),
      constants.NV_OSLIST: None,
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
      }
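    # The dict above is sent as-is to every node via call_node_verify(); an
    # illustrative fragment of a node's reply payload (made-up values) would
    # be {NV_TIME: (1330000000, 0), NV_VGLIST: {"xenvg": 409600}, ...}, which
    # the _Verify*/_Update* helpers below consume.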

    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]
      node_verify_param[constants.NV_DRBDLIST] = None

    if drbd_helper:
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper

    # Build our expected cluster state
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
                                                 name=node.name,
                                                 vm_capable=node.vm_capable))
                      for node in nodeinfo)

    for instance in instancelist:
      inst_config = instanceinfo[instance]

      for nname in inst_config.all_nodes:
        if nname not in node_image:
          # ghost node
          gnode = self.NodeImage(name=nname)
          gnode.ghost = True
          node_image[nname] = gnode

      inst_config.MapLVsByNode(node_vol_should)

      pnode = inst_config.primary_node
      node_image[pnode].pinst.append(instance)

      for snode in inst_config.secondary_nodes:
        nimg = node_image[snode]
        nimg.sinst.append(instance)
        if pnode not in nimg.sbp:
          nimg.sbp[pnode] = []
        nimg.sbp[pnode].append(instance)

    # At this point, we have the in-memory data structures complete,
    # except for the runtime information, which we'll gather next

    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()

    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Gathering disk information (%s nodes)" % len(nodelist))
    instdisk = self._CollectDiskInfo(nodelist, node_image, instanceinfo)

    feedback_fn("* Verifying node status")

    refos_img = None

    for node_i in nodeinfo:
      node = node_i.name
      nimg = node_image[node]

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline += 1
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained += 1
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
      if msg:
        nimg.rpc_fail = True
        continue

      nresult = all_nvinfo[node].payload

      nimg.call_ok = self._VerifyNode(node_i, nresult)
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
      self._VerifyNodeNetwork(node_i, nresult)
      self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
                            master_files)

      if nimg.vm_capable:
        self._VerifyNodeLVM(node_i, nresult, vg_name)
        self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
                             all_drbd_map)

        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
        self._UpdateNodeInstances(node_i, nresult, nimg)
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
        self._UpdateNodeOS(node_i, nresult, nimg)
        if not nimg.os_fail:
          if refos_img is None:
            refos_img = nimg
          self._VerifyNodeOS(node_i, nimg, refos_img)

    feedback_fn("* Verifying instance status")
    for instance in instancelist:
      if verbose:
        feedback_fn("* Verifying instance %s" % instance)
      inst_config = instanceinfo[instance]
      self._VerifyInstance(instance, inst_config, node_image,
                           instdisk[instance])
      inst_nodes_offline = []

      pnode = inst_config.primary_node
      pnode_img = node_image[pnode]
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
               self.ENODERPC, pnode, "instance %s, connection to"
               " primary node failed", instance)

      if pnode_img.offline:
        inst_nodes_offline.append(pnode)

      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
      if not inst_config.secondary_nodes:
        i_non_redundant.append(instance)
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
               instance, "instance has multiple secondary nodes: %s",
               utils.CommaJoin(inst_config.secondary_nodes),
               code=self.ETYPE_WARNING)

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        s_img = node_image[snode]
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
                 "instance %s, connection to secondary node failed", instance)

        if s_img.offline:
          inst_nodes_offline.append(snode)

      # warn that the instance lives on offline nodes
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
               "instance lives on offline node(s) %s",
               utils.CommaJoin(inst_nodes_offline))
      # ... or ghost/non-vm_capable nodes
      for node in inst_config.all_nodes:
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
                 "instance lives on ghost node %s", node)
        _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
                 instance, "instance lives on non-vm_capable node %s", node)

    feedback_fn("* Verifying orphan volumes")
    reserved = utils.FieldSet(*cluster.reserved_lvs)
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)

    feedback_fn("* Verifying orphan instances")
    self._VerifyOrphanInstances(instancelist, node_image)

    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
      feedback_fn("* Verifying N+1 Memory redundancy")
      self._VerifyNPlusOneMemory(node_image, instanceinfo)

    feedback_fn("* Other Notes")
    if i_non_redundant:
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))

    if i_non_a_balanced:
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
                  % len(i_non_a_balanced))

    if n_offline:
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)

    if n_drained:
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)

    return not self.bad

  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, and are only interested in
    # their results
    if phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      feedback_fn("* Hooks Results")
      assert hooks_results, "invalid result from hooks"

      for node_name in hooks_results:
        res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
                      "Communication failure in hooks execution: %s", msg)
        if res.offline or msg:
          # No need to investigate payload if node is offline or gave an error.
          # manually override lu_result here as _ErrorIf only
          # overrides self.bad
          lu_result = 1
          continue
        for script, hkr, output in res.payload:
          test = hkr == constants.HKR_FAIL
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
                        "Script %s failed, output:", script)
          if test:
            output = self._HOOKS_INDENT_RE.sub('      ', output)
            feedback_fn("%s" % output)
            lu_result = 0

      return lu_result


class LUVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    result = res_nodes, res_instances, res_missing = {}, [], {}

    nodes = utils.NiceSort(self.cfg.GetNodeList())
    instances = [self.cfg.GetInstanceInfo(name)
                 for name in self.cfg.GetInstanceList()]

    nv_dict = {}
    for inst in instances:
      inst_lvs = {}
      if (not inst.admin_up or
          inst.disk_template not in constants.DTS_NET_MIRROR):
        continue
      inst.MapLVsByNode(inst_lvs)
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
      for node, vol_list in inst_lvs.iteritems():
        for vol in vol_list:
          nv_dict[(node, vol)] = inst
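    # Illustrative result (hypothetical names): an instance with the LV
    # "xenvg/disk0_data" on "node1" contributes
    #   nv_dict[("node1", "xenvg/disk0_data")] = <that instance object>
    # so whatever is left in nv_dict after the LV scan below is a missing LV.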

    if not nv_dict:
      return result

    vg_names = self.rpc.call_vg_list(nodes)
    vg_names.Raise("Cannot get list of VGs")

    for node in nodes:
      # node_volume
      node_res = self.rpc.call_lv_list([node],
                                       vg_names[node].payload.keys())[node]
      if node_res.offline:
        continue
      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg
        continue

      lvs = node_res.payload
      for lv_name, (_, _, lv_online) in lvs.items():
        inst = nv_dict.pop((node, lv_name), None)
        if (not lv_online and inst is not None
            and inst.name not in res_instances):
          res_instances.append(inst.name)

    # any leftover items in nv_dict are missing LVs, let's arrange the
    # data better
    for key, inst in nv_dict.iteritems():
      if inst.name not in res_missing:
        res_missing[inst.name] = []
      res_missing[inst.name].append(key)

    return result


class LURepairDiskSizes(NoHooksLU):
  """Verifies the cluster disk sizes.

  """
  REQ_BGL = False

  def ExpandNames(self):
    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = _ExpandInstanceName(self.cfg, name)
        self.wanted_names.append(full_name)
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False
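    # Illustrative case (made-up sizes): a DRBD8 disk recorded as 10240 MiB
    # whose data child was created with 10236 MiB gets the child bumped to
    # 10240 MiB and the method returns True, so Exec() knows the instance
    # configuration needs to be written out again.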
2492

    
2493
  def Exec(self, feedback_fn):
2494
    """Verify the size of cluster disks.
2495

2496
    """
2497
    # TODO: check child disks too
2498
    # TODO: check differences in size between primary/secondary nodes
2499
    per_node_disks = {}
2500
    for instance in self.wanted_instances:
2501
      pnode = instance.primary_node
2502
      if pnode not in per_node_disks:
2503
        per_node_disks[pnode] = []
2504
      for idx, disk in enumerate(instance.disks):
2505
        per_node_disks[pnode].append((instance, idx, disk))
2506

    
2507
    changed = []
2508
    for node, dskl in per_node_disks.items():
2509
      newl = [v[2].Copy() for v in dskl]
2510
      for dsk in newl:
2511
        self.cfg.SetDiskID(dsk, node)
2512
      result = self.rpc.call_blockdev_getsizes(node, newl)
2513
      if result.fail_msg:
2514
        self.LogWarning("Failure in blockdev_getsizes call to node"
2515
                        " %s, ignoring", node)
2516
        continue
2517
      if len(result.data) != len(dskl):
2518
        self.LogWarning("Invalid result from node %s, ignoring node results",
2519
                        node)
2520
        continue
2521
      for ((instance, idx, disk), size) in zip(dskl, result.data):
2522
        if size is None:
2523
          self.LogWarning("Disk %d of instance %s did not return size"
2524
                          " information, ignoring", idx, instance.name)
2525
          continue
2526
        if not isinstance(size, (int, long)):
2527
          self.LogWarning("Disk %d of instance %s did not return valid"
2528
                          " size information, ignoring", idx, instance.name)
2529
          continue
2530
        size = size >> 20
2531
        if size != disk.size:
2532
          self.LogInfo("Disk %d of instance %s has mismatched size,"
2533
                       " correcting: recorded %d, actual %d", idx,
2534
                       instance.name, disk.size, size)
2535
          disk.size = size
2536
          self.cfg.Update(instance, feedback_fn)
2537
          changed.append((instance.name, idx, size))
2538
        if self._EnsureChildSizes(disk):
2539
          self.cfg.Update(instance, feedback_fn)
2540
          changed.append((instance.name, idx, disk.size))
2541
    return changed
2542

    
2543

    
2544
class LURenameCluster(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }
    mn = self.cfg.GetMasterNode()
    all_nodes = self.cfg.GetNodeList()
    return env, [mn], all_nodes

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    hostname = netutils.GetHostname(name=self.op.name,
                                    family=self.cfg.GetPrimaryIPFamily())

    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)
    if new_ip != old_ip:
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network" %
                                   new_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    clustername = self.op.name
    ip = self.ip

    # shutdown the master IP
    master = self.cfg.GetMasterNode()
    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetOnlineNodeList()
      try:
        node_list.remove(master)
      except ValueError:
        pass
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
    finally:
      result = self.rpc.call_node_start_master(master, False, False)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)

    return clustername


class LUSetClusterParams(LogicalUnit):
  """Change the parameters of the cluster.

  """
  HPATH = "cluster-modify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  def CheckArguments(self):
    """Check parameters

    """
    if self.op.uid_pool:
      uidpool.CheckUidPool(self.op.uid_pool)

    if self.op.add_uids:
      uidpool.CheckUidPool(self.op.add_uids)

    if self.op.remove_uids:
      uidpool.CheckUidPool(self.op.remove_uids)

  def ExpandNames(self):
    # FIXME: in the future maybe other cluster params won't require checking on
    # all nodes to be modified.
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
      }
    mn = self.cfg.GetMasterNode()
    return env, [mn], [mn]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the given params don't conflict and
    if the given volume group is valid.

    """
    if self.op.vg_name is not None and not self.op.vg_name:
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
                                   " instances exist", errors.ECODE_INVAL)

    if self.op.drbd_helper is not None and not self.op.drbd_helper:
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
        raise errors.OpPrereqError("Cannot disable drbd helper while"
                                   " drbd-based instances exist",
                                   errors.ECODE_INVAL)

    node_list = self.acquired_locks[locking.LEVEL_NODE]

    # if vg_name not None, checks given volume group on all nodes
    if self.op.vg_name:
      vglist = self.rpc.call_vg_list(node_list)
      for node in node_list:
        msg = vglist[node].fail_msg
        if msg:
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
          continue
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              self.op.vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus), errors.ECODE_ENVIRON)

    if self.op.drbd_helper:
      # checks given drbd helper on all nodes
      helpers = self.rpc.call_drbd_helper(node_list)
      for node in node_list:
        ninfo = self.cfg.GetNodeInfo(node)
        if ninfo.offline:
          self.LogInfo("Not checking drbd helper on offline node %s", node)
          continue
        msg = helpers[node].fail_msg
        if msg:
          raise errors.OpPrereqError("Error checking drbd helper on node"
                                     " '%s': %s" % (node, msg),
                                     errors.ECODE_ENVIRON)
        node_helper = helpers[node].payload
        if node_helper != self.op.drbd_helper:
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
                                     (node, node_helper), errors.ECODE_ENVIRON)

    self.cluster = cluster = self.cfg.GetClusterInfo()
    # validate params changes
    if self.op.beparams:
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)

    if self.op.nicparams:
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
      nic_errors = []

      # check all instances for consistency
      for instance in self.cfg.GetAllInstancesInfo().values():
        for nic_idx, nic in enumerate(instance.nics):
          params_copy = copy.deepcopy(nic.nicparams)
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
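          # Layering note (illustrative values): FillDict() lets the per-NIC
          # settings override the proposed cluster-level defaults, e.g.
          #   FillDict({"mode": "bridged", "link": "xen-br0"},
          #            {"mode": "routed"})
          # yields {"mode": "routed", "link": "xen-br0"}, and it is this
          # filled dict that is syntax-checked below.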

          # check parameter syntax
          try:
            objects.NIC.CheckParameterSyntax(params_filled)
          except errors.ConfigurationError, err:
            nic_errors.append("Instance %s, nic/%d: %s" %
                              (instance.name, nic_idx, err))

          # if we're moving instances to routed, check that they have an ip
          target_mode = params_filled[constants.NIC_MODE]
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
            nic_errors.append("Instance %s, nic/%d: routed NIC with no IP" %
                              (instance.name, nic_idx))
      if nic_errors:
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
                                   "\n".join(nic_errors))

    # hypervisor list/parameters
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
    if self.op.hvparams:
      for hv_name, hv_dict in self.op.hvparams.items():
        if hv_name not in self.new_hvparams:
          self.new_hvparams[hv_name] = hv_dict
        else:
          self.new_hvparams[hv_name].update(hv_dict)

    # os hypervisor parameters
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
    if self.op.os_hvp:
      for os_name, hvs in self.op.os_hvp.items():
        if os_name not in self.new_os_hvp:
          self.new_os_hvp[os_name] = hvs
        else:
          for hv_name, hv_dict in hvs.items():
            if hv_name not in self.new_os_hvp[os_name]:
              self.new_os_hvp[os_name][hv_name] = hv_dict
            else:
              self.new_os_hvp[os_name][hv_name].update(hv_dict)

    # os parameters
    self.new_osp = objects.FillDict(cluster.osparams, {})
    if self.op.osparams:
      for os_name, osp in self.op.osparams.items():
        if os_name not in self.new_osp:
          self.new_osp[os_name] = {}

        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
                                                  use_none=True)

        if not self.new_osp[os_name]:
          # we removed all parameters
          del self.new_osp[os_name]
        else:
          # check the parameter validity (remote check)
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
                         os_name, self.new_osp[os_name])

    # changes to the hypervisor list
    if self.op.enabled_hypervisors is not None:
      self.hv_list = self.op.enabled_hypervisors
      for hv in self.hv_list:
        # if the hypervisor doesn't already exist in the cluster
        # hvparams, we initialize it to empty, and then (in both
        # cases) we make sure to fill the defaults, as we might not
        # have a complete defaults list if the hypervisor wasn't
        # enabled before
        if hv not in new_hvp:
          new_hvp[hv] = {}
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
    else:
      self.hv_list = cluster.enabled_hypervisors

    if self.op.hvparams or self.op.enabled_hypervisors is not None:
      # either the enabled list has changed, or the parameters have, validate
      for hv_name, hv_params in self.new_hvparams.items():
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
            (self.op.enabled_hypervisors and
             hv_name in self.op.enabled_hypervisors)):
          # either this is a new hypervisor, or its parameters have changed
          hv_class = hypervisor.GetHypervisor(hv_name)
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          hv_class.CheckParameterSyntax(hv_params)
          _CheckHVParams(self, node_list, hv_name, hv_params)

    if self.op.os_hvp:
      # no need to check any newly-enabled hypervisors, since the
      # defaults have already been checked in the above code-block
      for os_name, os_hvp in self.new_os_hvp.items():
        for hv_name, hv_params in os_hvp.items():
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          # we need to fill in the new os_hvp on top of the actual hv_p
          cluster_defaults = self.new_hvparams.get(hv_name, {})
          new_osp = objects.FillDict(cluster_defaults, hv_params)
          hv_class = hypervisor.GetHypervisor(hv_name)
          hv_class.CheckParameterSyntax(new_osp)
          _CheckHVParams(self, node_list, hv_name, new_osp)

    if self.op.default_iallocator:
      alloc_script = utils.FindFile(self.op.default_iallocator,
                                    constants.IALLOCATOR_SEARCH_PATH,
                                    os.path.isfile)
      if alloc_script is None:
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
                                   " specified" % self.op.default_iallocator,
                                   errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    """
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
    if self.op.drbd_helper is not None:
      new_helper = self.op.drbd_helper
      if not new_helper:
        new_helper = None
      if new_helper != self.cfg.GetDRBDHelper():
        self.cfg.SetDRBDHelper(new_helper)
      else:
        feedback_fn("Cluster DRBD helper already in desired state,"
                    " not changing")
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
    if self.op.enabled_hypervisors is not None:
      self.cluster.hvparams = self.new_hvparams
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
    if self.op.beparams:
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
    if self.op.nicparams:
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
    if self.op.osparams:
      self.cluster.osparams = self.new_osp
    if self.op.ndparams:
      self.cluster.ndparams = self.new_ndparams

    if self.op.candidate_pool_size is not None:
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
      # we need to update the pool size here, otherwise the save will fail
      _AdjustCandidatePool(self, [])

    if self.op.maintain_node_health is not None:
      self.cluster.maintain_node_health = self.op.maintain_node_health

    if self.op.prealloc_wipe_disks is not None:
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks

    if self.op.add_uids is not None:
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)

    if self.op.remove_uids is not None:
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)

    if self.op.uid_pool is not None:
      self.cluster.uid_pool = self.op.uid_pool

    if self.op.default_iallocator is not None:
      self.cluster.default_iallocator = self.op.default_iallocator

    if self.op.reserved_lvs is not None:
      self.cluster.reserved_lvs = self.op.reserved_lvs

    def helper_os(aname, mods, desc):
      desc += " OS list"
      lst = getattr(self.cluster, aname)
      for key, val in mods:
        if key == constants.DDM_ADD:
          if val in lst:
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
          else:
            lst.append(val)
        elif key == constants.DDM_REMOVE:
          if val in lst:
            lst.remove(val)
          else:
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
        else:
          raise errors.ProgrammerError("Invalid modification '%s'" % key)

    if self.op.hidden_os:
      helper_os("hidden_os", self.op.hidden_os, "hidden")

    if self.op.blacklisted_os:
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")

    if self.op.master_netdev:
      master = self.cfg.GetMasterNode()
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
                  self.cluster.master_netdev)
      result = self.rpc.call_node_stop_master(master, False)
      result.Raise("Could not disable the master ip")
      feedback_fn("Changing master_netdev from %s to %s" %
                  (self.cluster.master_netdev, self.op.master_netdev))
      self.cluster.master_netdev = self.op.master_netdev

    self.cfg.Update(self.cluster, feedback_fn)

    if self.op.master_netdev:
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
                  self.op.master_netdev)
      result = self.rpc.call_node_start_master(master, False, False)
      if result.fail_msg:
        self.LogWarning("Could not re-enable the master ip on"
                        " the master, please restart manually: %s",
                        result.fail_msg)


def _UploadHelper(lu, nodes, fname):
  """Helper for uploading a file and showing warnings.

  """
  if os.path.exists(fname):
    result = lu.rpc.call_upload_file(nodes, fname)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        msg = ("Copy of file %s to node %s failed: %s" %
               (fname, to_node, msg))
        lu.proc.LogWarning(msg)


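# Illustrative note (not part of the original module): _UploadHelper never
# raises on a per-node copy failure, it only warns through the LU's processor,
# so a caller cannot rely on the file being present everywhere afterwards.
# A minimal sketch of the expected call shape, assuming "lu" is a LogicalUnit
# holding the relevant node locks and the node names are made up:
#
#   _UploadHelper(lu, ["node1.example.com", "node2.example.com"],
#                 constants.ETC_HOSTS)
#
# Nodes on which the copy failed only show up as "Copy of file ... failed"
# warnings in the job log.

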
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  # 1. Gather target nodes
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
  dist_nodes = lu.cfg.GetOnlineNodeList()
  nvm_nodes = lu.cfg.GetNonVmCapableNodeList()
  vm_nodes = [name for name in dist_nodes if name not in nvm_nodes]
  if additional_nodes is not None:
    dist_nodes.extend(additional_nodes)
    if additional_vm:
      vm_nodes.extend(additional_nodes)
  if myself.name in dist_nodes:
    dist_nodes.remove(myself.name)
  if myself.name in vm_nodes:
    vm_nodes.remove(myself.name)

  # 2. Gather files to distribute
  dist_files = set([constants.ETC_HOSTS,
                    constants.SSH_KNOWN_HOSTS_FILE,
                    constants.RAPI_CERT_FILE,
                    constants.RAPI_USERS_FILE,
                    constants.CONFD_HMAC_KEY,
                    constants.CLUSTER_DOMAIN_SECRET_FILE,
                   ])

  vm_files = set()
  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
  for hv_name in enabled_hypervisors:
    hv_class = hypervisor.GetHypervisor(hv_name)
    vm_files.update(hv_class.GetAncillaryFiles())

  # 3. Perform the files upload
  for fname in dist_files:
    _UploadHelper(lu, dist_nodes, fname)
  for fname in vm_files:
    _UploadHelper(lu, vm_nodes, fname)


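# Illustrative note (not part of the original module): the distribution above
# works on two node sets, all online nodes for cluster-wide files and only the
# vm-capable subset for hypervisor ancillary files.  A minimal sketch of the
# add-node case, with a made-up host name:
#
#   _RedistributeAncillaryFiles(lu, additional_nodes=["newnode.example.com"],
#                               additional_vm=False)
#
# would copy /etc/hosts, known_hosts, the RAPI certificate and the other
# cluster files to "newnode.example.com" but skip the hypervisor files, since
# the node was declared not vm-capable.

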
class LURedistributeConfig(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)


def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  """
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                      node, disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded


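# Illustrative note (not part of the original module): the polling loop above
# distinguishes three situations per iteration: RPC failure (up to 10 retries,
# 6 seconds apart), disks still syncing (sleep up to 60 seconds, bounded by
# the reported estimate), and "done but degraded" (up to 10 one-second
# re-checks to rule out a transient state).  A minimal usage sketch, assuming
# "lu" and "instance" come from a running LU:
#
#   if not _WaitForSync(lu, instance):
#     raise errors.OpExecError("Disks of instance %s are degraded" %
#                              instance.name)
#
# i.e. a False return value means the mirrors ended up degraded, not that the
# wait itself failed.

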
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result


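# Illustrative note (not part of the original module): for a mirrored disk the
# two modes above answer different questions.  A minimal sketch, assuming
# "dev" is an instance's DRBD device and "node" one of its nodes:
#
#   _CheckDiskConsistency(lu, dev, node, True)               # overall status
#   _CheckDiskConsistency(lu, dev, node, True, ldisk=True)   # local storage
#
# The first call reports False while the mirror is still resyncing
# (is_degraded), the second only reports False when the node's local storage
# itself is not in LDS_OKAY state.

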
class LUOobCommand(NoHooksLU):
  """Logical unit for OOB handling.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - OOB is supported

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)

    if node is None:
      raise errors.OpPrereqError("Node %s not found" % self.op.node_name)

    self.oob_program = _SupportsOob(self.cfg, node)

    if not self.oob_program:
      raise errors.OpPrereqError("OOB is not supported for node %s" %
                                 self.op.node_name)

    if self.op.command == constants.OOB_POWER_OFF and not node.offline:
      raise errors.OpPrereqError(("Cannot power off node %s because it is"
                                  " not marked offline") % self.op.node_name)

    self.node = node

  def ExpandNames(self):
    """Gather locks we need.

    """
    node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    self.needed_locks = {
      locking.LEVEL_NODE: [node_name],
      }

  def Exec(self, feedback_fn):
    """Execute OOB and return result if we expect any.

    """
    master_node = self.cfg.GetMasterNode()
    node = self.node

    logging.info("Executing out-of-band command '%s' using '%s' on %s",
                 self.op.command, self.oob_program, self.op.node_name)
    result = self.rpc.call_run_oob(master_node, self.oob_program,
                                   self.op.command, self.op.node_name,
                                   self.op.timeout)

    result.Raise("An error occurred on execution of OOB helper")

    self._CheckPayload(result)

    if self.op.command == constants.OOB_HEALTH:
      # For health we should log important events
      for item, status in result.payload:
        if status in [constants.OOB_STATUS_WARNING,
                      constants.OOB_STATUS_CRITICAL]:
          logging.warning("On node '%s' item '%s' has status '%s'",
                          self.op.node_name, item, status)

    if self.op.command == constants.OOB_POWER_ON:
      node.powered = True
    elif self.op.command == constants.OOB_POWER_OFF:
      node.powered = False
    elif self.op.command == constants.OOB_POWER_STATUS:
      powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
      if powered != self.node.powered:
        logging.warning(("Recorded power state (%s) of node '%s' does not match"
                         " actual power state (%s)"), node.powered,
                        self.op.node_name, powered)

    self.cfg.Update(node, feedback_fn)

    return result.payload

  def _CheckPayload(self, result):
    """Checks if the payload is valid.

    @param result: RPC result
    @raises errors.OpExecError: If payload is not valid

    """
    errs = []
    if self.op.command == constants.OOB_HEALTH:
      if not isinstance(result.payload, list):
        errs.append("command 'health' is expected to return a list but got %s" %
                    type(result.payload))
      for item, status in result.payload:
        if status not in constants.OOB_STATUSES:
          errs.append("health item '%s' has invalid status '%s'" %
                      (item, status))

    if self.op.command == constants.OOB_POWER_STATUS:
      if not isinstance(result.payload, dict):
        errs.append("power-status is expected to return a dict but got %s" %
                    type(result.payload))

    if self.op.command in [
        constants.OOB_POWER_ON,
        constants.OOB_POWER_OFF,
        constants.OOB_POWER_CYCLE,
        ]:
      if result.payload is not None:
        errs.append("%s is expected to not return payload but got '%s'" %
                    (self.op.command, result.payload))

    if errs:
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
                               utils.CommaJoin(errs))


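# Illustrative note (not part of the original module): the payload contract
# enforced by _CheckPayload above differs per OOB command.  A hedged sketch of
# helper output shapes that would pass the checks; the item names are made up
# and only the shapes and status values are constrained by the code above:
#
#   power-status:        a dict keyed by constants.OOB_POWER_STATUS_POWERED,
#                        e.g. {"powered": True}
#   health:              a list of item/status pairs with statuses taken from
#                        constants.OOB_STATUSES, e.g.
#                        [["disk0", "OK"], ["fan1", "WARNING"]]
#   power-on/off/cycle:  no payload at all (None)

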
class LUDiagnoseOS(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  REQ_BGL = False
  _HID = "hidden"
  _BLK = "blacklisted"
  _VLD = "valid"
  _FIELDS_STATIC = utils.FieldSet()
  _FIELDS_DYNAMIC = utils.FieldSet("name", _VLD, "node_status", "variants",
                                   "parameters", "api_versions", _HID, _BLK)

  def CheckArguments(self):
    if self.op.names:
      raise errors.OpPrereqError("Selective OS query not supported",
                                 errors.ECODE_INVAL)

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    # Lock all nodes, in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    self.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and tuples of (path, status, diagnose,
        variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "", [], [])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for (name, path, status, diagnose, variants,
           params, api_versions) in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        # convert params from [name, help] to (name, help)
        params = [tuple(v) for v in params]
        all_os[name][node_name].append((path, status, diagnose,
                                        variants, params, api_versions))
    return all_os

  def Exec(self, feedback_fn):
    """Compute the list of OSes.

    """
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
    node_data = self.rpc.call_os_diagnose(valid_nodes)
    pol = self._DiagnoseByOS(node_data)
    output = []
    cluster = self.cfg.GetClusterInfo()

    for os_name in utils.NiceSort(pol.keys()):
      os_data = pol[os_name]
      row = []
      valid = True
      (variants, params, api_versions) = null_state = (set(), set(), set())
      for idx, osl in enumerate(os_data.values()):
        valid = bool(valid and osl and osl[0][1])
        if not valid:
          (variants, params, api_versions) = null_state
          break
        node_variants, node_params, node_api = osl[0][3:6]
        if idx == 0: # first entry
          variants = set(node_variants)
          params = set(node_params)
          api_versions = set(node_api)
        else: # keep consistency
          variants.intersection_update(node_variants)
          params.intersection_update(node_params)
          api_versions.intersection_update(node_api)

      is_hid = os_name in cluster.hidden_os
      is_blk = os_name in cluster.blacklisted_os
      if ((self._HID not in self.op.output_fields and is_hid) or
          (self._BLK not in self.op.output_fields and is_blk) or
          (self._VLD not in self.op.output_fields and not valid)):
        continue

      for field in self.op.output_fields:
        if field == "name":
          val = os_name
        elif field == self._VLD:
          val = valid
        elif field == "node_status":
          # this is just a copy of the dict
          val = {}
          for node_name, nos_list in os_data.items():
            val[node_name] = nos_list
        elif field == "variants":
          val = utils.NiceSort(list(variants))
        elif field == "parameters":
          val = list(params)
        elif field == "api_versions":
          val = list(api_versions)
        elif field == self._HID:
          val = is_hid
        elif field == self._BLK:
          val = is_blk
        else:
          raise errors.ParameterError(field)
        row.append(val)
      output.append(row)

    return output


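# Illustrative note (not part of the original module): _DiagnoseByOS only keys
# the inner dictionaries by nodes whose RPC succeeded, so a node with a dead
# node daemon simply drops out instead of marking every OS invalid.  A hedged
# sketch with made-up names, assuming "res_ok" is an RPC result whose payload
# lists one OS and "res_failed" one with fail_msg set:
#
#   rlist = {"node1": res_ok, "node2": res_failed}
#   LUDiagnoseOS._DiagnoseByOS(rlist)
#   -> {"debian-image": {"node1": [("/srv/ganeti/os/debian-image", True, "",
#                                   ["default"], [], [10, 20])]}}
#
# where the inner tuple follows the (path, status, diagnose, variants,
# parameters, api_versions) order documented in the method's docstring.

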
class LURemoveNode(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      logging.warning("Node %s which is about to be removed not found"
                      " in the all nodes list", self.op.node_name)
    return env, all_nodes, all_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    instance_list = self.cfg.GetInstanceList()

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node,"
                                 " you need to failover first.",
                                 errors.ECODE_INVAL)

    for instance_name in instance_list:
      instance = self.cfg.GetInstanceInfo(instance_name)
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first." % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_REMOVE,
                                              node.name, None)
      result.Raise("Can't update hosts file with new host data")
      _RedistributeAncillaryFiles(self)


class _NodeQuery(_QueryBase):
  FIELDS = query.NODE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks[locking.LEVEL_NODE] = 1

    if self.names:
      self.wanted = _GetWantedNodes(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.NQ_LIVE in self.requested_data)

    if self.do_locking:
      # if we don't request only static fields, we need to lock the nodes
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    all_info = lu.cfg.GetAllNodesInfo()

    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)

    # Gather data as requested
    if query.NQ_LIVE in self.requested_data:
      node_data = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
                                        lu.cfg.GetHypervisorType())
      live_data = dict((name, nresult.payload)
                       for (name, nresult) in node_data.items()
                       if not nresult.fail_msg and nresult.payload)
    else:
      live_data = None

    if query.NQ_INST in self.requested_data:
      node_to_primary = dict([(name, set()) for name in nodenames])
      node_to_secondary = dict([(name, set()) for name in nodenames])

      inst_data = lu.cfg.GetAllInstancesInfo()

      for inst in inst_data.values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)
    else:
      node_to_primary = None
      node_to_secondary = None

    if query.NQ_GROUP in self.requested_data:
      groups = lu.cfg.GetAllNodeGroupsInfo()
    else:
      groups = {}

    return query.NodeQueryData([all_info[name] for name in nodenames],
                               live_data, lu.cfg.GetMasterNode(),
                               node_to_primary, node_to_secondary, groups)


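# Illustrative note (not part of the original module): _NodeQuery only takes
# node locks when live data (NQ_LIVE) was requested together with use_locking;
# configuration-backed fields are served from the in-memory config without
# blocking other jobs.  A hedged sketch of how the query class is meant to be
# driven, mirroring LUQueryNodes below, with a made-up node name:
#
#   nq = _NodeQuery(["node1"], ["name", "dtotal", "dfree"], True)
#   nq.ExpandNames(lu)        # fills lu.needed_locks as required
#   ...                       # the LU framework acquires the locks
#   data = nq.OldStyleQuery(lu)
#
# OldStyleQuery/NewStyleQuery come from the _QueryBase parent class defined
# earlier in this module.

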
class LUQueryNodes(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.nq = _NodeQuery(self.op.names, self.op.output_fields,
                         self.op.use_locking)

  def ExpandNames(self):
    self.nq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.nq.OldStyleQuery(self)


class LUQueryNodeVolumes(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    nodenames = self.acquired_locks[locking.LEVEL_NODE]
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = [self.cfg.GetInstanceInfo(iname) for iname
             in self.cfg.GetInstanceList()]

    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = nresult.payload[:]
      node_vols.sort(key=lambda vol: vol['dev'])

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol['dev']
          elif field == "vg":
            val = vol['vg']
          elif field == "name":
            val = vol['name']
          elif field == "size":
            val = int(float(vol['size']))
          elif field == "instance":
            for inst in ilist:
              if node not in lv_by_node[inst]:
                continue
              if vol['name'] in lv_by_node[inst][node]:
                val = inst.name
                break
            else:
              val = '-'
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output


class LUQueryNodeStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  REQ_BGL = False

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result


class _InstanceQuery(_QueryBase):
  FIELDS = query.INSTANCE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks[locking.LEVEL_INSTANCE] = 1
    lu.share_locks[locking.LEVEL_NODE] = 1

    if self.names:
      self.wanted = _GetWantedInstances(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.IQ_LIVE in self.requested_data)
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      lu.needed_locks[locking.LEVEL_NODE] = []
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, lu, level):
    if level == locking.LEVEL_NODE and self.do_locking:
      lu._LockInstancesNodes() # pylint: disable-msg=W0212

  def _GetQueryData(self, lu):
    """Computes the list of instances and their attributes.

    """
    all_info = lu.cfg.GetAllInstancesInfo()

    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)

    instance_list = [all_info[name] for name in instance_names]
    nodes = frozenset([inst.primary_node for inst in instance_list])
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
    bad_nodes = []
    offline_nodes = []

    # Gather data as requested
    if query.IQ_LIVE in self.requested_data:
      live_data = {}
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          assert result.fail_msg
          offline_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        elif result.payload:
          live_data.update(result.payload)
        # else no instance is alive
    else:
      live_data = {}

    if query.IQ_DISKUSAGE in self.requested_data:
      disk_usage = dict((inst.name,
                         _ComputeDiskSize(inst.disk_template,
                                          [{"size": disk.size}
                                           for disk in inst.disks]))
                        for inst in instance_list)
    else:
      disk_usage = None

    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
                                   disk_usage, offline_nodes, bad_nodes,
                                   live_data)


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  }


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_OP_QUERY}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)


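# Illustrative note (not part of the original module): the dispatch table
# above is what ties the generic LUQuery/LUQueryFields opcodes below to a
# concrete _QueryBase subclass.  A minimal sketch:
#
#   impl = _GetQueryImplementation(constants.QR_NODE)     # -> _NodeQuery
#   impl = _GetQueryImplementation("no-such-resource")    # raises OpPrereqError
#
# Adding a new queryable resource therefore means writing another _QueryBase
# subclass and registering it in _QUERY_IMPL.

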
class LUQuery(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    qcls = _GetQueryImplementation(self.op.what)
    names = qlang.ReadSimpleFilter("name", self.op.filter)

    self.impl = qcls(names, self.op.fields, False)

  def ExpandNames(self):
    self.impl.ExpandNames(self)

  def DeclareLocks(self, level):
    self.impl.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.impl.NewStyleQuery(self)


class LUQueryFields(NoHooksLU):
  """Query the available fields for resources/items of a certain kind.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.qcls = _GetQueryImplementation(self.op.what)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    return self.qcls.FieldsQuery(self.op.fields)


class LUModifyNodeStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def Exec(self, feedback_fn):
    """Modifies a storage unit on the target node.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


class LUAddNode(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _NFLAGS = ["master_capable", "vm_capable"]

  def CheckArguments(self):
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
    # validate/normalize the node name
    self.hostname = netutils.GetHostname(name=self.op.node_name,
                                         family=self.primary_ip_family)
    self.op.node_name = self.hostname.name
    if self.op.readd and self.op.group:
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
                                 " being readded", errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }
    nodes_0 = self.cfg.GetNodeList()
    nodes_1 = nodes_0 + [self.op.node_name, ]
    return env, nodes_0, nodes_1

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) matches the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg
    hostname = self.hostname
    node = hostname.name
    primary_ip = self.op.primary_ip = hostname.ip
    if self.op.secondary_ip is None:
      if self.primary_ip_family == netutils.IP6Address.family:
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
                                   " IPv4 address must be given as secondary",
                                   errors.ECODE_INVAL)
      self.op.secondary_ip = primary_ip

    secondary_ip = self.op.secondary_ip
    if not netutils.IP4Address.IsValid(secondary_ip):
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                 " address" % secondary_ip, errors.ECODE_INVAL)

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    self.changed_primary_ip = False

    for existing_node_name in node_list:
      existing_node = cfg.GetNodeInfo(existing_node_name)

      if self.op.readd and node == existing_node_name:
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # After this 'if' block, None is no longer a valid value for the
    # _capable op attributes
    if self.op.readd:
      old_node = self.cfg.GetNodeInfo(node)
      assert old_node is not None, "Can't retrieve locked node %s" % node
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, getattr(old_node, attr))
    else:
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, True)

    if self.op.readd and not self.op.vm_capable:
      pri, sec = cfg.GetNodeInstances(node)
      if pri or sec:
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
                                   " flag set to false, but it already holds"
                                   " instances" % node,
                                   errors.ECODE_STATE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no secondary ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a secondary ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                              source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to node daemon port",
                                   errors.ECODE_ENVIRON)

    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    if self.op.master_capable:
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
    else:
      self.master_candidate = False

    if self.op.readd:
      self.new_node = old_node
    else:
      node_group = cfg.LookupNodeGroup(self.op.group)
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False,
                                   group=node_group)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    # We are adding a new node, so we assume it's powered
    new_node.powered = True

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # copy the master/vm_capable flags
    for attr in self._NFLAGS:
      setattr(new_node, attr, getattr(self.op, attr))

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    if self.op.ndparams:
      new_node.ndparams = self.op.ndparams
    else:
      new_node.ndparams = {}

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_ADD,
                                              self.hostname.name,
                                              self.hostname.ip)
      result.Raise("Can't update hosts file with new host data")

    if new_node.secondary_ip != new_node.primary_ip:
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
                               False)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: [node],
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed.")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
                                  additional_vm=self.op.vm_capable)
      self.context.AddNode(new_node, self.proc.GetECId())


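# Illustrative note (not part of the original module): the re-add path of
# LUAddNode.Exec above deliberately clears the offline/drained flags before
# any RPC is made, otherwise the calls to the node would be refused.  A hedged
# sketch of the two invocations handled here, with made-up host names:
#
#   gnt-node add node4.example.com            # fresh add, node object created
#   gnt-node add --readd node2.example.com    # re-add, no group may be given
#
# On a fresh add the node object is created from scratch; on a re-add the
# existing configuration entry is reused and only the flags and (optionally)
# the primary IP are updated.

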
class LUSetNodeParams(LogicalUnit):
  """Modifies the parameters of a node.

  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
      to the node role (as _ROLE_*)
  @cvar _R2F: a dictionary from node role to tuples of flags
  @cvar _FLAGS: a list of attribute names corresponding to the flags

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
  _FLAGS = ["master_candidate", "drained", "offline"]

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
                self.op.master_capable, self.op.vm_capable,
                self.op.secondary_ip, self.op.ndparams]
    if all_mods.count(None) == len(all_mods):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we might be demoting from MC
    self.might_demote = (self.op.master_candidate == False or
                         self.op.offline == True or
                         self.op.drained == True or
                         self.op.master_capable == False)

    if self.op.secondary_ip:
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                   " address" % self.op.secondary_ip,
                                   errors.ECODE_INVAL)

    self.lock_all = self.op.auto_promote and self.might_demote
    self.lock_instances = self.op.secondary_ip is not None

  def ExpandNames(self):
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

    if self.lock_instances:
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET

  def DeclareLocks(self, level):
    # If we have locked all instances, before waiting to lock nodes, release
    # all the ones living on nodes unrelated to the current operation.
    if level == locking.LEVEL_NODE and self.lock_instances:
      instances_release = []
      instances_keep = []
      self.affected_instances = []
      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
        for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
          instance = self.context.cfg.GetInstanceInfo(instance_name)
          i_mirrored = instance.disk_template in constants.DTS_NET_MIRROR
          if i_mirrored and self.op.node_name in instance.all_nodes:
            instances_keep.append(instance_name)
            self.affected_instances.append(instance)
          else:
            instances_release.append(instance_name)
        if instances_release:
          self.context.glm.release(locking.LEVEL_INSTANCE, instances_release)
          self.acquired_locks[locking.LEVEL_INSTANCE] = instances_keep

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }
    nl = [self.cfg.GetMasterNode(),
          self.op.node_name]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via master-failover",
                                   errors.ECODE_INVAL)

    if self.op.master_candidate and not node.master_capable:
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
                                 " it a master candidate" % node.name,
                                 errors.ECODE_STATE)

    if self.op.vm_capable == False:
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
      if ipri or isec:
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
                                   " the vm_capable flag" % node.name,
                                   errors.ECODE_STATE)

    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto-promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto_promote to allow promotion",
                                   errors.ECODE_STATE)

    self.old_flags = old_flags = (node.master_candidate,
                                  node.drained, node.offline)
    assert old_flags in self._F2R, "Un-handled old flags  %s" % str(old_flags)
    self.old_role = old_role = self._F2R[old_flags]

    # Check for ineffective changes
    for attr in self._FLAGS:
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
4322
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
4323
        setattr(self.op, attr, None)
4324

    
4325
    # Past this point, any flag change to False means a transition
4326
    # away from the respective state, as only real changes are kept
4327

    
4328
    # TODO: We might query the real power state if it supports OOB
4329
    if _SupportsOob(self.cfg, node):
4330
      if self.op.offline is False and not (node.powered or
4331
                                           self.op.powered == True):
4332
        raise errors.OpPrereqError(("Please power on node %s first before you"
4333
                                    " can reset offline state") %
4334
                                   self.op.node_name)
4335
    elif self.op.powered is not None:
4336
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
4337
                                  " which does not support out-of-band"
4338
                                  " handling") % self.op.node_name)
4339

    
4340
    # If we're being deofflined/drained, we'll MC ourself if needed
4341
    if (self.op.drained == False or self.op.offline == False or
4342
        (self.op.master_capable and not node.master_capable)):
4343
      if _DecideSelfPromotion(self):
4344
        self.op.master_candidate = True
4345
        self.LogInfo("Auto-promoting node to master candidate")
4346

    
4347
    # If we're no longer master capable, we'll demote ourselves from MC
4348
    if self.op.master_capable == False and node.master_candidate:
4349
      self.LogInfo("Demoting from master candidate")
4350
      self.op.master_candidate = False
4351

    
4352
    # Compute new role
4353
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
4354
    if self.op.master_candidate:
4355
      new_role = self._ROLE_CANDIDATE
4356
    elif self.op.drained:
4357
      new_role = self._ROLE_DRAINED
4358
    elif self.op.offline:
4359
      new_role = self._ROLE_OFFLINE
4360
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
4361
      # False is still in new flags, which means we're un-setting (the
4362
      # only) True flag
4363
      new_role = self._ROLE_REGULAR
4364
    else: # no new flags, nothing, keep old role
4365
      new_role = old_role
4366

    
4367
    self.new_role = new_role
4368

    
4369
    if old_role == self._ROLE_OFFLINE and new_role != old_role:
4370
      # Trying to transition out of offline status
4371
      result = self.rpc.call_version([node.name])[node.name]
4372
      if result.fail_msg:
4373
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
4374
                                   " to report its version: %s" %
4375
                                   (node.name, result.fail_msg),
4376
                                   errors.ECODE_STATE)
4377
      else:
4378
        self.LogWarning("Transitioning node from offline to online state"
4379
                        " without using re-add. Please make sure the node"
4380
                        " is healthy!")
4381

    
4382
    if self.op.secondary_ip:
4383
      # Ok even without locking, because this can't be changed by any LU
4384
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
4385
      master_singlehomed = master.secondary_ip == master.primary_ip
4386
      if master_singlehomed and self.op.secondary_ip:
4387
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
4388
                                   " homed cluster", errors.ECODE_INVAL)
4389

    
4390
      if node.offline:
4391
        if self.affected_instances:
4392
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
4393
                                     " node has instances (%s) configured"
4394
                                     " to use it" % self.affected_instances)
4395
      else:
4396
        # On online nodes, check that no instances are running, and that
4397
        # the node has the new ip and we can reach it.
4398
        for instance in self.affected_instances:
4399
          _CheckInstanceDown(self, instance, "cannot change secondary ip")
4400

    
4401
        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
4402
        if master.name != node.name:
4403
          # check reachability from master secondary ip to new secondary ip
4404
          if not netutils.TcpPing(self.op.secondary_ip,
4405
                                  constants.DEFAULT_NODED_PORT,
4406
                                  source=master.secondary_ip):
4407
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4408
                                       " based ping to node daemon port",
4409
                                       errors.ECODE_ENVIRON)
4410

    
4411
    if self.op.ndparams:
4412
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
4413
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
4414
      self.new_ndparams = new_ndparams
4415

    
4416
  def Exec(self, feedback_fn):
4417
    """Modifies a node.
4418

4419
    """
4420
    node = self.node
4421
    old_role = self.old_role
4422
    new_role = self.new_role
4423

    
4424
    result = []
4425

    
4426
    if self.op.ndparams:
4427
      node.ndparams = self.new_ndparams
4428

    
4429
    if self.op.powered is not None:
4430
      node.powered = self.op.powered
4431

    
4432
    for attr in ["master_capable", "vm_capable"]:
4433
      val = getattr(self.op, attr)
4434
      if val is not None:
4435
        setattr(node, attr, val)
4436
        result.append((attr, str(val)))
4437

    
4438
    if new_role != old_role:
4439
      # Tell the node to demote itself, if no longer MC and not offline
4440
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
4441
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
4442
        if msg:
4443
          self.LogWarning("Node failed to demote itself: %s", msg)
4444

    
4445
      new_flags = self._R2F[new_role]
4446
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
4447
        if of != nf:
4448
          result.append((desc, str(nf)))
4449
      (node.master_candidate, node.drained, node.offline) = new_flags
4450

    
4451
      # we locked all nodes, we adjust the CP before updating this node
4452
      if self.lock_all:
4453
        _AdjustCandidatePool(self, [node.name])
4454

    
4455
    if self.op.secondary_ip:
4456
      node.secondary_ip = self.op.secondary_ip
4457
      result.append(("secondary_ip", self.op.secondary_ip))
4458

    
4459
    # this will trigger configuration file update, if needed
4460
    self.cfg.Update(node, feedback_fn)
4461

    
4462
    # this will trigger job queue propagation or cleanup if the mc
4463
    # flag changed
4464
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
4465
      self.context.ReaddNode(node)
4466

    
4467
    return result
4468

    
4469

    
4470
class LUPowercycleNode(NoHooksLU):
4471
  """Powercycles a node.
4472

4473
  """
4474
  REQ_BGL = False
4475

    
4476
  def CheckArguments(self):
4477
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4478
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
4479
      raise errors.OpPrereqError("The node is the master and the force"
4480
                                 " parameter was not set",
4481
                                 errors.ECODE_INVAL)
4482

    
4483
  def ExpandNames(self):
4484
    """Locking for PowercycleNode.
4485

4486
    This is a last-resort option and shouldn't block on other
4487
    jobs. Therefore, we grab no locks.
4488

4489
    """
4490
    self.needed_locks = {}
4491

    
4492
  def Exec(self, feedback_fn):
4493
    """Reboots a node.
4494

4495
    """
4496
    result = self.rpc.call_node_powercycle(self.op.node_name,
4497
                                           self.cfg.GetHypervisorType())
4498
    result.Raise("Failed to schedule the reboot")
4499
    return result.payload
4500

    
4501

    
4502
class LUQueryClusterInfo(NoHooksLU):
4503
  """Query cluster configuration.
4504

4505
  """
4506
  REQ_BGL = False
4507

    
4508
  def ExpandNames(self):
4509
    self.needed_locks = {}
4510

    
4511
  def Exec(self, feedback_fn):
4512
    """Return cluster config.
4513

4514
    """
4515
    cluster = self.cfg.GetClusterInfo()
4516
    os_hvp = {}
4517

    
4518
    # Filter just for enabled hypervisors
4519
    for os_name, hv_dict in cluster.os_hvp.items():
4520
      os_hvp[os_name] = {}
4521
      for hv_name, hv_params in hv_dict.items():
4522
        if hv_name in cluster.enabled_hypervisors:
4523
          os_hvp[os_name][hv_name] = hv_params
4524

    
4525
    # Convert ip_family to ip_version
4526
    primary_ip_version = constants.IP4_VERSION
4527
    if cluster.primary_ip_family == netutils.IP6Address.family:
4528
      primary_ip_version = constants.IP6_VERSION
4529

    
4530
    result = {
4531
      "software_version": constants.RELEASE_VERSION,
4532
      "protocol_version": constants.PROTOCOL_VERSION,
4533
      "config_version": constants.CONFIG_VERSION,
4534
      "os_api_version": max(constants.OS_API_VERSIONS),
4535
      "export_version": constants.EXPORT_VERSION,
4536
      "architecture": (platform.architecture()[0], platform.machine()),
4537
      "name": cluster.cluster_name,
4538
      "master": cluster.master_node,
4539
      "default_hypervisor": cluster.enabled_hypervisors[0],
4540
      "enabled_hypervisors": cluster.enabled_hypervisors,
4541
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4542
                        for hypervisor_name in cluster.enabled_hypervisors]),
4543
      "os_hvp": os_hvp,
4544
      "beparams": cluster.beparams,
4545
      "osparams": cluster.osparams,
4546
      "nicparams": cluster.nicparams,
4547
      "candidate_pool_size": cluster.candidate_pool_size,
4548
      "master_netdev": cluster.master_netdev,
4549
      "volume_group_name": cluster.volume_group_name,
4550
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
4551
      "file_storage_dir": cluster.file_storage_dir,
4552
      "maintain_node_health": cluster.maintain_node_health,
4553
      "ctime": cluster.ctime,
4554
      "mtime": cluster.mtime,
4555
      "uuid": cluster.uuid,
4556
      "tags": list(cluster.GetTags()),
4557
      "uid_pool": cluster.uid_pool,
4558
      "default_iallocator": cluster.default_iallocator,
4559
      "reserved_lvs": cluster.reserved_lvs,
4560
      "primary_ip_version": primary_ip_version,
4561
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
4562
      }
4563

    
4564
    return result
4565

    
4566

    
4567
class LUQueryConfigValues(NoHooksLU):
4568
  """Return configuration values.
4569

4570
  """
4571
  REQ_BGL = False
4572
  _FIELDS_DYNAMIC = utils.FieldSet()
4573
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
4574
                                  "watcher_pause", "volume_group_name")
4575

    
4576
  def CheckArguments(self):
4577
    _CheckOutputFields(static=self._FIELDS_STATIC,
4578
                       dynamic=self._FIELDS_DYNAMIC,
4579
                       selected=self.op.output_fields)
4580

    
4581
  def ExpandNames(self):
4582
    self.needed_locks = {}
4583

    
4584
  def Exec(self, feedback_fn):
4585
    """Dump a representation of the cluster config to the standard output.
4586

4587
    """
4588
    values = []
4589
    for field in self.op.output_fields:
4590
      if field == "cluster_name":
4591
        entry = self.cfg.GetClusterName()
4592
      elif field == "master_node":
4593
        entry = self.cfg.GetMasterNode()
4594
      elif field == "drain_flag":
4595
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4596
      elif field == "watcher_pause":
4597
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4598
      elif field == "volume_group_name":
4599
        entry = self.cfg.GetVGName()
4600
      else:
4601
        raise errors.ParameterError(field)
4602
      values.append(entry)
4603
    return values
4604

    
4605

    
4606
class LUActivateInstanceDisks(NoHooksLU):
4607
  """Bring up an instance's disks.
4608

4609
  """
4610
  REQ_BGL = False
4611

    
4612
  def ExpandNames(self):
4613
    self._ExpandAndLockInstance()
4614
    self.needed_locks[locking.LEVEL_NODE] = []
4615
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4616

    
4617
  def DeclareLocks(self, level):
4618
    if level == locking.LEVEL_NODE:
4619
      self._LockInstancesNodes()
4620

    
4621
  def CheckPrereq(self):
4622
    """Check prerequisites.
4623

4624
    This checks that the instance is in the cluster.
4625

4626
    """
4627
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4628
    assert self.instance is not None, \
4629
      "Cannot retrieve locked instance %s" % self.op.instance_name
4630
    _CheckNodeOnline(self, self.instance.primary_node)
4631

    
4632
  def Exec(self, feedback_fn):
4633
    """Activate the disks.
4634

4635
    """
4636
    disks_ok, disks_info = \
4637
              _AssembleInstanceDisks(self, self.instance,
4638
                                     ignore_size=self.op.ignore_size)
4639
    if not disks_ok:
4640
      raise errors.OpExecError("Cannot activate block devices")
4641

    
4642
    return disks_info
4643

    
4644

    
4645
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
4646
                           ignore_size=False):
4647
  """Prepare the block devices for an instance.
4648

4649
  This sets up the block devices on all nodes.
4650

4651
  @type lu: L{LogicalUnit}
4652
  @param lu: the logical unit on whose behalf we execute
4653
  @type instance: L{objects.Instance}
4654
  @param instance: the instance for whose disks we assemble
4655
  @type disks: list of L{objects.Disk} or None
4656
  @param disks: which disks to assemble (or all, if None)
4657
  @type ignore_secondaries: boolean
4658
  @param ignore_secondaries: if true, errors on secondary nodes
4659
      won't result in an error return from the function
4660
  @type ignore_size: boolean
4661
  @param ignore_size: if true, the current known size of the disk
4662
      will not be used during the disk activation, useful for cases
4663
      when the size is wrong
4664
  @return: False if the operation failed, otherwise a list of
4665
      (host, instance_visible_name, node_visible_name)
4666
      with the mapping from node devices to instance devices
4667

4668
  """
4669
  device_info = []
4670
  disks_ok = True
4671
  iname = instance.name
4672
  disks = _ExpandCheckDisks(instance, disks)
4673

    
4674
  # With the two passes mechanism we try to reduce the window of
4675
  # opportunity for the race condition of switching DRBD to primary
4676
  # before handshaking occured, but we do not eliminate it
4677

    
4678
  # The proper fix would be to wait (with some limits) until the
4679
  # connection has been made and drbd transitions from WFConnection
4680
  # into any other network-connected state (Connected, SyncTarget,
4681
  # SyncSource, etc.)
4682

    
4683
  # 1st pass, assemble on all nodes in secondary mode
4684
  for inst_disk in disks:
4685
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4686
      if ignore_size:
4687
        node_disk = node_disk.Copy()
4688
        node_disk.UnsetSize()
4689
      lu.cfg.SetDiskID(node_disk, node)
4690
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
4691
      msg = result.fail_msg
4692
      if msg:
4693
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
4694
                           " (is_primary=False, pass=1): %s",
4695
                           inst_disk.iv_name, node, msg)
4696
        if not ignore_secondaries:
4697
          disks_ok = False
4698

    
4699
  # FIXME: race condition on drbd migration to primary
4700

    
4701
  # 2nd pass, do only the primary node
4702
  for inst_disk in disks:
4703
    dev_path = None
4704

    
4705
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4706
      if node != instance.primary_node:
4707
        continue
4708
      if ignore_size:
4709
        node_disk = node_disk.Copy()
4710
        node_disk.UnsetSize()
4711
      lu.cfg.SetDiskID(node_disk, node)
4712
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
4713
      msg = result.fail_msg
4714
      if msg:
4715
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
4716
                           " (is_primary=True, pass=2): %s",
4717
                           inst_disk.iv_name, node, msg)
4718
        disks_ok = False
4719
      else:
4720
        dev_path = result.payload
4721

    
4722
    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
4723

    
4724
  # leave the disks configured for the primary node
4725
  # this is a workaround that would be fixed better by
4726
  # improving the logical/physical id handling
4727
  for disk in disks:
4728
    lu.cfg.SetDiskID(disk, instance.primary_node)
4729

    
4730
  return disks_ok, device_info
4731

    
4732

    
4733
def _StartInstanceDisks(lu, instance, force):
4734
  """Start the disks of an instance.
4735

4736
  """
4737
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
4738
                                           ignore_secondaries=force)
4739
  if not disks_ok:
4740
    _ShutdownInstanceDisks(lu, instance)
4741
    if force is not None and not force:
4742
      lu.proc.LogWarning("", hint="If the message above refers to a"
4743
                         " secondary node,"
4744
                         " you can retry the operation using '--force'.")
4745
    raise errors.OpExecError("Disk consistency error")
4746

    
4747

    
4748
class LUDeactivateInstanceDisks(NoHooksLU):
4749
  """Shutdown an instance's disks.
4750

4751
  """
4752
  REQ_BGL = False
4753

    
4754
  def ExpandNames(self):
4755
    self._ExpandAndLockInstance()
4756
    self.needed_locks[locking.LEVEL_NODE] = []
4757
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4758

    
4759
  def DeclareLocks(self, level):
4760
    if level == locking.LEVEL_NODE:
4761
      self._LockInstancesNodes()
4762

    
4763
  def CheckPrereq(self):
4764
    """Check prerequisites.
4765

4766
    This checks that the instance is in the cluster.
4767

4768
    """
4769
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4770
    assert self.instance is not None, \
4771
      "Cannot retrieve locked instance %s" % self.op.instance_name
4772

    
4773
  def Exec(self, feedback_fn):
4774
    """Deactivate the disks
4775

4776
    """
4777
    instance = self.instance
4778
    _SafeShutdownInstanceDisks(self, instance)
4779

    
4780

    
4781
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4782
  """Shutdown block devices of an instance.
4783

4784
  This function checks if an instance is running, before calling
4785
  _ShutdownInstanceDisks.
4786

4787
  """
4788
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4789
  _ShutdownInstanceDisks(lu, instance, disks=disks)
4790

    
4791

    
4792
def _ExpandCheckDisks(instance, disks):
4793
  """Return the instance disks selected by the disks list
4794

4795
  @type disks: list of L{objects.Disk} or None
4796
  @param disks: selected disks
4797
  @rtype: list of L{objects.Disk}
4798
  @return: selected instance disks to act on
4799

4800
  """
4801
  if disks is None:
4802
    return instance.disks
4803
  else:
4804
    if not set(disks).issubset(instance.disks):
4805
      raise errors.ProgrammerError("Can only act on disks belonging to the"
4806
                                   " target instance")
4807
    return disks
4808

    
4809

    
4810
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4811
  """Shutdown block devices of an instance.
4812

4813
  This does the shutdown on all nodes of the instance.
4814

4815
  If the ignore_primary is false, errors on the primary node are
4816
  ignored.
4817

4818
  """
4819
  all_result = True
4820
  disks = _ExpandCheckDisks(instance, disks)
4821

    
4822
  for disk in disks:
4823
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4824
      lu.cfg.SetDiskID(top_disk, node)
4825
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4826
      msg = result.fail_msg
4827
      if msg:
4828
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4829
                      disk.iv_name, node, msg)
4830
        if ((node == instance.primary_node and not ignore_primary) or
4831
            (node != instance.primary_node and not result.offline)):
4832
          all_result = False
4833
  return all_result
4834

    
4835

    
4836
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4837
  """Checks if a node has enough free memory.
4838

4839
  This function check if a given node has the needed amount of free
4840
  memory. In case the node has less memory or we cannot get the
4841
  information from the node, this function raise an OpPrereqError
4842
  exception.
4843

4844
  @type lu: C{LogicalUnit}
4845
  @param lu: a logical unit from which we get configuration data
4846
  @type node: C{str}
4847
  @param node: the node to check
4848
  @type reason: C{str}
4849
  @param reason: string to use in the error message
4850
  @type requested: C{int}
4851
  @param requested: the amount of memory in MiB to check for
4852
  @type hypervisor_name: C{str}
4853
  @param hypervisor_name: the hypervisor to ask for memory stats
4854
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4855
      we cannot check the node
4856

4857
  """
4858
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
4859
  nodeinfo[node].Raise("Can't get data from node %s" % node,
4860
                       prereq=True, ecode=errors.ECODE_ENVIRON)
4861
  free_mem = nodeinfo[node].payload.get('memory_free', None)
4862
  if not isinstance(free_mem, int):
4863
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4864
                               " was '%s'" % (node, free_mem),
4865
                               errors.ECODE_ENVIRON)
4866
  if requested > free_mem:
4867
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4868
                               " needed %s MiB, available %s MiB" %
4869
                               (node, reason, requested, free_mem),
4870
                               errors.ECODE_NORES)
4871

    
4872

    
4873
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
4874
  """Checks if nodes have enough free disk space in the all VGs.
4875

4876
  This function check if all given nodes have the needed amount of
4877
  free disk. In case any node has less disk or we cannot get the
4878
  information from the node, this function raise an OpPrereqError
4879
  exception.
4880

4881
  @type lu: C{LogicalUnit}
4882
  @param lu: a logical unit from which we get configuration data
4883
  @type nodenames: C{list}
4884
  @param nodenames: the list of node names to check
4885
  @type req_sizes: C{dict}
4886
  @param req_sizes: the hash of vg and corresponding amount of disk in
4887
      MiB to check for
4888
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
4889
      or we cannot check the node
4890

4891
  """
4892
  if req_sizes is not None:
4893
    for vg, req_size in req_sizes.iteritems():
4894
      _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
4895

    
4896

    
4897
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
4898
  """Checks if nodes have enough free disk space in the specified VG.
4899

4900
  This function check if all given nodes have the needed amount of
4901
  free disk. In case any node has less disk or we cannot get the
4902
  information from the node, this function raise an OpPrereqError
4903
  exception.
4904

4905
  @type lu: C{LogicalUnit}
4906
  @param lu: a logical unit from which we get configuration data
4907
  @type nodenames: C{list}
4908
  @param nodenames: the list of node names to check
4909
  @type vg: C{str}
4910
  @param vg: the volume group to check
4911
  @type requested: C{int}
4912
  @param requested: the amount of disk in MiB to check for
4913
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
4914
      or we cannot check the node
4915

4916
  """
4917
  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
4918
  for node in nodenames:
4919
    info = nodeinfo[node]
4920
    info.Raise("Cannot get current information from node %s" % node,
4921
               prereq=True, ecode=errors.ECODE_ENVIRON)
4922
    vg_free = info.payload.get("vg_free", None)
4923
    if not isinstance(vg_free, int):
4924
      raise errors.OpPrereqError("Can't compute free disk space on node"
4925
                                 " %s for vg %s, result was '%s'" %
4926
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
4927
    if requested > vg_free:
4928
      raise errors.OpPrereqError("Not enough disk space on target node %s"
4929
                                 " vg %s: required %d MiB, available %d MiB" %
4930
                                 (node, vg, requested, vg_free),
4931
                                 errors.ECODE_NORES)
4932

    
4933

    
4934
class LUStartupInstance(LogicalUnit):
4935
  """Starts an instance.
4936

4937
  """
4938
  HPATH = "instance-start"
4939
  HTYPE = constants.HTYPE_INSTANCE
4940
  REQ_BGL = False
4941

    
4942
  def CheckArguments(self):
4943
    # extra beparams
4944
    if self.op.beparams:
4945
      # fill the beparams dict
4946
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4947

    
4948
  def ExpandNames(self):
4949
    self._ExpandAndLockInstance()
4950

    
4951
  def BuildHooksEnv(self):
4952
    """Build hooks env.
4953

4954
    This runs on master, primary and secondary nodes of the instance.
4955

4956
    """
4957
    env = {
4958
      "FORCE": self.op.force,
4959
      }
4960
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4961
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4962
    return env, nl, nl
4963

    
4964
  def CheckPrereq(self):
4965
    """Check prerequisites.
4966

4967
    This checks that the instance is in the cluster.
4968

4969
    """
4970
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4971
    assert self.instance is not None, \
4972
      "Cannot retrieve locked instance %s" % self.op.instance_name
4973

    
4974
    # extra hvparams
4975
    if self.op.hvparams:
4976
      # check hypervisor parameter syntax (locally)
4977
      cluster = self.cfg.GetClusterInfo()
4978
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
4979
      filled_hvp = cluster.FillHV(instance)
4980
      filled_hvp.update(self.op.hvparams)
4981
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
4982
      hv_type.CheckParameterSyntax(filled_hvp)
4983
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
4984

    
4985
    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
4986

    
4987
    if self.primary_offline and self.op.ignore_offline_nodes:
4988
      self.proc.LogWarning("Ignoring offline primary node")
4989

    
4990
      if self.op.hvparams or self.op.beparams:
4991
        self.proc.LogWarning("Overridden parameters are ignored")
4992
    else:
4993
      _CheckNodeOnline(self, instance.primary_node)
4994

    
4995
      bep = self.cfg.GetClusterInfo().FillBE(instance)
4996

    
4997
      # check bridges existence
4998
      _CheckInstanceBridgesExist(self, instance)
4999

    
5000
      remote_info = self.rpc.call_instance_info(instance.primary_node,
5001
                                                instance.name,
5002
                                                instance.hypervisor)
5003
      remote_info.Raise("Error checking node %s" % instance.primary_node,
5004
                        prereq=True, ecode=errors.ECODE_ENVIRON)
5005
      if not remote_info.payload: # not running already
5006
        _CheckNodeFreeMemory(self, instance.primary_node,
5007
                             "starting instance %s" % instance.name,
5008
                             bep[constants.BE_MEMORY], instance.hypervisor)
5009

    
5010
  def Exec(self, feedback_fn):
5011
    """Start the instance.
5012

5013
    """
5014
    instance = self.instance
5015
    force = self.op.force
5016

    
5017
    self.cfg.MarkInstanceUp(instance.name)
5018

    
5019
    if self.primary_offline:
5020
      assert self.op.ignore_offline_nodes
5021
      self.proc.LogInfo("Primary node offline, marked instance as started")
5022
    else:
5023
      node_current = instance.primary_node
5024

    
5025
      _StartInstanceDisks(self, instance, force)
5026

    
5027
      result = self.rpc.call_instance_start(node_current, instance,
5028
                                            self.op.hvparams, self.op.beparams)
5029
      msg = result.fail_msg
5030
      if msg:
5031
        _ShutdownInstanceDisks(self, instance)
5032
        raise errors.OpExecError("Could not start instance: %s" % msg)
5033

    
5034

    
5035
class LURebootInstance(LogicalUnit):
5036
  """Reboot an instance.
5037

5038
  """
5039
  HPATH = "instance-reboot"
5040
  HTYPE = constants.HTYPE_INSTANCE
5041
  REQ_BGL = False
5042

    
5043
  def ExpandNames(self):
5044
    self._ExpandAndLockInstance()
5045

    
5046
  def BuildHooksEnv(self):
5047
    """Build hooks env.
5048

5049
    This runs on master, primary and secondary nodes of the instance.
5050

5051
    """
5052
    env = {
5053
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
5054
      "REBOOT_TYPE": self.op.reboot_type,
5055
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5056
      }
5057
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5058
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5059
    return env, nl, nl
5060

    
5061
  def CheckPrereq(self):
5062
    """Check prerequisites.
5063

5064
    This checks that the instance is in the cluster.
5065

5066
    """
5067
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5068
    assert self.instance is not None, \
5069
      "Cannot retrieve locked instance %s" % self.op.instance_name
5070

    
5071
    _CheckNodeOnline(self, instance.primary_node)
5072

    
5073
    # check bridges existence
5074
    _CheckInstanceBridgesExist(self, instance)
5075

    
5076
  def Exec(self, feedback_fn):
5077
    """Reboot the instance.
5078

5079
    """
5080
    instance = self.instance
5081
    ignore_secondaries = self.op.ignore_secondaries
5082
    reboot_type = self.op.reboot_type
5083

    
5084
    node_current = instance.primary_node
5085

    
5086
    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
5087
                       constants.INSTANCE_REBOOT_HARD]:
5088
      for disk in instance.disks:
5089
        self.cfg.SetDiskID(disk, node_current)
5090
      result = self.rpc.call_instance_reboot(node_current, instance,
5091
                                             reboot_type,
5092
                                             self.op.shutdown_timeout)
5093
      result.Raise("Could not reboot instance")
5094
    else:
5095
      result = self.rpc.call_instance_shutdown(node_current, instance,
5096
                                               self.op.shutdown_timeout)
5097
      result.Raise("Could not shutdown instance for full reboot")
5098
      _ShutdownInstanceDisks(self, instance)
5099
      _StartInstanceDisks(self, instance, ignore_secondaries)
5100
      result = self.rpc.call_instance_start(node_current, instance, None, None)
5101
      msg = result.fail_msg
5102
      if msg:
5103
        _ShutdownInstanceDisks(self, instance)
5104
        raise errors.OpExecError("Could not start instance for"
5105
                                 " full reboot: %s" % msg)
5106

    
5107
    self.cfg.MarkInstanceUp(instance.name)
5108

    
5109

    
5110
class LUShutdownInstance(LogicalUnit):
5111
  """Shutdown an instance.
5112

5113
  """
5114
  HPATH = "instance-stop"
5115
  HTYPE = constants.HTYPE_INSTANCE
5116
  REQ_BGL = False
5117

    
5118
  def ExpandNames(self):
5119
    self._ExpandAndLockInstance()
5120

    
5121
  def BuildHooksEnv(self):
5122
    """Build hooks env.
5123

5124
    This runs on master, primary and secondary nodes of the instance.
5125

5126
    """
5127
    env = _BuildInstanceHookEnvByObject(self, self.instance)
5128
    env["TIMEOUT"] = self.op.timeout
5129
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5130
    return env, nl, nl
5131

    
5132
  def CheckPrereq(self):
5133
    """Check prerequisites.
5134

5135
    This checks that the instance is in the cluster.
5136

5137
    """
5138
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5139
    assert self.instance is not None, \
5140
      "Cannot retrieve locked instance %s" % self.op.instance_name
5141

    
5142
    self.primary_offline = \
5143
      self.cfg.GetNodeInfo(self.instance.primary_node).offline
5144

    
5145
    if self.primary_offline and self.op.ignore_offline_nodes:
5146
      self.proc.LogWarning("Ignoring offline primary node")
5147
    else:
5148
      _CheckNodeOnline(self, self.instance.primary_node)
5149

    
5150
  def Exec(self, feedback_fn):
5151
    """Shutdown the instance.
5152

5153
    """
5154
    instance = self.instance
5155
    node_current = instance.primary_node
5156
    timeout = self.op.timeout
5157

    
5158
    self.cfg.MarkInstanceDown(instance.name)
5159

    
5160
    if self.primary_offline:
5161
      assert self.op.ignore_offline_nodes
5162
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
5163
    else:
5164
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
5165
      msg = result.fail_msg
5166
      if msg:
5167
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)
5168

    
5169
      _ShutdownInstanceDisks(self, instance)
5170

    
5171

    
5172
class LUReinstallInstance(LogicalUnit):
5173
  """Reinstall an instance.
5174

5175
  """
5176
  HPATH = "instance-reinstall"
5177
  HTYPE = constants.HTYPE_INSTANCE
5178
  REQ_BGL = False
5179

    
5180
  def ExpandNames(self):
5181
    self._ExpandAndLockInstance()
5182

    
5183
  def BuildHooksEnv(self):
5184
    """Build hooks env.
5185

5186
    This runs on master, primary and secondary nodes of the instance.
5187

5188
    """
5189
    env = _BuildInstanceHookEnvByObject(self, self.instance)
5190
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5191
    return env, nl, nl
5192

    
5193
  def CheckPrereq(self):
5194
    """Check prerequisites.
5195

5196
    This checks that the instance is in the cluster and is not running.
5197

5198
    """
5199
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5200
    assert instance is not None, \
5201
      "Cannot retrieve locked instance %s" % self.op.instance_name
5202
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
5203
                     " offline, cannot reinstall")
5204
    for node in instance.secondary_nodes:
5205
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
5206
                       " cannot reinstall")
5207

    
5208
    if instance.disk_template == constants.DT_DISKLESS:
5209
      raise errors.OpPrereqError("Instance '%s' has no disks" %
5210
                                 self.op.instance_name,
5211
                                 errors.ECODE_INVAL)
5212
    _CheckInstanceDown(self, instance, "cannot reinstall")
5213

    
5214
    if self.op.os_type is not None:
5215
      # OS verification
5216
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
5217
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
5218
      instance_os = self.op.os_type
5219
    else:
5220
      instance_os = instance.os
5221

    
5222
    nodelist = list(instance.all_nodes)
5223

    
5224
    if self.op.osparams:
5225
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
5226
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
5227
      self.os_inst = i_osdict # the new dict (without defaults)
5228
    else:
5229
      self.os_inst = None
5230

    
5231
    self.instance = instance
5232

    
5233
  def Exec(self, feedback_fn):
5234
    """Reinstall the instance.
5235

5236
    """
5237
    inst = self.instance
5238

    
5239
    if self.op.os_type is not None:
5240
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
5241
      inst.os = self.op.os_type
5242
      # Write to configuration
5243
      self.cfg.Update(inst, feedback_fn)
5244

    
5245
    _StartInstanceDisks(self, inst, None)
5246
    try:
5247
      feedback_fn("Running the instance OS create scripts...")
5248
      # FIXME: pass debug option from opcode to backend
5249
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
5250
                                             self.op.debug_level,
5251
                                             osparams=self.os_inst)
5252
      result.Raise("Could not install OS for instance %s on node %s" %
5253
                   (inst.name, inst.primary_node))
5254
    finally:
5255
      _ShutdownInstanceDisks(self, inst)
5256

    
5257

    
5258
class LURecreateInstanceDisks(LogicalUnit):
5259
  """Recreate an instance's missing disks.
5260

5261
  """
5262
  HPATH = "instance-recreate-disks"
5263
  HTYPE = constants.HTYPE_INSTANCE
5264
  REQ_BGL = False
5265

    
5266
  def ExpandNames(self):
5267
    self._ExpandAndLockInstance()
5268

    
5269
  def BuildHooksEnv(self):
5270
    """Build hooks env.
5271

5272
    This runs on master, primary and secondary nodes of the instance.
5273

5274
    """
5275
    env = _BuildInstanceHookEnvByObject(self, self.instance)
5276
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5277
    return env, nl, nl
5278

    
5279
  def CheckPrereq(self):
5280
    """Check prerequisites.
5281

5282
    This checks that the instance is in the cluster and is not running.
5283

5284
    """
5285
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5286
    assert instance is not None, \
5287
      "Cannot retrieve locked instance %s" % self.op.instance_name
5288
    _CheckNodeOnline(self, instance.primary_node)
5289

    
5290
    if instance.disk_template == constants.DT_DISKLESS:
5291
      raise errors.OpPrereqError("Instance '%s' has no disks" %
5292
                                 self.op.instance_name, errors.ECODE_INVAL)
5293
    _CheckInstanceDown(self, instance, "cannot recreate disks")
5294

    
5295
    if not self.op.disks:
5296
      self.op.disks = range(len(instance.disks))
5297
    else:
5298
      for idx in self.op.disks:
5299
        if idx >= len(instance.disks):
5300
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
5301
                                     errors.ECODE_INVAL)
5302

    
5303
    self.instance = instance
5304

    
5305
  def Exec(self, feedback_fn):
5306
    """Recreate the disks.
5307

5308
    """
5309
    to_skip = []
5310
    for idx, _ in enumerate(self.instance.disks):
5311
      if idx not in self.op.disks: # disk idx has not been passed in
5312
        to_skip.append(idx)
5313
        continue
5314

    
5315
    _CreateDisks(self, self.instance, to_skip=to_skip)
5316

    
5317

    
5318
class LURenameInstance(LogicalUnit):
5319
  """Rename an instance.
5320

5321
  """
5322
  HPATH = "instance-rename"
5323
  HTYPE = constants.HTYPE_INSTANCE
5324

    
5325
  def CheckArguments(self):
5326
    """Check arguments.
5327

5328
    """
5329
    if self.op.ip_check and not self.op.name_check:
5330
      # TODO: make the ip check more flexible and not depend on the name check
5331
      raise errors.OpPrereqError("Cannot do ip check without a name check",
5332
                                 errors.ECODE_INVAL)
5333

    
5334
  def BuildHooksEnv(self):
5335
    """Build hooks env.
5336

5337
    This runs on master, primary and secondary nodes of the instance.
5338

5339
    """
5340
    env = _BuildInstanceHookEnvByObject(self, self.instance)
5341
    env["INSTANCE_NEW_NAME"] = self.op.new_name
5342
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5343
    return env, nl, nl
5344

    
5345
  def CheckPrereq(self):
5346
    """Check prerequisites.
5347

5348
    This checks that the instance is in the cluster and is not running.
5349

5350
    """
5351
    self.op.instance_name = _ExpandInstanceName(self.cfg,
5352
                                                self.op.instance_name)
5353
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5354
    assert instance is not None
5355
    _CheckNodeOnline(self, instance.primary_node)
5356
    _CheckInstanceDown(self, instance, "cannot rename")
5357
    self.instance = instance
5358

    
5359
    new_name = self.op.new_name
5360
    if self.op.name_check:
5361
      hostname = netutils.GetHostname(name=new_name)
5362
      self.LogInfo("Resolved given name '%s' to '%s'", new_name,
5363
                   hostname.name)
5364
      new_name = self.op.new_name = hostname.name
5365
      if (self.op.ip_check and
5366
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
5367
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
5368
                                   (hostname.ip, new_name),
5369
                                   errors.ECODE_NOTUNIQUE)
5370

    
5371
    instance_list = self.cfg.GetInstanceList()
5372
    if new_name in instance_list and new_name != instance.name:
5373
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
5374
                                 new_name, errors.ECODE_EXISTS)
5375

    
5376
  def Exec(self, feedback_fn):
5377
    """Rename the instance.
5378

5379
    """
5380
    inst = self.instance
5381
    old_name = inst.name
5382

    
5383
    rename_file_storage = False
5384
    if (inst.disk_template == constants.DT_FILE and
5385
        self.op.new_name != inst.name):
5386
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5387
      rename_file_storage = True
5388

    
5389
    self.cfg.RenameInstance(inst.name, self.op.new_name)
5390
    # Change the instance lock. This is definitely safe while we hold the BGL
5391
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
5392
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
5393

    
5394
    # re-read the instance from the configuration after rename
5395
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
5396

    
5397
    if rename_file_storage:
5398
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5399
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
5400
                                                     old_file_storage_dir,
5401
                                                     new_file_storage_dir)
5402
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
5403
                   " (but the instance has been renamed in Ganeti)" %
5404
                   (inst.primary_node, old_file_storage_dir,
5405
                    new_file_storage_dir))
5406

    
5407
    _StartInstanceDisks(self, inst, None)
5408
    try:
5409
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
5410
                                                 old_name, self.op.debug_level)
5411
      msg = result.fail_msg
5412
      if msg:
5413
        msg = ("Could not run OS rename script for instance %s on node %s"
5414
               " (but the instance has been renamed in Ganeti): %s" %
5415
               (inst.name, inst.primary_node, msg))
5416
        self.proc.LogWarning(msg)
5417
    finally:
5418
      _ShutdownInstanceDisks(self, inst)
5419

    
5420
    return inst.name
5421

    
5422

    
5423
class LURemoveInstance(LogicalUnit):
5424
  """Remove an instance.
5425

5426
  """
5427
  HPATH = "instance-remove"
5428
  HTYPE = constants.HTYPE_INSTANCE
5429
  REQ_BGL = False
5430

    
5431
  def ExpandNames(self):
5432
    self._ExpandAndLockInstance()
5433
    self.needed_locks[locking.LEVEL_NODE] = []
5434
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5435

    
5436
  def DeclareLocks(self, level):
5437
    if level == locking.LEVEL_NODE:
5438
      self._LockInstancesNodes()
5439

    
5440
  def BuildHooksEnv(self):
5441
    """Build hooks env.
5442

5443
    This runs on master, primary and secondary nodes of the instance.
5444

5445
    """
5446
    env = _BuildInstanceHookEnvByObject(self, self.instance)
5447
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
5448
    nl = [self.cfg.GetMasterNode()]
5449
    nl_post = list(self.instance.all_nodes) + nl
5450
    return env, nl, nl_post
5451

    
5452
  def CheckPrereq(self):
5453
    """Check prerequisites.
5454

5455
    This checks that the instance is in the cluster.
5456

5457
    """
5458
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5459
    assert self.instance is not None, \
5460
      "Cannot retrieve locked instance %s" % self.op.instance_name
5461

    
5462
  def Exec(self, feedback_fn):
5463
    """Remove the instance.
5464

5465
    """
5466
    instance = self.instance
5467
    logging.info("Shutting down instance %s on node %s",
5468
                 instance.name, instance.primary_node)
5469

    
5470
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
5471
                                             self.op.shutdown_timeout)
5472
    msg = result.fail_msg
5473
    if msg:
5474
      if self.op.ignore_failures:
5475
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
5476
      else:
5477
        raise errors.OpExecError("Could not shutdown instance %s on"
5478
                                 " node %s: %s" %
5479
                                 (instance.name, instance.primary_node, msg))
5480

    
5481
    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
5482

    
5483

    
5484
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
5485
  """Utility function to remove an instance.
5486

5487
  """
5488
  logging.info("Removing block devices for instance %s", instance.name)
5489

    
5490
  if not _RemoveDisks(lu, instance):
5491
    if not ignore_failures:
5492
      raise errors.OpExecError("Can't remove instance's disks")
5493
    feedback_fn("Warning: can't remove instance's disks")
5494

    
5495
  logging.info("Removing instance %s out of cluster config", instance.name)
5496

    
5497
  lu.cfg.RemoveInstance(instance.name)
5498

    
5499
  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
5500
    "Instance lock removal conflict"
5501

    
5502
  # Remove lock for the instance
5503
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5504

    
5505

    
5506
class LUQueryInstances(NoHooksLU):
5507
  """Logical unit for querying instances.
5508

5509
  """
5510
  # pylint: disable-msg=W0142
5511
  REQ_BGL = False
5512

    
5513
  def CheckArguments(self):
5514
    self.iq = _InstanceQuery(self.op.names, self.op.output_fields,
5515
                             self.op.use_locking)
5516

    
5517
  def ExpandNames(self):
5518
    self.iq.ExpandNames(self)
5519

    
5520
  def DeclareLocks(self, level):
5521
    self.iq.DeclareLocks(self, level)
5522

    
5523
  def Exec(self, feedback_fn):
5524
    return self.iq.OldStyleQuery(self)
5525

    
5526

    
5527
class LUFailoverInstance(LogicalUnit):
5528
  """Failover an instance.
5529

5530
  """
5531
  HPATH = "instance-failover"
5532
  HTYPE = constants.HTYPE_INSTANCE
5533
  REQ_BGL = False
5534

    
5535
  def ExpandNames(self):
5536
    self._ExpandAndLockInstance()
5537
    self.needed_locks[locking.LEVEL_NODE] = []
5538
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5539

    
5540
  def DeclareLocks(self, level):
5541
    if level == locking.LEVEL_NODE:
5542
      self._LockInstancesNodes()
5543

    
5544
  def BuildHooksEnv(self):
5545
    """Build hooks env.
5546

5547
    This runs on master, primary and secondary nodes of the instance.
5548

5549
    """
5550
    instance = self.instance
5551
    source_node = instance.primary_node
5552
    target_node = instance.secondary_nodes[0]
5553
    env = {
5554
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5555
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5556
      "OLD_PRIMARY": source_node,
5557
      "OLD_SECONDARY": target_node,
5558
      "NEW_PRIMARY": target_node,
5559
      "NEW_SECONDARY": source_node,
5560
      }
5561
    env.update(_BuildInstanceHookEnvByObject(self, instance))
5562
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5563
    nl_post = list(nl)
5564
    nl_post.append(source_node)
5565
    return env, nl, nl_post
5566

    
5567
  def CheckPrereq(self):
5568
    """Check prerequisites.
5569

5570
    This checks that the instance is in the cluster.
5571

5572
    """
5573
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5574
    assert self.instance is not None, \
5575
      "Cannot retrieve locked instance %s" % self.op.instance_name
5576

    
5577
    bep = self.cfg.GetClusterInfo().FillBE(instance)
5578
    if instance.disk_template not in constants.DTS_NET_MIRROR:
5579
      raise errors.OpPrereqError("Instance's disk layout is not"
5580
                                 " network mirrored, cannot failover.",
5581
                                 errors.ECODE_STATE)
5582

    
5583
    secondary_nodes = instance.secondary_nodes
5584
    if not secondary_nodes:
5585
      raise errors.ProgrammerError("no secondary node but using "
5586
                                   "a mirrored disk template")
5587

    
5588
    target_node = secondary_nodes[0]
5589
    _CheckNodeOnline(self, target_node)
5590
    _CheckNodeNotDrained(self, target_node)
5591
    if instance.admin_up:
5592
      # check memory requirements on the secondary node
5593
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5594
                           instance.name, bep[constants.BE_MEMORY],
5595
                           instance.hypervisor)
5596
    else:
5597
      self.LogInfo("Not checking memory on the secondary node as"
5598
                   " instance will not be started")
5599

    
5600
    # check bridge existance
5601
    _CheckInstanceBridgesExist(self, instance, node=target_node)
5602

    
5603
  def Exec(self, feedback_fn):
5604
    """Failover an instance.
5605

5606
    The failover is done by shutting it down on its present node and
5607
    starting it on the secondary.
5608

5609
    """
5610
    instance = self.instance
5611
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)
5612

    
5613
    source_node = instance.primary_node
5614
    target_node = instance.secondary_nodes[0]
5615

    
5616
    if instance.admin_up:
5617
      feedback_fn("* checking disk consistency between source and target")
5618
      for dev in instance.disks:
5619
        # for drbd, these are drbd over lvm
5620
        if not _CheckDiskConsistency(self, dev, target_node, False):
5621
          if not self.op.ignore_consistency:
5622
            raise errors.OpExecError("Disk %s is degraded on target node,"
5623
                                     " aborting failover." % dev.iv_name)
5624
    else:
5625
      feedback_fn("* not checking disk consistency as instance is not running")
5626

    
5627
    feedback_fn("* shutting down instance on source node")
5628
    logging.info("Shutting down instance %s on node %s",
5629
                 instance.name, source_node)
5630

    
5631
    result = self.rpc.call_instance_shutdown(source_node, instance,
5632
                                             self.op.shutdown_timeout)
5633
    msg = result.fail_msg
5634
    if msg:
5635
      if self.op.ignore_consistency or primary_node.offline:
5636
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
5637
                             " Proceeding anyway. Please make sure node"
5638
                             " %s is down. Error details: %s",
5639
                             instance.name, source_node, source_node, msg)
5640
      else:
5641
        raise errors.OpExecError("Could not shutdown instance %s on"
5642
                                 " node %s: %s" %
5643
                                 (instance.name, source_node, msg))
5644

    
5645
    feedback_fn("* deactivating the instance's disks on source node")
5646
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5647
      raise errors.OpExecError("Can't shut down the instance's disks.")
5648

    
5649
    instance.primary_node = target_node
5650
    # distribute new instance config to the other nodes
5651
    self.cfg.Update(instance, feedback_fn)
5652

    
5653
    # Only start the instance if it's marked as up
5654
    if instance.admin_up:
5655
      feedback_fn("* activating the instance's disks on target node")
5656
      logging.info("Starting instance %s on node %s",
5657
                   instance.name, target_node)
5658

    
5659
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
5660
                                           ignore_secondaries=True)
5661
      if not disks_ok:
5662
        _ShutdownInstanceDisks(self, instance)
5663
        raise errors.OpExecError("Can't activate the instance's disks")
5664

    
5665
      feedback_fn("* starting the instance on the target node")
5666
      result = self.rpc.call_instance_start(target_node, instance, None, None)
5667
      msg = result.fail_msg
5668
      if msg:
5669
        _ShutdownInstanceDisks(self, instance)
5670
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5671
                                 (instance.name, target_node, msg))
5672

    
5673

    
5674
class LUMigrateInstance(LogicalUnit):
5675
  """Migrate an instance.
5676

5677
  This is migration without shutting down, compared to the failover,
5678
  which is done with shutdown.
5679

5680
  """
5681
  HPATH = "instance-migrate"
5682
  HTYPE = constants.HTYPE_INSTANCE
5683
  REQ_BGL = False
5684

    
5685
  def ExpandNames(self):
5686
    self._ExpandAndLockInstance()
5687

    
5688
    self.needed_locks[locking.LEVEL_NODE] = []
5689
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5690

    
5691
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
5692
                                       self.op.cleanup)
5693
    self.tasklets = [self._migrater]
5694

    
5695
  def DeclareLocks(self, level):
5696
    if level == locking.LEVEL_NODE:
5697
      self._LockInstancesNodes()
5698

    
5699
  def BuildHooksEnv(self):
5700
    """Build hooks env.
5701

5702
    This runs on master, primary and secondary nodes of the instance.
5703

5704
    """
5705
    instance = self._migrater.instance
5706
    source_node = instance.primary_node
5707
    target_node = instance.secondary_nodes[0]
5708
    env = _BuildInstanceHookEnvByObject(self, instance)
5709
    env["MIGRATE_LIVE"] = self._migrater.live
5710
    env["MIGRATE_CLEANUP"] = self.op.cleanup
5711
    env.update({
5712
        "OLD_PRIMARY": source_node,
5713
        "OLD_SECONDARY": target_node,
5714
        "NEW_PRIMARY": target_node,
5715
        "NEW_SECONDARY": source_node,
5716
        })
5717
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5718
    nl_post = list(nl)
5719
    nl_post.append(source_node)
5720
    return env, nl, nl_post
5721

    
5722

    
5723
class LUMoveInstance(LogicalUnit):
5724
  """Move an instance by data-copying.
5725

5726
  """
5727
  HPATH = "instance-move"
5728
  HTYPE = constants.HTYPE_INSTANCE
5729
  REQ_BGL = False
5730

    
5731
  def ExpandNames(self):
5732
    self._ExpandAndLockInstance()
5733
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5734
    self.op.target_node = target_node
5735
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
5736
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5737

    
5738
  def DeclareLocks(self, level):
5739
    if level == locking.LEVEL_NODE:
5740
      self._LockInstancesNodes(primary_only=True)
5741

    
5742
  def BuildHooksEnv(self):
5743
    """Build hooks env.
5744

5745
    This runs on master, primary and secondary nodes of the instance.
5746

5747
    """
5748
    env = {
5749
      "TARGET_NODE": self.op.target_node,
5750
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5751
      }
5752
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5753
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5754
                                       self.op.target_node]
5755
    return env, nl, nl
5756

    
5757
  def CheckPrereq(self):
5758
    """Check prerequisites.
5759

5760
    This checks that the instance is in the cluster.
5761

5762
    """
5763
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5764
    assert self.instance is not None, \
5765
      "Cannot retrieve locked instance %s" % self.op.instance_name
5766

    
5767
    node = self.cfg.GetNodeInfo(self.op.target_node)
5768
    assert node is not None, \
5769
      "Cannot retrieve locked node %s" % self.op.target_node
5770

    
5771
    self.target_node = target_node = node.name
5772

    
5773
    if target_node == instance.primary_node:
5774
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
5775
                                 (instance.name, target_node),
5776
                                 errors.ECODE_STATE)
5777

    
5778
    bep = self.cfg.GetClusterInfo().FillBE(instance)
5779

    
5780
    for idx, dsk in enumerate(instance.disks):
5781
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5782
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5783
                                   " cannot copy" % idx, errors.ECODE_STATE)
5784

    
5785
    _CheckNodeOnline(self, target_node)
5786
    _CheckNodeNotDrained(self, target_node)
5787
    _CheckNodeVmCapable(self, target_node)
5788

    
5789
    if instance.admin_up:
5790
      # check memory requirements on the secondary node
5791
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5792
                           instance.name, bep[constants.BE_MEMORY],
5793
                           instance.hypervisor)
5794
    else:
5795
      self.LogInfo("Not checking memory on the secondary node as"
5796
                   " instance will not be started")
5797

    
5798
    # check bridge existance
5799
    _CheckInstanceBridgesExist(self, instance, node=target_node)
5800

    
5801
  def Exec(self, feedback_fn):
5802
    """Move an instance.
5803

5804
    The move is done by shutting it down on its present node, copying
5805
    the data over (slow) and starting it on the new node.
5806

5807
    """
5808
    instance = self.instance
5809

    
5810
    source_node = instance.primary_node
5811
    target_node = self.target_node
5812

    
5813
    self.LogInfo("Shutting down instance %s on source node %s",
5814
                 instance.name, source_node)
5815

    
5816
    result = self.rpc.call_instance_shutdown(source_node, instance,
5817
                                             self.op.shutdown_timeout)
5818
    msg = result.fail_msg
5819
    if msg:
5820
      if self.op.ignore_consistency:
5821
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
5822
                             " Proceeding anyway. Please make sure node"
5823
                             " %s is down. Error details: %s",
5824
                             instance.name, source_node, source_node, msg)
5825
      else:
5826
        raise errors.OpExecError("Could not shutdown instance %s on"
5827
                                 " node %s: %s" %
5828
                                 (instance.name, source_node, msg))
5829

    
5830
    # create the target disks
5831
    try:
5832
      _CreateDisks(self, instance, target_node=target_node)
5833
    except errors.OpExecError:
5834
      self.LogWarning("Device creation failed, reverting...")
5835
      try:
5836
        _RemoveDisks(self, instance, target_node=target_node)
5837
      finally:
5838
        self.cfg.ReleaseDRBDMinors(instance.name)
5839
        raise
5840

    
5841
    cluster_name = self.cfg.GetClusterInfo().cluster_name
5842

    
5843
    errs = []
5844
    # activate, get path, copy the data over
5845
    for idx, disk in enumerate(instance.disks):
5846
      self.LogInfo("Copying data for disk %d", idx)
5847
      result = self.rpc.call_blockdev_assemble(target_node, disk,
5848
                                               instance.name, True)
5849
      if result.fail_msg:
5850
        self.LogWarning("Can't assemble newly created disk %d: %s",
5851
                        idx, result.fail_msg)
5852
        errs.append(result.fail_msg)
5853
        break
5854
      dev_path = result.payload
5855
      result = self.rpc.call_blockdev_export(source_node, disk,
5856
                                             target_node, dev_path,
5857
                                             cluster_name)
5858
      if result.fail_msg:
5859
        self.LogWarning("Can't copy data over for disk %d: %s",
5860
                        idx, result.fail_msg)
5861
        errs.append(result.fail_msg)
5862
        break
5863

    
5864
    if errs:
5865
      self.LogWarning("Some disks failed to copy, aborting")
5866
      try:
5867
        _RemoveDisks(self, instance, target_node=target_node)
5868
      finally:
5869
        self.cfg.ReleaseDRBDMinors(instance.name)
5870
        raise errors.OpExecError("Errors during disk copy: %s" %
5871
                                 (",".join(errs),))
5872

    
5873
    instance.primary_node = target_node
5874
    self.cfg.Update(instance, feedback_fn)
5875

    
5876
    self.LogInfo("Removing the disks on the original node")
5877
    _RemoveDisks(self, instance, target_node=source_node)
5878

    
5879
    # Only start the instance if it's marked as up
5880
    if instance.admin_up:
5881
      self.LogInfo("Starting instance %s on node %s",
5882
                   instance.name, target_node)
5883

    
5884
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
5885
                                           ignore_secondaries=True)
5886
      if not disks_ok:
5887
        _ShutdownInstanceDisks(self, instance)
5888
        raise errors.OpExecError("Can't activate the instance's disks")
5889

    
5890
      result = self.rpc.call_instance_start(target_node, instance, None, None)
5891
      msg = result.fail_msg
5892
      if msg:
5893
        _ShutdownInstanceDisks(self, instance)
5894
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5895
                                 (instance.name, target_node, msg))
5896

    
5897

    
5898
class LUMigrateNode(LogicalUnit):
5899
  """Migrate all instances from a node.
5900

5901
  """
5902
  HPATH = "node-migrate"
5903
  HTYPE = constants.HTYPE_NODE
5904
  REQ_BGL = False
5905

    
5906
  def ExpandNames(self):
5907
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5908

    
5909
    self.needed_locks = {
5910
      locking.LEVEL_NODE: [self.op.node_name],
5911
      }
5912

    
5913
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5914

    
5915
    # Create tasklets for migrating instances for all instances on this node
5916
    names = []
5917
    tasklets = []
5918

    
5919
    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
5920
      logging.debug("Migrating instance %s", inst.name)
5921
      names.append(inst.name)
5922

    
5923
      tasklets.append(TLMigrateInstance(self, inst.name, False))
5924

    
5925
    self.tasklets = tasklets
5926

    
5927
    # Declare instance locks
5928
    self.needed_locks[locking.LEVEL_INSTANCE] = names
5929

    
5930
  def DeclareLocks(self, level):
5931
    if level == locking.LEVEL_NODE:
5932
      self._LockInstancesNodes()
5933

    
5934
  def BuildHooksEnv(self):
5935
    """Build hooks env.
5936

5937
    This runs on the master, the primary and all the secondaries.
5938

5939
    """
5940
    env = {
5941
      "NODE_NAME": self.op.node_name,
5942
      }
5943

    
5944
    nl = [self.cfg.GetMasterNode()]
5945

    
5946
    return (env, nl, nl)
5947

    
5948

    
5949
class TLMigrateInstance(Tasklet):
5950
  """Tasklet class for instance migration.
5951

5952
  @type live: boolean
5953
  @ivar live: whether the migration will be done live or non-live;
5954
      this variable is initalized only after CheckPrereq has run
5955

5956
  """
5957
  def __init__(self, lu, instance_name, cleanup):
5958
    """Initializes this class.
5959

5960
    """
5961
    Tasklet.__init__(self, lu)
5962

    
5963
    # Parameters
5964
    self.instance_name = instance_name
5965
    self.cleanup = cleanup
5966
    self.live = False # will be overridden later
5967

    
5968
  def CheckPrereq(self):
5969
    """Check prerequisites.
5970

5971
    This checks that the instance is in the cluster.
5972

5973
    """
5974
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5975
    instance = self.cfg.GetInstanceInfo(instance_name)
5976
    assert instance is not None
5977

    
5978
    if instance.disk_template != constants.DT_DRBD8:
5979
      raise errors.OpPrereqError("Instance's disk layout is not"
5980
                                 " drbd8, cannot migrate.", errors.ECODE_STATE)
5981

    
5982
    secondary_nodes = instance.secondary_nodes
5983
    if not secondary_nodes:
5984
      raise errors.ConfigurationError("No secondary node but using"
5985
                                      " drbd8 disk template")
5986

    
5987
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
5988

    
5989
    target_node = secondary_nodes[0]
5990
    # check memory requirements on the secondary node
5991
    _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
5992
                         instance.name, i_be[constants.BE_MEMORY],
5993
                         instance.hypervisor)
5994

    
5995
    # check bridge existance
5996
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
5997

    
5998
    if not self.cleanup:
5999
      _CheckNodeNotDrained(self.lu, target_node)
6000
      result = self.rpc.call_instance_migratable(instance.primary_node,
6001
                                                 instance)
6002
      result.Raise("Can't migrate, please use failover",
6003
                   prereq=True, ecode=errors.ECODE_STATE)
6004

    
6005
    self.instance = instance
6006

    
6007
    if self.lu.op.live is not None and self.lu.op.mode is not None:
6008
      raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
6009
                                 " parameters are accepted",
6010
                                 errors.ECODE_INVAL)
6011
    if self.lu.op.live is not None:
6012
      if self.lu.op.live:
6013
        self.lu.op.mode = constants.HT_MIGRATION_LIVE
6014
      else:
6015
        self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
6016
      # reset the 'live' parameter to None so that repeated
6017
      # invocations of CheckPrereq do not raise an exception
6018
      self.lu.op.live = None
6019
    elif self.lu.op.mode is None:
6020
      # read the default value from the hypervisor
6021
      i_hv = self.cfg.GetClusterInfo().FillHV(instance, skip_globals=False)
6022
      self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
6023

    
6024
    self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
6025

    
6026
  def _WaitUntilSync(self):
6027
    """Poll with custom rpc for disk sync.
6028

6029
    This uses our own step-based rpc call.
6030

6031
    """
6032
    self.feedback_fn("* wait until resync is done")
6033
    all_done = False
6034
    while not all_done:
6035
      all_done = True
6036
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
6037
                                            self.nodes_ip,
6038
                                            self.instance.disks)
6039
      min_percent = 100
6040
      for node, nres in result.items():
6041
        nres.Raise("Cannot resync disks on node %s" % node)
6042
        node_done, node_percent = nres.payload
6043
        all_done = all_done and node_done
6044
        if node_percent is not None:
6045
          min_percent = min(min_percent, node_percent)
6046
      if not all_done:
6047
        if min_percent < 100:
6048
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
6049
        time.sleep(2)
6050

    
6051
  def _EnsureSecondary(self, node):
6052
    """Demote a node to secondary.
6053

6054
    """
6055
    self.feedback_fn("* switching node %s to secondary mode" % node)
6056

    
6057
    for dev in self.instance.disks:
6058
      self.cfg.SetDiskID(dev, node)
6059

    
6060
    result = self.rpc.call_blockdev_close(node, self.instance.name,
6061
                                          self.instance.disks)
6062
    result.Raise("Cannot change disk to secondary on node %s" % node)
6063

    
6064
  def _GoStandalone(self):
6065
    """Disconnect from the network.
6066

6067
    """
6068
    self.feedback_fn("* changing into standalone mode")
6069
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
6070
                                               self.instance.disks)
6071
    for node, nres in result.items():
6072
      nres.Raise("Cannot disconnect disks node %s" % node)
6073

    
6074
  def _GoReconnect(self, multimaster):
6075
    """Reconnect to the network.
6076

6077
    """
6078
    if multimaster:
6079
      msg = "dual-master"
6080
    else:
6081
      msg = "single-master"
6082
    self.feedback_fn("* changing disks into %s mode" % msg)
6083
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
6084
                                           self.instance.disks,
6085
                                           self.instance.name, multimaster)
6086
    for node, nres in result.items():
6087
      nres.Raise("Cannot change disks config on node %s" % node)
6088

    
6089
  def _ExecCleanup(self):
6090
    """Try to cleanup after a failed migration.
6091

6092
    The cleanup is done by:
6093
      - check that the instance is running only on one node
6094
        (and update the config if needed)
6095
      - change disks on its secondary node to secondary
6096
      - wait until disks are fully synchronized
6097
      - disconnect from the network
6098
      - change disks into single-master mode
6099
      - wait again until disks are fully synchronized
6100

6101
    """
6102
    instance = self.instance
6103
    target_node = self.target_node
6104
    source_node = self.source_node
6105

    
6106
    # check running on only one node
6107
    self.feedback_fn("* checking where the instance actually runs"
6108
                     " (if this hangs, the hypervisor might be in"
6109
                     " a bad state)")
6110
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6111
    for node, result in ins_l.items():
6112
      result.Raise("Can't contact node %s" % node)
6113

    
6114
    runningon_source = instance.name in ins_l[source_node].payload
6115
    runningon_target = instance.name in ins_l[target_node].payload
6116

    
6117
    if runningon_source and runningon_target:
6118
      raise errors.OpExecError("Instance seems to be running on two nodes,"
6119
                               " or the hypervisor is confused. You will have"
6120
                               " to ensure manually that it runs only on one"
6121
                               " and restart this operation.")
6122

    
6123
    if not (runningon_source or runningon_target):
6124
      raise errors.OpExecError("Instance does not seem to be running at all."
6125
                               " In this case, it's safer to repair by"
6126
                               " running 'gnt-instance stop' to ensure disk"
6127
                               " shutdown, and then restarting it.")
6128

    
6129
    if runningon_target:
6130
      # the migration has actually succeeded, we need to update the config
6131
      self.feedback_fn("* instance running on secondary node (%s),"
6132
                       " updating config" % target_node)
6133
      instance.primary_node = target_node
6134
      self.cfg.Update(instance, self.feedback_fn)
6135
      demoted_node = source_node
6136
    else:
6137
      self.feedback_fn("* instance confirmed to be running on its"
6138
                       " primary node (%s)" % source_node)
6139
      demoted_node = target_node
6140

    
6141
    self._EnsureSecondary(demoted_node)
6142
    try:
6143
      self._WaitUntilSync()
6144
    except errors.OpExecError:
6145
      # we ignore here errors, since if the device is standalone, it
6146
      # won't be able to sync
6147
      pass
6148
    self._GoStandalone()
6149
    self._GoReconnect(False)
6150
    self._WaitUntilSync()
6151

    
6152
    self.feedback_fn("* done")
6153

    
6154
  def _RevertDiskStatus(self):
6155
    """Try to revert the disk status after a failed migration.
6156

6157
    """
6158
    target_node = self.target_node
6159
    try:
6160
      self._EnsureSecondary(target_node)
6161
      self._GoStandalone()
6162
      self._GoReconnect(False)
6163
      self._WaitUntilSync()
6164
    except errors.OpExecError, err:
6165
      self.lu.LogWarning("Migration failed and I can't reconnect the"
6166
                         " drives: error '%s'\n"
6167
                         "Please look and recover the instance status" %
6168
                         str(err))
6169

    
6170
  def _AbortMigration(self):
6171
    """Call the hypervisor code to abort a started migration.
6172

6173
    """
6174
    instance = self.instance
6175
    target_node = self.target_node
6176
    migration_info = self.migration_info
6177

    
6178
    abort_result = self.rpc.call_finalize_migration(target_node,
6179
                                                    instance,
6180
                                                    migration_info,
6181
                                                    False)
6182
    abort_msg = abort_result.fail_msg
6183
    if abort_msg:
6184
      logging.error("Aborting migration failed on target node %s: %s",
6185
                    target_node, abort_msg)
6186
      # Don't raise an exception here, as we stil have to try to revert the
6187
      # disk status, even if this step failed.
6188

    
6189
  def _ExecMigration(self):
6190
    """Migrate an instance.
6191

6192
    The migrate is done by:
6193
      - change the disks into dual-master mode
6194
      - wait until disks are fully synchronized again
6195
      - migrate the instance
6196
      - change disks on the new secondary node (the old primary) to secondary
6197
      - wait until disks are fully synchronized
6198
      - change disks into single-master mode
6199

6200
    """
6201
    instance = self.instance
6202
    target_node = self.target_node
6203
    source_node = self.source_node
6204

    
6205
    self.feedback_fn("* checking disk consistency between source and target")
6206
    for dev in instance.disks:
6207
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6208
        raise errors.OpExecError("Disk %s is degraded or not fully"
6209
                                 " synchronized on target node,"
6210
                                 " aborting migrate." % dev.iv_name)
6211

    
6212
    # First get the migration information from the remote node
6213
    result = self.rpc.call_migration_info(source_node, instance)
6214
    msg = result.fail_msg
6215
    if msg:
6216
      log_err = ("Failed fetching source migration information from %s: %s" %
6217
                 (source_node, msg))
6218
      logging.error(log_err)
6219
      raise errors.OpExecError(log_err)
6220

    
6221
    self.migration_info = migration_info = result.payload
6222

    
6223
    # Then switch the disks to master/master mode
6224
    self._EnsureSecondary(target_node)
6225
    self._GoStandalone()
6226
    self._GoReconnect(True)
6227
    self._WaitUntilSync()
6228

    
6229
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
6230
    result = self.rpc.call_accept_instance(target_node,
6231
                                           instance,
6232
                                           migration_info,
6233
                                           self.nodes_ip[target_node])
6234

    
6235
    msg = result.fail_msg
6236
    if msg:
6237
      logging.error("Instance pre-migration failed, trying to revert"
6238
                    " disk status: %s", msg)
6239
      self.feedback_fn("Pre-migration failed, aborting")
6240
      self._AbortMigration()
6241
      self._RevertDiskStatus()
6242
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6243
                               (instance.name, msg))
6244

    
6245
    self.feedback_fn("* migrating instance to %s" % target_node)
6246
    time.sleep(10)
6247
    result = self.rpc.call_instance_migrate(source_node, instance,
6248
                                            self.nodes_ip[target_node],
6249
                                            self.live)
6250
    msg = result.fail_msg
6251
    if msg:
6252
      logging.error("Instance migration failed, trying to revert"
6253
                    " disk status: %s", msg)
6254
      self.feedback_fn("Migration failed, aborting")
6255
      self._AbortMigration()
6256
      self._RevertDiskStatus()
6257
      raise errors.OpExecError("Could not migrate instance %s: %s" %
6258
                               (instance.name, msg))
6259
    time.sleep(10)
6260

    
6261
    instance.primary_node = target_node
6262
    # distribute new instance config to the other nodes
6263
    self.cfg.Update(instance, self.feedback_fn)
6264

    
6265
    result = self.rpc.call_finalize_migration(target_node,
6266
                                              instance,
6267
                                              migration_info,
6268
                                              True)
6269
    msg = result.fail_msg
6270
    if msg:
6271
      logging.error("Instance migration succeeded, but finalization failed:"
6272
                    " %s", msg)
6273
      raise errors.OpExecError("Could not finalize instance migration: %s" %
6274
                               msg)
6275

    
6276
    self._EnsureSecondary(source_node)
6277
    self._WaitUntilSync()
6278
    self._GoStandalone()
6279
    self._GoReconnect(False)
6280
    self._WaitUntilSync()
6281

    
6282
    self.feedback_fn("* done")
6283

    
6284
  def Exec(self, feedback_fn):
6285
    """Perform the migration.
6286

6287
    """
6288
    feedback_fn("Migrating instance %s" % self.instance.name)
6289

    
6290
    self.feedback_fn = feedback_fn
6291

    
6292
    self.source_node = self.instance.primary_node
6293
    self.target_node = self.instance.secondary_nodes[0]
6294
    self.all_nodes = [self.source_node, self.target_node]
6295
    self.nodes_ip = {
6296
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6297
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6298
      }
6299

    
6300
    if self.cleanup:
6301
      return self._ExecCleanup()
6302
    else:
6303
      return self._ExecMigration()
6304

    
6305

    
6306
def _CreateBlockDev(lu, node, instance, device, force_create,
6307
                    info, force_open):
6308
  """Create a tree of block devices on a given node.
6309

6310
  If this device type has to be created on secondaries, create it and
6311
  all its children.
6312

6313
  If not, just recurse to children keeping the same 'force' value.
6314

6315
  @param lu: the lu on whose behalf we execute
6316
  @param node: the node on which to create the device
6317
  @type instance: L{objects.Instance}
6318
  @param instance: the instance which owns the device
6319
  @type device: L{objects.Disk}
6320
  @param device: the device to create
6321
  @type force_create: boolean
6322
  @param force_create: whether to force creation of this device; this
6323
      will be change to True whenever we find a device which has
6324
      CreateOnSecondary() attribute
6325
  @param info: the extra 'metadata' we should attach to the device
6326
      (this will be represented as a LVM tag)
6327
  @type force_open: boolean
6328
  @param force_open: this parameter will be passes to the
6329
      L{backend.BlockdevCreate} function where it specifies
6330
      whether we run on primary or not, and it affects both
6331
      the child assembly and the device own Open() execution
6332

6333
  """
6334
  if device.CreateOnSecondary():
6335
    force_create = True
6336

    
6337
  if device.children:
6338
    for child in device.children:
6339
      _CreateBlockDev(lu, node, instance, child, force_create,
6340
                      info, force_open)
6341

    
6342
  if not force_create:
6343
    return
6344

    
6345
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
6346

    
6347

    
6348
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6349
  """Create a single block device on a given node.
6350

6351
  This will not recurse over children of the device, so they must be
6352
  created in advance.
6353

6354
  @param lu: the lu on whose behalf we execute
6355
  @param node: the node on which to create the device
6356
  @type instance: L{objects.Instance}
6357
  @param instance: the instance which owns the device
6358
  @type device: L{objects.Disk}
6359
  @param device: the device to create
6360
  @param info: the extra 'metadata' we should attach to the device
6361
      (this will be represented as a LVM tag)
6362
  @type force_open: boolean
6363
  @param force_open: this parameter will be passes to the
6364
      L{backend.BlockdevCreate} function where it specifies
6365
      whether we run on primary or not, and it affects both
6366
      the child assembly and the device own Open() execution
6367

6368
  """
6369
  lu.cfg.SetDiskID(device, node)
6370
  result = lu.rpc.call_blockdev_create(node, device, device.size,
6371
                                       instance.name, force_open, info)
6372
  result.Raise("Can't create block device %s on"
6373
               " node %s for instance %s" % (device, node, instance.name))
6374
  if device.physical_id is None:
6375
    device.physical_id = result.payload
6376

    
6377

    
6378
def _GenerateUniqueNames(lu, exts):
6379
  """Generate a suitable LV name.
6380

6381
  This will generate a logical volume name for the given instance.
6382

6383
  """
6384
  results = []
6385
  for val in exts:
6386
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6387
    results.append("%s%s" % (new_id, val))
6388
  return results
6389

    
6390

    
6391
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgname, names, iv_name,
6392
                         p_minor, s_minor):
6393
  """Generate a drbd8 device complete with its children.
6394

6395
  """
6396
  port = lu.cfg.AllocatePort()
6397
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6398
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
6399
                          logical_id=(vgname, names[0]))
6400
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6401
                          logical_id=(vgname, names[1]))
6402
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6403
                          logical_id=(primary, secondary, port,
6404
                                      p_minor, s_minor,
6405
                                      shared_secret),
6406
                          children=[dev_data, dev_meta],
6407
                          iv_name=iv_name)
6408
  return drbd_dev
6409

    
6410

    
6411
def _GenerateDiskTemplate(lu, template_name,
6412
                          instance_name, primary_node,
6413
                          secondary_nodes, disk_info,
6414
                          file_storage_dir, file_driver,
6415
                          base_index, feedback_fn):
6416
  """Generate the entire disk layout for a given template type.
6417

6418
  """
6419
  #TODO: compute space requirements
6420

    
6421
  vgname = lu.cfg.GetVGName()
6422
  disk_count = len(disk_info)
6423
  disks = []
6424
  if template_name == constants.DT_DISKLESS:
6425
    pass
6426
  elif template_name == constants.DT_PLAIN:
6427
    if len(secondary_nodes) != 0:
6428
      raise errors.ProgrammerError("Wrong template configuration")
6429

    
6430
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6431
                                      for i in range(disk_count)])
6432
    for idx, disk in enumerate(disk_info):
6433
      disk_index = idx + base_index
6434
      vg = disk.get("vg", vgname)
6435
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
6436
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
6437
                              logical_id=(vg, names[idx]),
6438
                              iv_name="disk/%d" % disk_index,
6439
                              mode=disk["mode"])
6440
      disks.append(disk_dev)
6441
  elif template_name == constants.DT_DRBD8:
6442
    if len(secondary_nodes) != 1:
6443
      raise errors.ProgrammerError("Wrong template configuration")
6444
    remote_node = secondary_nodes[0]
6445
    minors = lu.cfg.AllocateDRBDMinor(
6446
      [primary_node, remote_node] * len(disk_info), instance_name)
6447

    
6448
    names = []
6449
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6450
                                               for i in range(disk_count)]):
6451
      names.append(lv_prefix + "_data")
6452
      names.append(lv_prefix + "_meta")
6453
    for idx, disk in enumerate(disk_info):
6454
      disk_index = idx + base_index
6455
      vg = disk.get("vg", vgname)
6456
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6457
                                      disk["size"], vg, names[idx*2:idx*2+2],
6458
                                      "disk/%d" % disk_index,
6459
                                      minors[idx*2], minors[idx*2+1])
6460
      disk_dev.mode = disk["mode"]
6461
      disks.append(disk_dev)
6462
  elif template_name == constants.DT_FILE:
6463
    if len(secondary_nodes) != 0:
6464
      raise errors.ProgrammerError("Wrong template configuration")
6465

    
6466
    opcodes.RequireFileStorage()
6467

    
6468
    for idx, disk in enumerate(disk_info):
6469
      disk_index = idx + base_index
6470
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6471
                              iv_name="disk/%d" % disk_index,
6472
                              logical_id=(file_driver,
6473
                                          "%s/disk%d" % (file_storage_dir,
6474
                                                         disk_index)),
6475
                              mode=disk["mode"])
6476
      disks.append(disk_dev)
6477
  else:
6478
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
6479
  return disks
6480

    
6481

    
6482
def _GetInstanceInfoText(instance):
6483
  """Compute that text that should be added to the disk's metadata.
6484

6485
  """
6486
  return "originstname+%s" % instance.name
6487

    
6488

    
6489
def _CalcEta(time_taken, written, total_size):
6490
  """Calculates the ETA based on size written and total size.
6491

6492
  @param time_taken: The time taken so far
6493
  @param written: amount written so far
6494
  @param total_size: The total size of data to be written
6495
  @return: The remaining time in seconds
6496

6497
  """
6498
  avg_time = time_taken / float(written)
6499
  return (total_size - written) * avg_time
6500

    
6501

    
6502
def _WipeDisks(lu, instance):
6503
  """Wipes instance disks.
6504

6505
  @type lu: L{LogicalUnit}
6506
  @param lu: the logical unit on whose behalf we execute
6507
  @type instance: L{objects.Instance}
6508
  @param instance: the instance whose disks we should create
6509
  @return: the success of the wipe
6510

6511
  """
6512
  node = instance.primary_node
6513
  for idx, device in enumerate(instance.disks):
6514
    lu.LogInfo("* Wiping disk %d", idx)
6515
    logging.info("Wiping disk %d for instance %s", idx, instance.name)
6516

    
6517
    # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
6518
    # MAX_WIPE_CHUNK at max
6519
    wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
6520
                          constants.MIN_WIPE_CHUNK_PERCENT)
6521

    
6522
    offset = 0
6523
    size = device.size
6524
    last_output = 0
6525
    start_time = time.time()
6526

    
6527
    while offset < size:
6528
      wipe_size = min(wipe_chunk_size, size - offset)
6529
      result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
6530
      result.Raise("Could not wipe disk %d at offset %d for size %d" %
6531
                   (idx, offset, wipe_size))
6532
      now = time.time()
6533
      offset += wipe_size
6534
      if now - last_output >= 60:
6535
        eta = _CalcEta(now - start_time, offset, size)
6536
        lu.LogInfo(" - done: %.1f%% ETA: %s" %
6537
                   (offset / float(size) * 100, utils.FormatSeconds(eta)))
6538
        last_output = now
6539

    
6540

    
6541
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6542
  """Create all disks for an instance.
6543

6544
  This abstracts away some work from AddInstance.
6545

6546
  @type lu: L{LogicalUnit}
6547
  @param lu: the logical unit on whose behalf we execute
6548
  @type instance: L{objects.Instance}
6549
  @param instance: the instance whose disks we should create
6550
  @type to_skip: list
6551
  @param to_skip: list of indices to skip
6552
  @type target_node: string
6553
  @param target_node: if passed, overrides the target node for creation
6554
  @rtype: boolean
6555
  @return: the success of the creation
6556

6557
  """
6558
  info = _GetInstanceInfoText(instance)
6559
  if target_node is None:
6560
    pnode = instance.primary_node
6561
    all_nodes = instance.all_nodes
6562
  else:
6563
    pnode = target_node
6564
    all_nodes = [pnode]
6565

    
6566
  if instance.disk_template == constants.DT_FILE:
6567
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6568
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
6569

    
6570
    result.Raise("Failed to create directory '%s' on"
6571
                 " node %s" % (file_storage_dir, pnode))
6572

    
6573
  # Note: this needs to be kept in sync with adding of disks in
6574
  # LUSetInstanceParams
6575
  for idx, device in enumerate(instance.disks):
6576
    if to_skip and idx in to_skip:
6577
      continue
6578
    logging.info("Creating volume %s for instance %s",
6579
                 device.iv_name, instance.name)
6580
    #HARDCODE
6581
    for node in all_nodes:
6582
      f_create = node == pnode
6583
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
6584

    
6585

    
6586
def _RemoveDisks(lu, instance, target_node=None):
6587
  """Remove all disks for an instance.
6588

6589
  This abstracts away some work from `AddInstance()` and
6590
  `RemoveInstance()`. Note that in case some of the devices couldn't
6591
  be removed, the removal will continue with the other ones (compare
6592
  with `_CreateDisks()`).
6593

6594
  @type lu: L{LogicalUnit}
6595
  @param lu: the logical unit on whose behalf we execute
6596
  @type instance: L{objects.Instance}
6597
  @param instance: the instance whose disks we should remove
6598
  @type target_node: string
6599
  @param target_node: used to override the node on which to remove the disks
6600
  @rtype: boolean
6601
  @return: the success of the removal
6602

6603
  """
6604
  logging.info("Removing block devices for instance %s", instance.name)
6605

    
6606
  all_result = True
6607
  for device in instance.disks:
6608
    if target_node:
6609
      edata = [(target_node, device)]
6610
    else:
6611
      edata = device.ComputeNodeTree(instance.primary_node)
6612
    for node, disk in edata:
6613
      lu.cfg.SetDiskID(disk, node)
6614
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6615
      if msg:
6616
        lu.LogWarning("Could not remove block device %s on node %s,"
6617
                      " continuing anyway: %s", device.iv_name, node, msg)
6618
        all_result = False
6619

    
6620
  if instance.disk_template == constants.DT_FILE:
6621
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6622
    if target_node:
6623
      tgt = target_node
6624
    else:
6625
      tgt = instance.primary_node
6626
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6627
    if result.fail_msg:
6628
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6629
                    file_storage_dir, instance.primary_node, result.fail_msg)
6630
      all_result = False
6631

    
6632
  return all_result
6633

    
6634

    
6635
def _ComputeDiskSizePerVG(disk_template, disks):
6636
  """Compute disk size requirements in the volume group
6637

6638
  """
6639
  def _compute(disks, payload):
6640
    """Universal algorithm
6641

6642
    """
6643
    vgs = {}
6644
    for disk in disks:
6645
      vgs[disk["vg"]] = vgs.get("vg", 0) + disk["size"] + payload
6646

    
6647
    return vgs
6648

    
6649
  # Required free disk space as a function of disk and swap space
6650
  req_size_dict = {
6651
    constants.DT_DISKLESS: None,
6652
    constants.DT_PLAIN: _compute(disks, 0),
6653
    # 128 MB are added for drbd metadata for each disk
6654
    constants.DT_DRBD8: _compute(disks, 128),
6655
    constants.DT_FILE: None,
6656
  }
6657

    
6658
  if disk_template not in req_size_dict:
6659
    raise errors.ProgrammerError("Disk template '%s' size requirement"
6660
                                 " is unknown" %  disk_template)
6661

    
6662
  return req_size_dict[disk_template]
6663

    
6664

    
6665
def _ComputeDiskSize(disk_template, disks):
6666
  """Compute disk size requirements in the volume group
6667

6668
  """
6669
  # Required free disk space as a function of disk and swap space
6670
  req_size_dict = {
6671
    constants.DT_DISKLESS: None,
6672
    constants.DT_PLAIN: sum(d["size"] for d in disks),
6673
    # 128 MB are added for drbd metadata for each disk
6674
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
6675
    constants.DT_FILE: None,
6676
  }
6677

    
6678
  if disk_template not in req_size_dict:
6679
    raise errors.ProgrammerError("Disk template '%s' size requirement"
6680
                                 " is unknown" %  disk_template)
6681

    
6682
  return req_size_dict[disk_template]
6683

    
6684

    
6685
def _CheckHVParams(lu, nodenames, hvname, hvparams):
6686
  """Hypervisor parameter validation.
6687

6688
  This function abstract the hypervisor parameter validation to be
6689
  used in both instance create and instance modify.
6690

6691
  @type lu: L{LogicalUnit}
6692
  @param lu: the logical unit for which we check
6693
  @type nodenames: list
6694
  @param nodenames: the list of nodes on which we should check
6695
  @type hvname: string
6696
  @param hvname: the name of the hypervisor we should use
6697
  @type hvparams: dict
6698
  @param hvparams: the parameters which we need to check
6699
  @raise errors.OpPrereqError: if the parameters are not valid
6700

6701
  """
6702
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6703
                                                  hvname,
6704
                                                  hvparams)
6705
  for node in nodenames:
6706
    info = hvinfo[node]
6707
    if info.offline:
6708
      continue
6709
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
6710

    
6711

    
6712
def _CheckOSParams(lu, required, nodenames, osname, osparams):
6713
  """OS parameters validation.
6714

6715
  @type lu: L{LogicalUnit}
6716
  @param lu: the logical unit for which we check
6717
  @type required: boolean
6718
  @param required: whether the validation should fail if the OS is not
6719
      found
6720
  @type nodenames: list
6721
  @param nodenames: the list of nodes on which we should check
6722
  @type osname: string
6723
  @param osname: the name of the hypervisor we should use
6724
  @type osparams: dict
6725
  @param osparams: the parameters which we need to check
6726
  @raise errors.OpPrereqError: if the parameters are not valid
6727

6728
  """
6729
  result = lu.rpc.call_os_validate(required, nodenames, osname,
6730
                                   [constants.OS_VALIDATE_PARAMETERS],
6731
                                   osparams)
6732
  for node, nres in result.items():
6733
    # we don't check for offline cases since this should be run only
6734
    # against the master node and/or an instance's nodes
6735
    nres.Raise("OS Parameters validation failed on node %s" % node)
6736
    if not nres.payload:
6737
      lu.LogInfo("OS %s not found on node %s, validation skipped",
6738
                 osname, node)
6739

    
6740

    
6741
class LUCreateInstance(LogicalUnit):
6742
  """Create an instance.
6743

6744
  """
6745
  HPATH = "instance-add"
6746
  HTYPE = constants.HTYPE_INSTANCE
6747
  REQ_BGL = False
6748

    
6749
  def CheckArguments(self):
6750
    """Check arguments.
6751

6752
    """
6753
    # do not require name_check to ease forward/backward compatibility
6754
    # for tools
6755
    if self.op.no_install and self.op.start:
6756
      self.LogInfo("No-installation mode selected, disabling startup")
6757
      self.op.start = False
6758
    # validate/normalize the instance name
6759
    self.op.instance_name = \
6760
      netutils.Hostname.GetNormalizedName(self.op.instance_name)
6761

    
6762
    if self.op.ip_check and not self.op.name_check:
6763
      # TODO: make the ip check more flexible and not depend on the name check
6764
      raise errors.OpPrereqError("Cannot do ip check without a name check",
6765
                                 errors.ECODE_INVAL)
6766

    
6767
    # check nics' parameter names
6768
    for nic in self.op.nics:
6769
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6770

    
6771
    # check disks. parameter names and consistent adopt/no-adopt strategy
6772
    has_adopt = has_no_adopt = False
6773
    for disk in self.op.disks:
6774
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
6775
      if "adopt" in disk:
6776
        has_adopt = True
6777
      else:
6778
        has_no_adopt = True
6779
    if has_adopt and has_no_adopt:
6780
      raise errors.OpPrereqError("Either all disks are adopted or none is",
6781
                                 errors.ECODE_INVAL)
6782
    if has_adopt:
6783
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
6784
        raise errors.OpPrereqError("Disk adoption is not supported for the"
6785
                                   " '%s' disk template" %
6786
                                   self.op.disk_template,
6787
                                   errors.ECODE_INVAL)
6788
      if self.op.iallocator is not None:
6789
        raise errors.OpPrereqError("Disk adoption not allowed with an"
6790
                                   " iallocator script", errors.ECODE_INVAL)
6791
      if self.op.mode == constants.INSTANCE_IMPORT:
6792
        raise errors.OpPrereqError("Disk adoption not allowed for"
6793
                                   " instance import", errors.ECODE_INVAL)
6794

    
6795
    self.adopt_disks = has_adopt
6796

    
6797
    # instance name verification
6798
    if self.op.name_check:
6799
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
6800
      self.op.instance_name = self.hostname1.name
6801
      # used in CheckPrereq for ip ping check
6802
      self.check_ip = self.hostname1.ip
6803
    else:
6804
      self.check_ip = None
6805

    
6806
    # file storage checks
6807
    if (self.op.file_driver and
6808
        not self.op.file_driver in constants.FILE_DRIVER):
6809
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
6810
                                 self.op.file_driver, errors.ECODE_INVAL)
6811

    
6812
    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6813
      raise errors.OpPrereqError("File storage directory path not absolute",
6814
                                 errors.ECODE_INVAL)
6815

    
6816
    ### Node/iallocator related checks
6817
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")
6818

    
6819
    if self.op.pnode is not None:
6820
      if self.op.disk_template in constants.DTS_NET_MIRROR:
6821
        if self.op.snode is None:
6822
          raise errors.OpPrereqError("The networked disk templates need"
6823
                                     " a mirror node", errors.ECODE_INVAL)
6824
      elif self.op.snode:
6825
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
6826
                        " template")
6827
        self.op.snode = None
6828

    
6829
    self._cds = _GetClusterDomainSecret()
6830

    
6831
    if self.op.mode == constants.INSTANCE_IMPORT:
6832
      # On import force_variant must be True, because if we forced it at
6833
      # initial install, our only chance when importing it back is that it
6834
      # works again!
6835
      self.op.force_variant = True
6836

    
6837
      if self.op.no_install:
6838
        self.LogInfo("No-installation mode has no effect during import")
6839

    
6840
    elif self.op.mode == constants.INSTANCE_CREATE:
6841
      if self.op.os_type is None:
6842
        raise errors.OpPrereqError("No guest OS specified",
6843
                                   errors.ECODE_INVAL)
6844
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
6845
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
6846
                                   " installation" % self.op.os_type,
6847
                                   errors.ECODE_STATE)
6848
      if self.op.disk_template is None:
6849
        raise errors.OpPrereqError("No disk template specified",
6850
                                   errors.ECODE_INVAL)
6851

    
6852
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6853
      # Check handshake to ensure both clusters have the same domain secret
6854
      src_handshake = self.op.source_handshake
6855
      if not src_handshake:
6856
        raise errors.OpPrereqError("Missing source handshake",
6857
                                   errors.ECODE_INVAL)
6858

    
6859
      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
6860
                                                           src_handshake)
6861
      if errmsg:
6862
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
6863
                                   errors.ECODE_INVAL)
6864

    
6865
      # Load and check source CA
6866
      self.source_x509_ca_pem = self.op.source_x509_ca
6867
      if not self.source_x509_ca_pem:
6868
        raise errors.OpPrereqError("Missing source X509 CA",
6869
                                   errors.ECODE_INVAL)
6870

    
6871
      try:
6872
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
6873
                                                    self._cds)
6874
      except OpenSSL.crypto.Error, err:
6875
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
6876
                                   (err, ), errors.ECODE_INVAL)
6877

    
6878
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
6879
      if errcode is not None:
6880
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
6881
                                   errors.ECODE_INVAL)
6882

    
6883
      self.source_x509_ca = cert
6884

    
6885
      src_instance_name = self.op.source_instance_name
6886
      if not src_instance_name:
6887
        raise errors.OpPrereqError("Missing source instance name",
6888
                                   errors.ECODE_INVAL)
6889

    
6890
      self.source_instance_name = \
6891
          netutils.GetHostname(name=src_instance_name).name
6892

    
6893
    else:
6894
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
6895
                                 self.op.mode, errors.ECODE_INVAL)
6896

    
6897
  def ExpandNames(self):
6898
    """ExpandNames for CreateInstance.
6899

6900
    Figure out the right locks for instance creation.
6901

6902
    """
6903
    self.needed_locks = {}
6904

    
6905
    instance_name = self.op.instance_name
6906
    # this is just a preventive check, but someone might still add this
6907
    # instance in the meantime, and creation will fail at lock-add time
6908
    if instance_name in self.cfg.GetInstanceList():
6909
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6910
                                 instance_name, errors.ECODE_EXISTS)
6911

    
6912
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6913

    
6914
    if self.op.iallocator:
6915
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6916
    else:
6917
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6918
      nodelist = [self.op.pnode]
6919
      if self.op.snode is not None:
6920
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6921
        nodelist.append(self.op.snode)
6922
      self.needed_locks[locking.LEVEL_NODE] = nodelist
6923

    
6924
    # in case of import lock the source node too
6925
    if self.op.mode == constants.INSTANCE_IMPORT:
6926
      src_node = self.op.src_node
6927
      src_path = self.op.src_path
6928

    
6929
      if src_path is None:
6930
        self.op.src_path = src_path = self.op.instance_name
6931

    
6932
      if src_node is None:
6933
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6934
        self.op.src_node = None
6935
        if os.path.isabs(src_path):
6936
          raise errors.OpPrereqError("Importing an instance from an absolute"
6937
                                     " path requires a source node option.",
6938
                                     errors.ECODE_INVAL)
6939
      else:
6940
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6941
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6942
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
6943
        if not os.path.isabs(src_path):
6944
          self.op.src_path = src_path = \
6945
            utils.PathJoin(constants.EXPORT_DIR, src_path)
6946

    
6947
  def _RunAllocator(self):
6948
    """Run the allocator based on input opcode.
6949

6950
    """
6951
    nics = [n.ToDict() for n in self.nics]
6952
    ial = IAllocator(self.cfg, self.rpc,
6953
                     mode=constants.IALLOCATOR_MODE_ALLOC,
6954
                     name=self.op.instance_name,
6955
                     disk_template=self.op.disk_template,
6956
                     tags=[],
6957
                     os=self.op.os_type,
6958
                     vcpus=self.be_full[constants.BE_VCPUS],
6959
                     mem_size=self.be_full[constants.BE_MEMORY],
6960
                     disks=self.disks,
6961
                     nics=nics,
6962
                     hypervisor=self.op.hypervisor,
6963
                     )
6964

    
6965
    ial.Run(self.op.iallocator)
6966

    
6967
    if not ial.success:
6968
      raise errors.OpPrereqError("Can't compute nodes using"
6969
                                 " iallocator '%s': %s" %
6970
                                 (self.op.iallocator, ial.info),
6971
                                 errors.ECODE_NORES)
6972
    if len(ial.result) != ial.required_nodes:
6973
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6974
                                 " of nodes (%s), required %s" %
6975
                                 (self.op.iallocator, len(ial.result),
6976
                                  ial.required_nodes), errors.ECODE_FAULT)
6977
    self.op.pnode = ial.result[0]
6978
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6979
                 self.op.instance_name, self.op.iallocator,
6980
                 utils.CommaJoin(ial.result))
6981
    if ial.required_nodes == 2:
6982
      self.op.snode = ial.result[1]
6983

    
6984
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      memory=self.be_full[constants.BE_MEMORY],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d["size"], d["mode"]) for d in self.disks],
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor_name=self.op.hypervisor,
    ))

    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
          self.secondaries)
    return env, nl, nl

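  # Import lookup note (layout shown is only an example): exports live in
  # per-instance directories under constants.EXPORT_DIR on the source node,
  # e.g. /srv/ganeti/export/inst1.example.com/. When src_path is relative
  # (typically just the instance name), _ReadExportInfo below queries every
  # locked node for its export list and picks the first node holding a
  # matching entry.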
  def _ReadExportInfo(self):
    """Reads the export information from disk.

    It will override the opcode source node and path with the actual
    information, if these two were not specified before.

    @return: the export information

    """
    assert self.op.mode == constants.INSTANCE_IMPORT

    src_node = self.op.src_node
    src_path = self.op.src_path

    if src_node is None:
      locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
      exp_list = self.rpc.call_export_list(locked_nodes)
      found = False
      for node in exp_list:
        if exp_list[node].fail_msg:
          continue
        if src_path in exp_list[node].payload:
          found = True
          self.op.src_node = src_node = node
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
                                                       src_path)
          break
      if not found:
        raise errors.OpPrereqError("No export found for relative path %s" %
                                    src_path, errors.ECODE_INVAL)

    _CheckNodeOnline(self, src_node)
    result = self.rpc.call_export_info(src_node, src_path)
    result.Raise("No export or invalid export found in dir %s" % src_path)

    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
    if not export_info.has_section(constants.INISECT_EXP):
      raise errors.ProgrammerError("Corrupted export config",
                                   errors.ECODE_ENVIRON)

    ei_version = export_info.get(constants.INISECT_EXP, "version")
    if (int(ei_version) != constants.EXPORT_VERSION):
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                 (ei_version, constants.EXPORT_VERSION),
                                 errors.ECODE_ENVIRON)
    return export_info

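  # The export information is a ConfigParser-style file; the options
  # consumed below look roughly like this (hypothetical example):
  #
  #   [export]
  #   version = 0
  #   os = debian-image
  #
  #   [instance]
  #   name = inst1.example.com
  #   disk_template = drbd
  #   disk_count = 1
  #   disk0_size = 10240
  #   disk0_dump = disk0.snap
  #   nic_count = 1
  #   nic0_mac = aa:00:00:12:34:56
  #   hypervisor = xen-pvm
  #
  # plus optional sections (constants.INISECT_HYP/_BEP/_OSP) carrying
  # hypervisor, backend and OS parameters.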
  def _ReadExportParams(self, einfo):
    """Use export parameters as defaults.

    In case the opcode doesn't specify (as in override) some instance
    parameters, then try to use them from the export information, if
    that declares them.

    """
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")

    if self.op.disk_template is None:
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
        self.op.disk_template = einfo.get(constants.INISECT_INS,
                                          "disk_template")
      else:
        raise errors.OpPrereqError("No disk template specified and the export"
                                   " is missing the disk_template information",
                                   errors.ECODE_INVAL)

    if not self.op.disks:
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
        disks = []
        # TODO: import the disk iv_name too
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
          disks.append({"size": disk_sz})
        self.op.disks = disks
      else:
        raise errors.OpPrereqError("No disk info specified and the export"
                                   " is missing the disk information",
                                   errors.ECODE_INVAL)

    if (not self.op.nics and
        einfo.has_option(constants.INISECT_INS, "nic_count")):
      nics = []
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
        ndict = {}
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
          ndict[name] = v
        nics.append(ndict)
      self.op.nics = nics

    if (self.op.hypervisor is None and
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
    if einfo.has_section(constants.INISECT_HYP):
      # use the export parameters but do not override the ones
      # specified by the user
      for name, value in einfo.items(constants.INISECT_HYP):
        if name not in self.op.hvparams:
          self.op.hvparams[name] = value

    if einfo.has_section(constants.INISECT_BEP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_BEP):
        if name not in self.op.beparams:
          self.op.beparams[name] = value
    else:
      # try to read the parameters old style, from the main section
      for name in constants.BES_PARAMETERS:
        if (name not in self.op.beparams and
            einfo.has_option(constants.INISECT_INS, name)):
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)

    if einfo.has_section(constants.INISECT_OSP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_OSP):
        if name not in self.op.osparams:
          self.op.osparams[name] = value

  def _RevertToDefaults(self, cluster):
    """Revert the instance parameters to the default values.

    """
    # hvparams
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
    for name in self.op.hvparams.keys():
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
        del self.op.hvparams[name]
    # beparams
    be_defs = cluster.SimpleFillBE({})
    for name in self.op.beparams.keys():
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
        del self.op.beparams[name]
    # nic params
    nic_defs = cluster.SimpleFillNIC({})
    for nic in self.op.nics:
      for name in constants.NICS_PARAMETERS:
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
          del nic[name]
    # osparams
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
    for name in self.op.osparams.keys():
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
        del self.op.osparams[name]

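  # Example of the effect of the method above (only run when
  # self.op.identify_defaults is set): if the caller passed
  # beparams={"memory": 128} and the cluster default is also 128, the key is
  # dropped, so the new instance keeps tracking the cluster default instead
  # of pinning a private copy of the value.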
  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.mode == constants.INSTANCE_IMPORT:
      export_info = self._ReadExportInfo()
      self._ReadExportParams(export_info)

    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances", errors.ECODE_STATE)

    if self.op.hypervisor is None:
      self.op.hypervisor = self.cfg.GetHypervisorType()

    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 " cluster (%s)" % (self.op.hypervisor,
                                  ",".join(enabled_hvs)),
                                 errors.ECODE_STATE)

    # check hypervisor parameter syntax (locally)
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
                                      self.op.hvparams)
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)
    self.hv_full = filled_hvp
    # check that we don't specify global parameters on an instance
    _CheckGlobalHvParams(self.op.hvparams)

    # fill and remember the beparams dict
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
    self.be_full = cluster.SimpleFillBE(self.op.beparams)

    # build os parameters
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)

    # now that hvp/bep are in final format, let's reset to defaults,
    # if told to do so
    if self.op.identify_defaults:
      self._RevertToDefaults(cluster)

    # NIC buildup
    self.nics = []
    for idx, nic in enumerate(self.op.nics):
      nic_mode_req = nic.get("mode", None)
      nic_mode = nic_mode_req
      if nic_mode is None:
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]

      # in routed mode, for the first nic, the default ip is 'auto'
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
        default_ip_mode = constants.VALUE_AUTO
      else:
        default_ip_mode = constants.VALUE_NONE

      # ip validity checks
      ip = nic.get("ip", default_ip_mode)
      if ip is None or ip.lower() == constants.VALUE_NONE:
        nic_ip = None
      elif ip.lower() == constants.VALUE_AUTO:
        if not self.op.name_check:
          raise errors.OpPrereqError("IP address set to auto but name checks"
                                     " have been skipped",
                                     errors.ECODE_INVAL)
        nic_ip = self.hostname1.ip
      else:
        if not netutils.IPAddress.IsValid(ip):
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
                                     errors.ECODE_INVAL)
        nic_ip = ip

      # TODO: check the ip address for uniqueness
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
                                   errors.ECODE_INVAL)

      # MAC address verification
      mac = nic.get("mac", constants.VALUE_AUTO)
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        mac = utils.NormalizeAndValidateMac(mac)

        try:
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("MAC address %s already in use"
                                     " in cluster" % mac,
                                     errors.ECODE_NOTUNIQUE)

      # bridge verification
      bridge = nic.get("bridge", None)
      link = nic.get("link", None)
      if bridge and link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time", errors.ECODE_INVAL)
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
                                   errors.ECODE_INVAL)
      elif bridge:
        link = bridge

      nicparams = {}
      if nic_mode_req:
        nicparams[constants.NIC_MODE] = nic_mode_req
      if link:
        nicparams[constants.NIC_LINK] = link

      check_params = cluster.SimpleFillNIC(nicparams)
      objects.NIC.CheckParameterSyntax(check_params)
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))

    # disk checks/pre-build
    self.disks = []
    for disk in self.op.disks:
      mode = disk.get("mode", constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                   mode, errors.ECODE_INVAL)
      size = disk.get("size", None)
      if size is None:
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
      try:
        size = int(size)
      except (TypeError, ValueError):
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
                                   errors.ECODE_INVAL)
      vg = disk.get("vg", self.cfg.GetVGName())
      new_disk = {"size": size, "mode": mode, "vg": vg}
      if "adopt" in disk:
        new_disk["adopt"] = disk["adopt"]
      self.disks.append(new_disk)

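    # At this point self.nics is a list of objects.NIC and self.disks a list
    # of plain dicts, one per disk, shaped roughly like
    # {"size": 10240, "mode": "rw", "vg": "xenvg"}, optionally with an
    # "adopt" key naming an existing LV to take over instead of creating a
    # new one (sizes and names here are just an example).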
    if self.op.mode == constants.INSTANCE_IMPORT:

      # Check that the new instance doesn't have less disks than the export
      instance_disks = len(self.disks)
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
      if instance_disks < export_disks:
        raise errors.OpPrereqError("Not enough disks to import."
                                   " (instance: %d, export: %d)" %
                                   (instance_disks, export_disks),
                                   errors.ECODE_INVAL)

      disk_images = []
      for idx in range(export_disks):
        option = 'disk%d_dump' % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = utils.PathJoin(self.op.src_path, export_name)
          disk_images.append(image)
        else:
          disk_images.append(False)

      self.src_images = disk_images

      old_name = export_info.get(constants.INISECT_INS, 'name')
      try:
        exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
                                   " an integer: %s" % str(err),
                                   errors.ECODE_STATE)
      if self.op.instance_name == old_name:
        for idx, nic in enumerate(self.nics):
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
            nic_mac_ini = 'nic%d_mac' % idx
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)

    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT

    # ip ping checks (we use the same ip that was resolved in ExpandNames)
    if self.op.ip_check:
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (self.check_ip, self.op.instance_name),
                                   errors.ECODE_NOTUNIQUE)

    #### mac address generation
    # By generating here the mac address both the allocator and the hooks get
    # the real final mac address rather than the 'auto' or 'generate' value.
    # There is a race condition between the generation and the instance object
    # creation, which means that we know the mac is valid now, but we're not
    # sure it will be when we actually add the instance. If things go bad
    # adding the instance will abort because of a duplicate mac, and the
    # creation job will fail.
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())

    #### allocator run

    if self.op.iallocator is not None:
      self._RunAllocator()

    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    if pnode.offline:
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if pnode.drained:
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if not pnode.vm_capable:
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
                                 " '%s'" % pnode.name, errors.ECODE_STATE)

    self.secondaries = []

    # mirror node verification
    if self.op.disk_template in constants.DTS_NET_MIRROR:
      if self.op.snode == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be the"
                                   " primary node.", errors.ECODE_INVAL)
      _CheckNodeOnline(self, self.op.snode)
      _CheckNodeNotDrained(self, self.op.snode)
      _CheckNodeVmCapable(self, self.op.snode)
      self.secondaries.append(self.op.snode)

    nodenames = [pnode.name] + self.secondaries

    if not self.adopt_disks:
      # Check lv size requirements, if not adopting
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)

    else: # instead, we must check the adoption data
      all_lvs = set([i["vg"] + "/" + i["adopt"] for i in self.disks])
      if len(all_lvs) != len(self.disks):
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
                                   errors.ECODE_INVAL)
      for lv_name in all_lvs:
        try:
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
          # to ReserveLV uses the same syntax
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("LV named %s used by another instance" %
                                     lv_name, errors.ECODE_NOTUNIQUE)

      vg_names = self.rpc.call_vg_list([pnode.name])
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)

      node_lvs = self.rpc.call_lv_list([pnode.name],
                                       vg_names[pnode.name].payload.keys()
                                      )[pnode.name]
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
      node_lvs = node_lvs.payload

      delta = all_lvs.difference(node_lvs.keys())
      if delta:
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
      if online_lvs:
        raise errors.OpPrereqError("Online logical volumes found, cannot"
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
                                   errors.ECODE_STATE)
      # update the size of disk based on what is found
      for dsk in self.disks:
        dsk["size"] = int(float(node_lvs[dsk["vg"] + "/" + dsk["adopt"]][0]))

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
    # check OS parameters (remotely)
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    # memory check on primary node
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MEMORY],
                           self.op.hypervisor)

    self.dry_run_result = list(nodenames)

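  # Note on disk adoption in Exec below: when "adopt" entries are present no
  # new storage is created; the pre-existing LVs are simply renamed to the
  # freshly generated logical IDs (the feature is exposed on the command
  # line roughly as "--disk 0:adopt=<lv-name>"; exact syntax aside, the LU
  # itself only ever sees the "adopt" key in each disk dict).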
  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    if constants.ENABLE_FILE_STORAGE:
      # this is needed because os.path.join does not accept None arguments
      if self.op.file_storage_dir is None:
        string_file_storage_dir = ""
      else:
        string_file_storage_dir = self.op.file_storage_dir

      # build the full file storage dir path
      file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
                                        string_file_storage_dir, instance)
    else:
      file_storage_dir = ""

    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  file_storage_dir,
                                  self.op.file_driver,
                                  0,
                                  feedback_fn)

    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_up=False,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            osparams=self.op.osparams,
                            )

    if self.adopt_disks:
      # rename LVs to the newly-generated names; we need to construct
      # 'fake' LV disks with the old data, plus the new unique_id
      tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
      rename_to = []
      for t_dsk, a_dsk in zip(tmp_disks, self.disks):
        rename_to.append(t_dsk.logical_id)
        t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
        self.cfg.SetDiskID(t_dsk, pnode_name)
      result = self.rpc.call_blockdev_rename(pnode_name,
                                             zip(tmp_disks, rename_to))
      result.Raise("Failed to rename adopted LVs")
    else:
      feedback_fn("* creating instance disks...")
      try:
        _CreateDisks(self, iobj)
      except errors.OpExecError:
        self.LogWarning("Device creation failed, reverting...")
        try:
          _RemoveDisks(self, iobj)
        finally:
          self.cfg.ReleaseDRBDMinors(instance)
          raise

      if self.cfg.GetClusterInfo().prealloc_wipe_disks:
        feedback_fn("* wiping instance disks...")
        try:
          _WipeDisks(self, iobj)
        except errors.OpExecError:
          self.LogWarning("Device wiping failed, reverting...")
          try:
            _RemoveDisks(self, iobj)
          finally:
            self.cfg.ReleaseDRBDMinors(instance)
            raise

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj, self.proc.GetECId())

    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]
    # Unlock all the nodes
    if self.op.mode == constants.INSTANCE_IMPORT:
      nodes_keep = [self.op.src_node]
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
                       if node != self.op.src_node]
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
    else:
      self.context.glm.release(locking.LEVEL_NODE)
      del self.acquired_locks[locking.LEVEL_NODE]

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      time.sleep(15)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
      if self.op.mode == constants.INSTANCE_CREATE:
        if not self.op.no_install:
          feedback_fn("* running the instance OS create scripts...")
          # FIXME: pass debug option from opcode to backend
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
                                                 self.op.debug_level)
          result.Raise("Could not add os for instance %s"
                       " on node %s" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")

        transfers = []

        for idx, image in enumerate(self.src_images):
          if not image:
            continue

          # FIXME: pass debug option from opcode to backend
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
                                             constants.IEIO_FILE, (image, ),
                                             constants.IEIO_SCRIPT,
                                             (iobj.disks[idx], idx),
                                             None)
          transfers.append(dt)

        import_result = \
          masterd.instance.TransferInstanceData(self, feedback_fn,
                                                self.op.src_node, pnode_name,
                                                self.pnode.secondary_ip,
                                                iobj, transfers)
        if not compat.all(import_result):
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
        feedback_fn("* preparing remote import...")
        # The source cluster will stop the instance before attempting to make a
        # connection. In some cases stopping an instance can take a long time,
        # hence the shutdown timeout is added to the connection timeout.
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
                           self.op.source_shutdown_timeout)
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

        assert iobj.primary_node == self.pnode.name
        disk_results = \
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
                                        self.source_x509_ca,
                                        self._cds, timeouts)
        if not compat.all(disk_results):
          # TODO: Should the instance still be started, even if some disks
          # failed to import (valid for local imports, too)?
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

        # Run rename script on newly imported instance
        assert iobj.name == instance
        feedback_fn("Running rename script for %s" % instance)
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
                                                   self.source_instance_name,
                                                   self.op.debug_level)
        if result.fail_msg:
          self.LogWarning("Failed to run rename script for %s on node"
                          " %s: %s" % (instance, pnode_name, result.fail_msg))

      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    if self.op.start:
      iobj.admin_up = True
      self.cfg.Update(iobj, feedback_fn)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)


class LUConnectConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      if instance.admin_up:
        state = "ERROR_down"
      else:
        state = "ADMIN_down"
      raise errors.OpExecError("Instance %s is not running (state %s)" %
                               (instance.name, state))

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    hyper = hypervisor.GetHypervisor(instance.hypervisor)
    cluster = self.cfg.GetClusterInfo()
    # beparams and hvparams are passed separately, to avoid editing the
    # instance and then saving the defaults in the instance itself.
    hvparams = cluster.FillHV(instance)
    beparams = cluster.FillBE(instance)
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)

    # build ssh cmdline
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)


class LUReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.iallocator is not None:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    elif self.op.remote_node is not None:
      remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      self.op.remote_node = remote_node

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks, False, self.op.early_release)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    # If we're not already locking all nodes in the set we have to declare the
    # instance's primary/secondary nodes.
    if (level == locking.LEVEL_NODE and
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    nl = [
      self.cfg.GetMasterNode(),
      instance.primary_node,
      ]
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)
    return env, nl, nl


class TLReplaceDisks(Tasklet):
  """Replaces disks for an instance.

  Note: Locking is not within the scope of this class.

  """
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
               disks, delay_iallocator, early_release):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.mode = mode
    self.iallocator_name = iallocator_name
    self.remote_node = remote_node
    self.disks = disks
    self.delay_iallocator = delay_iallocator
    self.early_release = early_release

    # Runtime data
    self.instance = None
    self.new_node = None
    self.target_node = None
    self.other_node = None
    self.remote_node_info = None
    self.node_secondary_ip = None

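  # Summary of the argument combinations accepted by CheckArguments below
  # (restating the checks, not an independent spec):
  #
  #   mode                                  remote_node / iallocator
  #   REPLACE_DISK_PRI, _SEC, _AUTO         both must be unset
  #   REPLACE_DISK_CHG                      exactly one of the two required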
  @staticmethod
  def CheckArguments(mode, remote_node, iallocator):
    """Helper function for users of this class.

    """
    # check for valid parameter combination
    if mode == constants.REPLACE_DISK_CHG:
      if remote_node is None and iallocator is None:
        raise errors.OpPrereqError("When changing the secondary either an"
                                   " iallocator script must be used or the"
                                   " new node given", errors.ECODE_INVAL)

      if remote_node is not None and iallocator is not None:
        raise errors.OpPrereqError("Give either the iallocator or the new"
                                   " secondary, not both", errors.ECODE_INVAL)

    elif remote_node is not None or iallocator is not None:
      # Not replacing the secondary
      raise errors.OpPrereqError("The iallocator and new node options can"
                                 " only be used when changing the"
                                 " secondary node", errors.ECODE_INVAL)

  @staticmethod
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
    """Compute a new secondary node using an IAllocator.

    """
    ial = IAllocator(lu.cfg, lu.rpc,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     name=instance_name,
                     relocate_from=relocate_from)

    ial.Run(iallocator_name)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
                                 " %s" % (iallocator_name, ial.info),
                                 errors.ECODE_NORES)

    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (iallocator_name,
                                  len(ial.result), ial.required_nodes),
                                 errors.ECODE_FAULT)

    remote_node_name = ial.result[0]

    lu.LogInfo("Selected new secondary for instance '%s': %s",
               instance_name, remote_node_name)

    return remote_node_name

  def _FindFaultyDisks(self, node_name):
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
                                    node_name, True)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.instance_name

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
                                 " instances", errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("The instance has a strange layout,"
                                 " expected one secondary but found %d" %
                                 len(instance.secondary_nodes),
                                 errors.ECODE_FAULT)

    if not self.delay_iallocator:
      self._CheckPrereq2()

  def _CheckPrereq2(self):
    """Check prerequisites, second part.

    This function should always be part of CheckPrereq. It was separated and is
    now called from Exec because during node evacuation iallocator was only
    called with an unmodified cluster model, not taking planned changes into
    account.

    """
    instance = self.instance
    secondary_node = instance.secondary_nodes[0]

    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is not None:
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node
    else:
      self.remote_node_info = None

    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance.", errors.ECODE_INVAL)

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance.",
                                 errors.ECODE_INVAL)

    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 errors.ECODE_INVAL)

    if self.mode == constants.REPLACE_DISK_AUTO:
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

      if faulty_primary:
        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)
        _CheckNodeVmCapable(self.lu, remote_node)

        old_node_info = self.cfg.GetNodeInfo(secondary_node)
        assert old_node_info is not None
        if old_node_info.offline and not self.early_release:
          # doesn't make sense to delay the release
          self.early_release = True
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
                          " early-release mode", secondary_node)

      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)

      # If not specified all disks should be replaced
      if not self.disks:
        self.disks = range(len(self.instance.disks))

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    # Check whether disks are valid
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    node_2nd_ip = {}

    for node_name in [self.target_node, self.other_node, self.new_node]:
      if node_name is not None:
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip

    self.node_secondary_ip = node_2nd_ip

  def Exec(self, feedback_fn):
    """Execute disk replacement.

    This dispatches the disk replacement to the appropriate handler.

    """
    if self.delay_iallocator:
      self._CheckPrereq2()

    if not self.disks:
      feedback_fn("No disks need replacement")
      return

    feedback_fn("Replacing disk(s) %s for %s" %
                (utils.CommaJoin(self.disks), self.instance.name))

    activate_disks = (not self.instance.admin_up)

    # Activate the instance disks if we're replacing them on a down instance
    if activate_disks:
      _StartInstanceDisks(self.lu, self.instance, True)

    try:
      # Should we replace the secondary node?
      if self.new_node is not None:
        fn = self._ExecDrbd8Secondary
      else:
        fn = self._ExecDrbd8DiskOnly

      return fn(feedback_fn)

    finally:
      # Deactivate the instance disks if we're replacing them on a
      # down instance
      if activate_disks:
        _SafeShutdownInstanceDisks(self.lu, self.instance)

  def _CheckVolumeGroup(self, nodes):
    self.lu.LogInfo("Checking volume groups")

    vgname = self.cfg.GetVGName()

    # Make sure volume group exists on all involved nodes
    results = self.rpc.call_vg_list(nodes)
    if not results:
      raise errors.OpExecError("Can't list volume groups on the nodes")

    for node in nodes:
      res = results[node]
      res.Raise("Error checking node %s" % node)
      if vgname not in res.payload:
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
                                 (vgname, node))

  def _CheckDisksExistence(self, nodes):
    # Check disk existence
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        msg = result.fail_msg
        if msg or not result.payload:
          if not msg:
            msg = "disk not found"
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
                                   (idx, node, msg))

  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
                      (idx, node_name))

      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
                                   ldisk=ldisk):
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
                                 " replace disks for instance %s" %
                                 (node_name, self.instance.name))

  def _CreateNewStorage(self, node_name):
    vgname = self.cfg.GetVGName()
    iv_names = {}

    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))

      self.cfg.SetDiskID(dev, node_name)

      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
      names = _GenerateUniqueNames(self.lu, lv_names)

      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
                             logical_id=(vgname, names[0]))
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                             logical_id=(vgname, names[1]))

      new_lvs = [lv_data, lv_meta]
      old_lvs = dev.children
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)

      # we pass force_create=True to force the LVM creation
      for new_lv in new_lvs:
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    return iv_names

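  # The iv_names mapping built by _CreateNewStorage above and consumed by
  # the helpers below maps each disk's iv_name to a (drbd device, old LVs,
  # new LVs) tuple, e.g. roughly:
  #   {"disk/0": (<DRBD8 disk>, [old data LV, old meta LV],
  #               [new data LV, new meta LV])}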
  def _CheckDevices(self, node_name, iv_names):
    for name, (dev, _, _) in iv_names.iteritems():
      self.cfg.SetDiskID(dev, node_name)

      result = self.rpc.call_blockdev_find(node_name, dev)

      msg = result.fail_msg
      if msg or not result.payload:
        if not msg:
          msg = "disk not found"
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
                                 (name, msg))

      if result.payload.is_degraded:
        raise errors.OpExecError("DRBD device %s is degraded!" % name)

  def _RemoveOldStorage(self, node_name, iv_names):
    for name, (_, old_lvs, _) in iv_names.iteritems():
      self.lu.LogInfo("Remove logical volumes for %s" % name)

      for lv in old_lvs:
        self.cfg.SetDiskID(lv, node_name)

        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
        if msg:
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
                             hint="remove unused LVs manually")

  def _ReleaseNodeLock(self, node_name):
    """Releases the lock for a given node."""
    self.lu.context.glm.release(locking.LEVEL_NODE, node_name)

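  # Locking note for the two replacement paths below: with early_release the
  # old LVs are removed and the node locks given up before waiting for the
  # resync, which shortens the time other jobs stay blocked; the trade-off,
  # spelled out where the locks are dropped, is that from then on only the
  # final _WaitForSync against the primary node may be issued.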
  def _ExecDrbd8DiskOnly(self, feedback_fn):
    """Replace a disk on the primary or secondary for DRBD 8.

    The algorithm for replace is quite complicated:

      1. for each disk to be replaced:

        1. create new LVs on the target node with unique names
        1. detach old LVs from the drbd device
        1. rename old LVs to name_replaced.<time_t>
        1. rename new LVs to old LVs
        1. attach the new LVs (with the old names now) to the drbd device

      1. wait for sync across all devices

      1. for each modified disk:

        1. remove old LVs (which have the name name_replaced.<time_t>)

    Failures are not very well handled.

    """
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.other_node, self.target_node])
    self._CheckVolumeGroup([self.target_node, self.other_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.other_node,
                                self.other_node == self.instance.primary_node,
                                False)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    iv_names = self._CreateNewStorage(self.target_node)

    # Step: for each lv, detach+rename*2+attach
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    for dev, old_lvs, new_lvs in iv_names.itervalues():
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)

      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
                                                     old_lvs)
      result.Raise("Can't detach drbd from local storage on node"
                   " %s for device %s" % (self.target_node, dev.iv_name))
      #dev.children = []
      #cfg.Update(instance)

      # ok, we created the new LVs, so now we know we have the needed
      # storage; as such, we proceed on the target node to rename
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
      # using the assumption that logical_id == physical_id (which in
      # turn is the unique_id on that node)

      # FIXME(iustin): use a better name for the replaced LVs
      temp_suffix = int(time.time())
      ren_fn = lambda d, suff: (d.physical_id[0],
                                d.physical_id[1] + "_replaced-%s" % suff)

      # Build the rename list based on what LVs exist on the node
      rename_old_to_new = []
      for to_ren in old_lvs:
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
        if not result.fail_msg and result.payload:
          # device exists
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))

      self.lu.LogInfo("Renaming the old LVs on the target node")
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_old_to_new)
      result.Raise("Can't rename old LVs on node %s" % self.target_node)

      # Now we rename the new LVs to the old LVs
      self.lu.LogInfo("Renaming the new LVs on the target node")
      rename_new_to_old = [(new, old.physical_id)
                           for old, new in zip(old_lvs, new_lvs)]
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_new_to_old)
      result.Raise("Can't rename new LVs on node %s" % self.target_node)

      for old, new in zip(old_lvs, new_lvs):
        new.logical_id = old.logical_id
        self.cfg.SetDiskID(new, self.target_node)

      for disk in old_lvs:
        disk.logical_id = ren_fn(disk, temp_suffix)
        self.cfg.SetDiskID(disk, self.target_node)

      # Now that the new lvs have the old name, we can add them to the device
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
                                                  new_lvs)
      msg = result.fail_msg
      if msg:
        for new_lv in new_lvs:
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
                                               new_lv).fail_msg
          if msg2:
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
                                     " volumes"))
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)

      dev.children = new_lvs

      self.cfg.Update(self.instance, feedback_fn)

    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release both node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      self._ReleaseNodeLock([self.target_node, self.other_node])

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    cstep += 1
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)

  def _ExecDrbd8Secondary(self, feedback_fn):
8289
    """Replace the secondary node for DRBD 8.
8290

8291
    The algorithm for replace is quite complicated:
8292
      - for all disks of the instance:
8293
        - create new LVs on the new node with same names
8294
        - shutdown the drbd device on the old secondary
8295
        - disconnect the drbd network on the primary
8296
        - create the drbd device on the new secondary
8297
        - network attach the drbd on the primary, using an artifice:
8298
          the drbd code for Attach() will connect to the network if it
8299
          finds a device which is connected to the good local disks but
8300
          not network enabled
8301
      - wait for sync across all devices
8302
      - remove all disks from the old secondary
8303

8304
    Failures are not very well handled.
8305

8306
    """
8307
    steps_total = 6
8308

    
8309
    # Step: check device activation
8310
    self.lu.LogStep(1, steps_total, "Check device existence")
8311
    self._CheckDisksExistence([self.instance.primary_node])
8312
    self._CheckVolumeGroup([self.instance.primary_node])
8313

    
8314
    # Step: check other node consistency
8315
    self.lu.LogStep(2, steps_total, "Check peer consistency")
8316
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
8317

    
8318
    # Step: create new storage
8319
    self.lu.LogStep(3, steps_total, "Allocate new storage")
8320
    for idx, dev in enumerate(self.instance.disks):
8321
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
8322
                      (self.new_node, idx))
8323
      # we pass force_create=True to force LVM creation
8324
      for new_lv in dev.children:
8325
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
8326
                        _GetInstanceInfoText(self.instance), False)
8327

    
8328
    # Step 4: dbrd minors and drbd setups changes
8329
    # after this, we must manually remove the drbd minors on both the
8330
    # error and the success paths
8331
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8332
    minors = self.cfg.AllocateDRBDMinor([self.new_node
8333
                                         for dev in self.instance.disks],
8334
                                        self.instance.name)
8335
    logging.debug("Allocated minors %r", minors)
8336

    
8337
    iv_names = {}
8338
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
8339
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
8340
                      (self.new_node, idx))
8341
      # create new devices on new_node; note that we create two IDs:
8342
      # one without port, so the drbd will be activated without
8343
      # networking information on the new node at this stage, and one
8344
      # with network, for the latter activation in step 4
8345
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
8346
      if self.instance.primary_node == o_node1:
8347
        p_minor = o_minor1
8348
      else:
8349
        assert self.instance.primary_node == o_node2, "Three-node instance?"
8350
        p_minor = o_minor2
8351

    
8352
      new_alone_id = (self.instance.primary_node, self.new_node, None,
8353
                      p_minor, new_minor, o_secret)
8354
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
8355
                    p_minor, new_minor, o_secret)
8356

    
8357
      iv_names[idx] = (dev, dev.children, new_net_id)
8358
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
8359
                    new_net_id)
8360
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
8361
                              logical_id=new_alone_id,
8362
                              children=dev.children,
8363
                              size=dev.size)
8364
      try:
8365
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
8366
                              _GetInstanceInfoText(self.instance), False)
8367
      except errors.GenericError:
8368
        self.cfg.ReleaseDRBDMinors(self.instance.name)
8369
        raise
8370

    
8371
    # We have new devices, shutdown the drbd on the old secondary
8372
    for idx, dev in enumerate(self.instance.disks):
8373
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
8374
      self.cfg.SetDiskID(dev, self.target_node)
8375
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
8376
      if msg:
8377
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
8378
                           "node: %s" % (idx, msg),
8379
                           hint=("Please cleanup this device manually as"
8380
                                 " soon as possible"))
8381

    
8382
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
8383
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
8384
                                               self.node_secondary_ip,
8385
                                               self.instance.disks)\
8386
                                              [self.instance.primary_node]
8387

    
8388
    msg = result.fail_msg
8389
    if msg:
8390
      # detaches didn't succeed (unlikely)
8391
      self.cfg.ReleaseDRBDMinors(self.instance.name)
8392
      raise errors.OpExecError("Can't detach the disks from the network on"
8393
                               " old node: %s" % (msg,))
8394

    
8395
    # if we managed to detach at least one, we update all the disks of
8396
    # the instance to point to the new secondary
8397
    self.lu.LogInfo("Updating instance configuration")
8398
    for dev, _, new_logical_id in iv_names.itervalues():
8399
      dev.logical_id = new_logical_id
8400
      self.cfg.SetDiskID(dev, self.instance.primary_node)
8401

    
8402
    self.cfg.Update(self.instance, feedback_fn)
8403

    
8404
    # and now perform the drbd attach
8405
    self.lu.LogInfo("Attaching primary drbds to new secondary"
8406
                    " (standalone => connected)")
8407
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8408
                                            self.new_node],
8409
                                           self.node_secondary_ip,
8410
                                           self.instance.disks,
8411
                                           self.instance.name,
8412
                                           False)
8413
    for to_node, to_result in result.items():
8414
      msg = to_result.fail_msg
8415
      if msg:
8416
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8417
                           to_node, msg,
8418
                           hint=("please do a gnt-instance info to see the"
8419
                                 " status of disks"))
8420
    cstep = 5
8421
    if self.early_release:
8422
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
8423
      cstep += 1
8424
      self._RemoveOldStorage(self.target_node, iv_names)
8425
      # WARNING: we release all node locks here, do not do other RPCs
8426
      # than WaitForSync to the primary node
8427
      self._ReleaseNodeLock([self.instance.primary_node,
8428
                             self.target_node,
8429
                             self.new_node])
8430

    
8431
    # Wait for sync
8432
    # This can fail as the old devices are degraded and _WaitForSync
8433
    # does a combined result over all disks, so we don't check its return value
8434
    self.lu.LogStep(cstep, steps_total, "Sync devices")
8435
    cstep += 1
8436
    _WaitForSync(self.lu, self.instance)
8437

    
8438
    # Check all devices manually
8439
    self._CheckDevices(self.instance.primary_node, iv_names)
8440

    
8441
    # Step: remove old storage
8442
    if not self.early_release:
8443
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
8444
      self._RemoveOldStorage(self.target_node, iv_names)
8445

    
8446

    
8447
class LURepairNodeStorage(NoHooksLU):
8448
  """Repairs the volume group on a node.
8449

8450
  """
8451
  REQ_BGL = False
8452

    
8453
  def CheckArguments(self):
8454
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8455

    
8456
    storage_type = self.op.storage_type
8457

    
8458
    if (constants.SO_FIX_CONSISTENCY not in
8459
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8460
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
8461
                                 " repaired" % storage_type,
8462
                                 errors.ECODE_INVAL)
8463

    
8464
  def ExpandNames(self):
8465
    self.needed_locks = {
8466
      locking.LEVEL_NODE: [self.op.node_name],
8467
      }
8468

    
8469
  def _CheckFaultyDisks(self, instance, node_name):
8470
    """Ensure faulty disks abort the opcode or at least warn."""
8471
    try:
8472
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8473
                                  node_name, True):
8474
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8475
                                   " node '%s'" % (instance.name, node_name),
8476
                                   errors.ECODE_STATE)
8477
    except errors.OpPrereqError, err:
8478
      if self.op.ignore_consistency:
8479
        self.proc.LogWarning(str(err.args[0]))
8480
      else:
8481
        raise
8482

    
8483
  def CheckPrereq(self):
8484
    """Check prerequisites.
8485

8486
    """
8487
    # Check whether any instance on this node has faulty disks
8488
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8489
      if not inst.admin_up:
8490
        continue
8491
      check_nodes = set(inst.all_nodes)
8492
      check_nodes.discard(self.op.node_name)
8493
      for inst_node_name in check_nodes:
8494
        self._CheckFaultyDisks(inst, inst_node_name)
8495

    
8496
  def Exec(self, feedback_fn):
8497
    feedback_fn("Repairing storage unit '%s' on %s ..." %
8498
                (self.op.name, self.op.node_name))
8499

    
8500
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8501
    result = self.rpc.call_storage_execute(self.op.node_name,
8502
                                           self.op.storage_type, st_args,
8503
                                           self.op.name,
8504
                                           constants.SO_FIX_CONSISTENCY)
8505
    result.Raise("Failed to repair storage unit '%s' on %s" %
8506
                 (self.op.name, self.op.node_name))
8507

    
8508

    
8509
class LUNodeEvacuationStrategy(NoHooksLU):
8510
  """Computes the node evacuation strategy.
8511

8512
  """
8513
  REQ_BGL = False
8514

    
8515
  def CheckArguments(self):
8516
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
8517

    
8518
  def ExpandNames(self):
8519
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
8520
    self.needed_locks = locks = {}
8521
    if self.op.remote_node is None:
8522
      locks[locking.LEVEL_NODE] = locking.ALL_SET
8523
    else:
8524
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8525
      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
8526

    
8527
  def Exec(self, feedback_fn):
8528
    if self.op.remote_node is not None:
8529
      instances = []
8530
      for node in self.op.nodes:
8531
        instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
8532
      result = []
8533
      for i in instances:
8534
        if i.primary_node == self.op.remote_node:
8535
          raise errors.OpPrereqError("Node %s is the primary node of"
8536
                                     " instance %s, cannot use it as"
8537
                                     " secondary" %
8538
                                     (self.op.remote_node, i.name),
8539
                                     errors.ECODE_INVAL)
8540
        result.append([i.name, self.op.remote_node])
8541
    else:
8542
      ial = IAllocator(self.cfg, self.rpc,
8543
                       mode=constants.IALLOCATOR_MODE_MEVAC,
8544
                       evac_nodes=self.op.nodes)
8545
      ial.Run(self.op.iallocator, validate=True)
8546
      if not ial.success:
8547
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
8548
                                 errors.ECODE_NORES)
8549
      result = ial.result
8550
    return result
8551

    
8552

    
8553
class LUGrowDisk(LogicalUnit):
8554
  """Grow a disk of an instance.
8555

8556
  """
8557
  HPATH = "disk-grow"
8558
  HTYPE = constants.HTYPE_INSTANCE
8559
  REQ_BGL = False
8560

    
8561
  def ExpandNames(self):
8562
    self._ExpandAndLockInstance()
8563
    self.needed_locks[locking.LEVEL_NODE] = []
8564
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8565

    
8566
  def DeclareLocks(self, level):
8567
    if level == locking.LEVEL_NODE:
8568
      self._LockInstancesNodes()
8569

    
8570
  def BuildHooksEnv(self):
8571
    """Build hooks env.
8572

8573
    This runs on the master, the primary and all the secondaries.
8574

8575
    """
8576
    env = {
8577
      "DISK": self.op.disk,
8578
      "AMOUNT": self.op.amount,
8579
      }
8580
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8581
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8582
    return env, nl, nl
8583

    
8584
  def CheckPrereq(self):
8585
    """Check prerequisites.
8586

8587
    This checks that the instance is in the cluster.
8588

8589
    """
8590
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8591
    assert instance is not None, \
8592
      "Cannot retrieve locked instance %s" % self.op.instance_name
8593
    nodenames = list(instance.all_nodes)
8594
    for node in nodenames:
8595
      _CheckNodeOnline(self, node)
8596

    
8597
    self.instance = instance
8598

    
8599
    if instance.disk_template not in constants.DTS_GROWABLE:
8600
      raise errors.OpPrereqError("Instance's disk layout does not support"
8601
                                 " growing.", errors.ECODE_INVAL)
8602

    
8603
    self.disk = instance.FindDisk(self.op.disk)
8604

    
8605
    if instance.disk_template != constants.DT_FILE:
8606
      # TODO: check the free disk space for file, when that feature
8607
      # will be supported
8608
      _CheckNodesFreeDiskPerVG(self, nodenames,
8609
                               {self.disk.physical_id[0]: self.op.amount})
8610

    
8611
  def Exec(self, feedback_fn):
8612
    """Execute disk grow.
8613

8614
    """
8615
    instance = self.instance
8616
    disk = self.disk
8617

    
8618
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8619
    if not disks_ok:
8620
      raise errors.OpExecError("Cannot activate block device to grow")
8621

    
8622
    for node in instance.all_nodes:
8623
      self.cfg.SetDiskID(disk, node)
8624
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8625
      result.Raise("Grow request failed to node %s" % node)
8626

    
8627
      # TODO: Rewrite code to work properly
8628
      # DRBD goes into sync mode for a short amount of time after executing the
8629
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8630
      # calling "resize" in sync mode fails. Sleeping for a short amount of
8631
      # time is a work-around.
8632
      time.sleep(5)
8633

    
8634
    disk.RecordGrow(self.op.amount)
8635
    self.cfg.Update(instance, feedback_fn)
8636
    if self.op.wait_for_sync:
8637
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
8638
      if disk_abort:
8639
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8640
                             " status.\nPlease check the instance.")
8641
      if not instance.admin_up:
8642
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8643
    elif not instance.admin_up:
8644
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
8645
                           " not supposed to be running because no wait for"
8646
                           " sync mode was requested.")
8647

    
8648

    
8649
class LUQueryInstanceData(NoHooksLU):
8650
  """Query runtime instance data.
8651

8652
  """
8653
  REQ_BGL = False
8654

    
8655
  def ExpandNames(self):
8656
    self.needed_locks = {}
8657
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
8658

    
8659
    if self.op.instances:
8660
      self.wanted_names = []
8661
      for name in self.op.instances:
8662
        full_name = _ExpandInstanceName(self.cfg, name)
8663
        self.wanted_names.append(full_name)
8664
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8665
    else:
8666
      self.wanted_names = None
8667
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8668

    
8669
    self.needed_locks[locking.LEVEL_NODE] = []
8670
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8671

    
8672
  def DeclareLocks(self, level):
8673
    if level == locking.LEVEL_NODE:
8674
      self._LockInstancesNodes()
8675

    
8676
  def CheckPrereq(self):
8677
    """Check prerequisites.
8678

8679
    This only checks the optional instance list against the existing names.
8680

8681
    """
8682
    if self.wanted_names is None:
8683
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8684

    
8685
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
8686
                             in self.wanted_names]
8687

    
8688
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
8689
    """Returns the status of a block device
8690

8691
    """
8692
    if self.op.static or not node:
8693
      return None
8694

    
8695
    self.cfg.SetDiskID(dev, node)
8696

    
8697
    result = self.rpc.call_blockdev_find(node, dev)
8698
    if result.offline:
8699
      return None
8700

    
8701
    result.Raise("Can't compute disk status for %s" % instance_name)
8702

    
8703
    status = result.payload
8704
    if status is None:
8705
      return None
8706

    
8707
    return (status.dev_path, status.major, status.minor,
8708
            status.sync_percent, status.estimated_time,
8709
            status.is_degraded, status.ldisk_status)
8710

    
8711
  def _ComputeDiskStatus(self, instance, snode, dev):
8712
    """Compute block device status.
8713

8714
    """
8715
    if dev.dev_type in constants.LDS_DRBD:
8716
      # we change the snode then (otherwise we use the one passed in)
8717
      if dev.logical_id[0] == instance.primary_node:
8718
        snode = dev.logical_id[1]
8719
      else:
8720
        snode = dev.logical_id[0]
8721

    
8722
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8723
                                              instance.name, dev)
8724
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8725

    
8726
    if dev.children:
8727
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
8728
                      for child in dev.children]
8729
    else:
8730
      dev_children = []
8731

    
8732
    data = {
8733
      "iv_name": dev.iv_name,
8734
      "dev_type": dev.dev_type,
8735
      "logical_id": dev.logical_id,
8736
      "physical_id": dev.physical_id,
8737
      "pstatus": dev_pstatus,
8738
      "sstatus": dev_sstatus,
8739
      "children": dev_children,
8740
      "mode": dev.mode,
8741
      "size": dev.size,
8742
      }
8743

    
8744
    return data
8745

    
8746
  def Exec(self, feedback_fn):
8747
    """Gather and return data"""
8748
    result = {}
8749

    
8750
    cluster = self.cfg.GetClusterInfo()
8751

    
8752
    for instance in self.wanted_instances:
8753
      if not self.op.static:
8754
        remote_info = self.rpc.call_instance_info(instance.primary_node,
8755
                                                  instance.name,
8756
                                                  instance.hypervisor)
8757
        remote_info.Raise("Error checking node %s" % instance.primary_node)
8758
        remote_info = remote_info.payload
8759
        if remote_info and "state" in remote_info:
8760
          remote_state = "up"
8761
        else:
8762
          remote_state = "down"
8763
      else:
8764
        remote_state = None
8765
      if instance.admin_up:
8766
        config_state = "up"
8767
      else:
8768
        config_state = "down"
8769

    
8770
      disks = [self._ComputeDiskStatus(instance, None, device)
8771
               for device in instance.disks]
8772

    
8773
      idict = {
8774
        "name": instance.name,
8775
        "config_state": config_state,
8776
        "run_state": remote_state,
8777
        "pnode": instance.primary_node,
8778
        "snodes": instance.secondary_nodes,
8779
        "os": instance.os,
8780
        # this happens to be the same format used for hooks
8781
        "nics": _NICListToTuple(self, instance.nics),
8782
        "disk_template": instance.disk_template,
8783
        "disks": disks,
8784
        "hypervisor": instance.hypervisor,
8785
        "network_port": instance.network_port,
8786
        "hv_instance": instance.hvparams,
8787
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
8788
        "be_instance": instance.beparams,
8789
        "be_actual": cluster.FillBE(instance),
8790
        "os_instance": instance.osparams,
8791
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
8792
        "serial_no": instance.serial_no,
8793
        "mtime": instance.mtime,
8794
        "ctime": instance.ctime,
8795
        "uuid": instance.uuid,
8796
        }
8797

    
8798
      result[instance.name] = idict
8799

    
8800
    return result
8801

    
8802

    
8803
class LUSetInstanceParams(LogicalUnit):
8804
  """Modifies an instances's parameters.
8805

8806
  """
8807
  HPATH = "instance-modify"
8808
  HTYPE = constants.HTYPE_INSTANCE
8809
  REQ_BGL = False
8810

    
8811
  def CheckArguments(self):
8812
    if not (self.op.nics or self.op.disks or self.op.disk_template or
8813
            self.op.hvparams or self.op.beparams or self.op.os_name):
8814
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8815

    
8816
    if self.op.hvparams:
8817
      _CheckGlobalHvParams(self.op.hvparams)
8818

    
8819
    # Disk validation
8820
    disk_addremove = 0
8821
    for disk_op, disk_dict in self.op.disks:
8822
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
8823
      if disk_op == constants.DDM_REMOVE:
8824
        disk_addremove += 1
8825
        continue
8826
      elif disk_op == constants.DDM_ADD:
8827
        disk_addremove += 1
8828
      else:
8829
        if not isinstance(disk_op, int):
8830
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8831
        if not isinstance(disk_dict, dict):
8832
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8833
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8834

    
8835
      if disk_op == constants.DDM_ADD:
8836
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8837
        if mode not in constants.DISK_ACCESS_SET:
8838
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8839
                                     errors.ECODE_INVAL)
8840
        size = disk_dict.get('size', None)
8841
        if size is None:
8842
          raise errors.OpPrereqError("Required disk parameter size missing",
8843
                                     errors.ECODE_INVAL)
8844
        try:
8845
          size = int(size)
8846
        except (TypeError, ValueError), err:
8847
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8848
                                     str(err), errors.ECODE_INVAL)
8849
        disk_dict['size'] = size
8850
      else:
8851
        # modification of disk
8852
        if 'size' in disk_dict:
8853
          raise errors.OpPrereqError("Disk size change not possible, use"
8854
                                     " grow-disk", errors.ECODE_INVAL)
8855

    
8856
    if disk_addremove > 1:
8857
      raise errors.OpPrereqError("Only one disk add or remove operation"
8858
                                 " supported at a time", errors.ECODE_INVAL)
8859

    
8860
    if self.op.disks and self.op.disk_template is not None:
8861
      raise errors.OpPrereqError("Disk template conversion and other disk"
8862
                                 " changes not supported at the same time",
8863
                                 errors.ECODE_INVAL)
8864

    
8865
    if (self.op.disk_template and
8866
        self.op.disk_template in constants.DTS_NET_MIRROR and
8867
        self.op.remote_node is None):
8868
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
8869
                                 " one requires specifying a secondary node",
8870
                                 errors.ECODE_INVAL)
8871

    
8872
    # NIC validation
8873
    nic_addremove = 0
8874
    for nic_op, nic_dict in self.op.nics:
8875
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
8876
      if nic_op == constants.DDM_REMOVE:
8877
        nic_addremove += 1
8878
        continue
8879
      elif nic_op == constants.DDM_ADD:
8880
        nic_addremove += 1
8881
      else:
8882
        if not isinstance(nic_op, int):
8883
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8884
        if not isinstance(nic_dict, dict):
8885
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8886
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8887

    
8888
      # nic_dict should be a dict
8889
      nic_ip = nic_dict.get('ip', None)
8890
      if nic_ip is not None:
8891
        if nic_ip.lower() == constants.VALUE_NONE:
8892
          nic_dict['ip'] = None
8893
        else:
8894
          if not netutils.IPAddress.IsValid(nic_ip):
8895
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8896
                                       errors.ECODE_INVAL)
8897

    
8898
      nic_bridge = nic_dict.get('bridge', None)
8899
      nic_link = nic_dict.get('link', None)
8900
      if nic_bridge and nic_link:
8901
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8902
                                   " at the same time", errors.ECODE_INVAL)
8903
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8904
        nic_dict['bridge'] = None
8905
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8906
        nic_dict['link'] = None
8907

    
8908
      if nic_op == constants.DDM_ADD:
8909
        nic_mac = nic_dict.get('mac', None)
8910
        if nic_mac is None:
8911
          nic_dict['mac'] = constants.VALUE_AUTO
8912

    
8913
      if 'mac' in nic_dict:
8914
        nic_mac = nic_dict['mac']
8915
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8916
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8917

    
8918
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8919
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8920
                                     " modifying an existing nic",
8921
                                     errors.ECODE_INVAL)
8922

    
8923
    if nic_addremove > 1:
8924
      raise errors.OpPrereqError("Only one NIC add or remove operation"
8925
                                 " supported at a time", errors.ECODE_INVAL)
8926

    
8927
  def ExpandNames(self):
8928
    self._ExpandAndLockInstance()
8929
    self.needed_locks[locking.LEVEL_NODE] = []
8930
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8931

    
8932
  def DeclareLocks(self, level):
8933
    if level == locking.LEVEL_NODE:
8934
      self._LockInstancesNodes()
8935
      if self.op.disk_template and self.op.remote_node:
8936
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8937
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
8938

    
8939
  def BuildHooksEnv(self):
8940
    """Build hooks env.
8941

8942
    This runs on the master, primary and secondaries.
8943

8944
    """
8945
    args = dict()
8946
    if constants.BE_MEMORY in self.be_new:
8947
      args['memory'] = self.be_new[constants.BE_MEMORY]
8948
    if constants.BE_VCPUS in self.be_new:
8949
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
8950
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
8951
    # information at all.
8952
    if self.op.nics:
8953
      args['nics'] = []
8954
      nic_override = dict(self.op.nics)
8955
      for idx, nic in enumerate(self.instance.nics):
8956
        if idx in nic_override:
8957
          this_nic_override = nic_override[idx]
8958
        else:
8959
          this_nic_override = {}
8960
        if 'ip' in this_nic_override:
8961
          ip = this_nic_override['ip']
8962
        else:
8963
          ip = nic.ip
8964
        if 'mac' in this_nic_override:
8965
          mac = this_nic_override['mac']
8966
        else:
8967
          mac = nic.mac
8968
        if idx in self.nic_pnew:
8969
          nicparams = self.nic_pnew[idx]
8970
        else:
8971
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
8972
        mode = nicparams[constants.NIC_MODE]
8973
        link = nicparams[constants.NIC_LINK]
8974
        args['nics'].append((ip, mac, mode, link))
8975
      if constants.DDM_ADD in nic_override:
8976
        ip = nic_override[constants.DDM_ADD].get('ip', None)
8977
        mac = nic_override[constants.DDM_ADD]['mac']
8978
        nicparams = self.nic_pnew[constants.DDM_ADD]
8979
        mode = nicparams[constants.NIC_MODE]
8980
        link = nicparams[constants.NIC_LINK]
8981
        args['nics'].append((ip, mac, mode, link))
8982
      elif constants.DDM_REMOVE in nic_override:
8983
        del args['nics'][-1]
8984

    
8985
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
8986
    if self.op.disk_template:
8987
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
8988
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8989
    return env, nl, nl
8990

    
8991
  def CheckPrereq(self):
8992
    """Check prerequisites.
8993

8994
    This only checks the instance list against the existing names.
8995

8996
    """
8997
    # checking the new params on the primary/secondary nodes
8998

    
8999
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9000
    cluster = self.cluster = self.cfg.GetClusterInfo()
9001
    assert self.instance is not None, \
9002
      "Cannot retrieve locked instance %s" % self.op.instance_name
9003
    pnode = instance.primary_node
9004
    nodelist = list(instance.all_nodes)
9005

    
9006
    # OS change
9007
    if self.op.os_name and not self.op.force:
9008
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
9009
                      self.op.force_variant)
9010
      instance_os = self.op.os_name
9011
    else:
9012
      instance_os = instance.os
9013

    
9014
    if self.op.disk_template:
9015
      if instance.disk_template == self.op.disk_template:
9016
        raise errors.OpPrereqError("Instance already has disk template %s" %
9017
                                   instance.disk_template, errors.ECODE_INVAL)
9018

    
9019
      if (instance.disk_template,
9020
          self.op.disk_template) not in self._DISK_CONVERSIONS:
9021
        raise errors.OpPrereqError("Unsupported disk template conversion from"
9022
                                   " %s to %s" % (instance.disk_template,
9023
                                                  self.op.disk_template),
9024
                                   errors.ECODE_INVAL)
9025
      _CheckInstanceDown(self, instance, "cannot change disk template")
9026
      if self.op.disk_template in constants.DTS_NET_MIRROR:
9027
        if self.op.remote_node == pnode:
9028
          raise errors.OpPrereqError("Given new secondary node %s is the same"
9029
                                     " as the primary node of the instance" %
9030
                                     self.op.remote_node, errors.ECODE_STATE)
9031
        _CheckNodeOnline(self, self.op.remote_node)
9032
        _CheckNodeNotDrained(self, self.op.remote_node)
9033
        # FIXME: here we assume that the old instance type is DT_PLAIN
9034
        assert instance.disk_template == constants.DT_PLAIN
9035
        disks = [{"size": d.size, "vg": d.logical_id[0]}
9036
                 for d in instance.disks]
9037
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
9038
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
9039

    
9040
    # hvparams processing
9041
    if self.op.hvparams:
9042
      hv_type = instance.hypervisor
9043
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
9044
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
9045
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
9046

    
9047
      # local check
9048
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
9049
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
9050
      self.hv_new = hv_new # the new actual values
9051
      self.hv_inst = i_hvdict # the new dict (without defaults)
9052
    else:
9053
      self.hv_new = self.hv_inst = {}
9054

    
9055
    # beparams processing
9056
    if self.op.beparams:
9057
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
9058
                                   use_none=True)
9059
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
9060
      be_new = cluster.SimpleFillBE(i_bedict)
9061
      self.be_new = be_new # the new actual values
9062
      self.be_inst = i_bedict # the new dict (without defaults)
9063
    else:
9064
      self.be_new = self.be_inst = {}
9065

    
9066
    # osparams processing
9067
    if self.op.osparams:
9068
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
9069
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
9070
      self.os_inst = i_osdict # the new dict (without defaults)
9071
    else:
9072
      self.os_inst = {}
9073

    
9074
    self.warn = []
9075

    
9076
    if constants.BE_MEMORY in self.op.beparams and not self.op.force:
9077
      mem_check_list = [pnode]
9078
      if be_new[constants.BE_AUTO_BALANCE]:
9079
        # either we changed auto_balance to yes or it was from before
9080
        mem_check_list.extend(instance.secondary_nodes)
9081
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
9082
                                                  instance.hypervisor)
9083
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
9084
                                         instance.hypervisor)
9085
      pninfo = nodeinfo[pnode]
9086
      msg = pninfo.fail_msg
9087
      if msg:
9088
        # Assume the primary node is unreachable and go ahead
9089
        self.warn.append("Can't get info from primary node %s: %s" %
9090
                         (pnode,  msg))
9091
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
9092
        self.warn.append("Node data from primary node %s doesn't contain"
9093
                         " free memory information" % pnode)
9094
      elif instance_info.fail_msg:
9095
        self.warn.append("Can't get instance runtime information: %s" %
9096
                        instance_info.fail_msg)
9097
      else:
9098
        if instance_info.payload:
9099
          current_mem = int(instance_info.payload['memory'])
9100
        else:
9101
          # Assume instance not running
9102
          # (there is a slight race condition here, but it's not very probable,
9103
          # and we have no other way to check)
9104
          current_mem = 0
9105
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
9106
                    pninfo.payload['memory_free'])
9107
        if miss_mem > 0:
9108
          raise errors.OpPrereqError("This change will prevent the instance"
9109
                                     " from starting, due to %d MB of memory"
9110
                                     " missing on its primary node" % miss_mem,
9111
                                     errors.ECODE_NORES)
9112

    
9113
      if be_new[constants.BE_AUTO_BALANCE]:
9114
        for node, nres in nodeinfo.items():
9115
          if node not in instance.secondary_nodes:
9116
            continue
9117
          msg = nres.fail_msg
9118
          if msg:
9119
            self.warn.append("Can't get info from secondary node %s: %s" %
9120
                             (node, msg))
9121
          elif not isinstance(nres.payload.get('memory_free', None), int):
9122
            self.warn.append("Secondary node %s didn't return free"
9123
                             " memory information" % node)
9124
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
9125
            self.warn.append("Not enough memory to failover instance to"
9126
                             " secondary node %s" % node)
9127

    
9128
    # NIC processing
9129
    self.nic_pnew = {}
9130
    self.nic_pinst = {}
9131
    for nic_op, nic_dict in self.op.nics:
9132
      if nic_op == constants.DDM_REMOVE:
9133
        if not instance.nics:
9134
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
9135
                                     errors.ECODE_INVAL)
9136
        continue
9137
      if nic_op != constants.DDM_ADD:
9138
        # an existing nic
9139
        if not instance.nics:
9140
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
9141
                                     " no NICs" % nic_op,
9142
                                     errors.ECODE_INVAL)
9143
        if nic_op < 0 or nic_op >= len(instance.nics):
9144
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
9145
                                     " are 0 to %d" %
9146
                                     (nic_op, len(instance.nics) - 1),
9147
                                     errors.ECODE_INVAL)
9148
        old_nic_params = instance.nics[nic_op].nicparams
9149
        old_nic_ip = instance.nics[nic_op].ip
9150
      else:
9151
        old_nic_params = {}
9152
        old_nic_ip = None
9153

    
9154
      update_params_dict = dict([(key, nic_dict[key])
9155
                                 for key in constants.NICS_PARAMETERS
9156
                                 if key in nic_dict])
9157

    
9158
      if 'bridge' in nic_dict:
9159
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
9160

    
9161
      new_nic_params = _GetUpdatedParams(old_nic_params,
9162
                                         update_params_dict)
9163
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
9164
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
9165
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
9166
      self.nic_pinst[nic_op] = new_nic_params
9167
      self.nic_pnew[nic_op] = new_filled_nic_params
9168
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
9169

    
9170
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
9171
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
9172
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
9173
        if msg:
9174
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
9175
          if self.op.force:
9176
            self.warn.append(msg)
9177
          else:
9178
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
9179
      if new_nic_mode == constants.NIC_MODE_ROUTED:
9180
        if 'ip' in nic_dict:
9181
          nic_ip = nic_dict['ip']
9182
        else:
9183
          nic_ip = old_nic_ip
9184
        if nic_ip is None:
9185
          raise errors.OpPrereqError('Cannot set the nic ip to None'
9186
                                     ' on a routed nic', errors.ECODE_INVAL)
9187
      if 'mac' in nic_dict:
9188
        nic_mac = nic_dict['mac']
9189
        if nic_mac is None:
9190
          raise errors.OpPrereqError('Cannot set the nic mac to None',
9191
                                     errors.ECODE_INVAL)
9192
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9193
          # otherwise generate the mac
9194
          nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
9195
        else:
9196
          # or validate/reserve the current one
9197
          try:
9198
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
9199
          except errors.ReservationError:
9200
            raise errors.OpPrereqError("MAC address %s already in use"
9201
                                       " in cluster" % nic_mac,
9202
                                       errors.ECODE_NOTUNIQUE)
9203

    
9204
    # DISK processing
9205
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
9206
      raise errors.OpPrereqError("Disk operations not supported for"
9207
                                 " diskless instances",
9208
                                 errors.ECODE_INVAL)
9209
    for disk_op, _ in self.op.disks:
9210
      if disk_op == constants.DDM_REMOVE:
9211
        if len(instance.disks) == 1:
9212
          raise errors.OpPrereqError("Cannot remove the last disk of"
9213
                                     " an instance", errors.ECODE_INVAL)
9214
        _CheckInstanceDown(self, instance, "cannot remove disks")
9215

    
9216
      if (disk_op == constants.DDM_ADD and
9217
          len(instance.nics) >= constants.MAX_DISKS):
9218
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
9219
                                   " add more" % constants.MAX_DISKS,
9220
                                   errors.ECODE_STATE)
9221
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
9222
        # an existing disk
9223
        if disk_op < 0 or disk_op >= len(instance.disks):
9224
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
9225
                                     " are 0 to %d" %
9226
                                     (disk_op, len(instance.disks)),
9227
                                     errors.ECODE_INVAL)
9228

    
9229
    return
9230

    
9231
  def _ConvertPlainToDrbd(self, feedback_fn):
9232
    """Converts an instance from plain to drbd.
9233

9234
    """
9235
    feedback_fn("Converting template to drbd")
9236
    instance = self.instance
9237
    pnode = instance.primary_node
9238
    snode = self.op.remote_node
9239

    
9240
    # create a fake disk info for _GenerateDiskTemplate
9241
    disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
9242
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
9243
                                      instance.name, pnode, [snode],
9244
                                      disk_info, None, None, 0, feedback_fn)
9245
    info = _GetInstanceInfoText(instance)
9246
    feedback_fn("Creating aditional volumes...")
9247
    # first, create the missing data and meta devices
9248
    for disk in new_disks:
9249
      # unfortunately this is... not too nice
9250
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
9251
                            info, True)
9252
      for child in disk.children:
9253
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
9254
    # at this stage, all new LVs have been created, we can rename the
9255
    # old ones
9256
    feedback_fn("Renaming original volumes...")
9257
    rename_list = [(o, n.children[0].logical_id)
9258
                   for (o, n) in zip(instance.disks, new_disks)]
9259
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
9260
    result.Raise("Failed to rename original LVs")
9261

    
9262
    feedback_fn("Initializing DRBD devices...")
9263
    # all child devices are in place, we can now create the DRBD devices
9264
    for disk in new_disks:
9265
      for node in [pnode, snode]:
9266
        f_create = node == pnode
9267
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
9268

    
9269
    # at this point, the instance has been modified
9270
    instance.disk_template = constants.DT_DRBD8
9271
    instance.disks = new_disks
9272
    self.cfg.Update(instance, feedback_fn)
9273

    
9274
    # disks are created, waiting for sync
9275
    disk_abort = not _WaitForSync(self, instance)
9276
    if disk_abort:
9277
      raise errors.OpExecError("There are some degraded disks for"
9278
                               " this instance, please cleanup manually")
9279

    
9280
  def _ConvertDrbdToPlain(self, feedback_fn):
9281
    """Converts an instance from drbd to plain.
9282

9283
    """
9284
    instance = self.instance
9285
    assert len(instance.secondary_nodes) == 1
9286
    pnode = instance.primary_node
9287
    snode = instance.secondary_nodes[0]
9288
    feedback_fn("Converting template to plain")
9289

    
9290
    old_disks = instance.disks
9291
    new_disks = [d.children[0] for d in old_disks]
9292

    
9293
    # copy over size and mode
9294
    for parent, child in zip(old_disks, new_disks):
9295
      child.size = parent.size
9296
      child.mode = parent.mode
9297

    
9298
    # update instance structure
9299
    instance.disks = new_disks
9300
    instance.disk_template = constants.DT_PLAIN
9301
    self.cfg.Update(instance, feedback_fn)
9302

    
9303
    feedback_fn("Removing volumes on the secondary node...")
9304
    for disk in old_disks:
9305
      self.cfg.SetDiskID(disk, snode)
9306
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
9307
      if msg:
9308
        self.LogWarning("Could not remove block device %s on node %s,"
9309
                        " continuing anyway: %s", disk.iv_name, snode, msg)
9310

    
9311
    feedback_fn("Removing unneeded volumes on the primary node...")
9312
    for idx, disk in enumerate(old_disks):
9313
      meta = disk.children[1]
9314
      self.cfg.SetDiskID(meta, pnode)
9315
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
9316
      if msg:
9317
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
9318
                        " continuing anyway: %s", idx, pnode, msg)
9319

    
9320
  def Exec(self, feedback_fn):
9321
    """Modifies an instance.
9322

9323
    All parameters take effect only at the next restart of the instance.
9324

9325
    """
9326
    # Process here the warnings from CheckPrereq, as we don't have a
9327
    # feedback_fn there.
9328
    for warn in self.warn:
9329
      feedback_fn("WARNING: %s" % warn)
9330

    
9331
    result = []
9332
    instance = self.instance
9333
    # disk changes
9334
    for disk_op, disk_dict in self.op.disks:
9335
      if disk_op == constants.DDM_REMOVE:
9336
        # remove the last disk
9337
        device = instance.disks.pop()
9338
        device_idx = len(instance.disks)
9339
        for node, disk in device.ComputeNodeTree(instance.primary_node):
9340
          self.cfg.SetDiskID(disk, node)
9341
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
9342
          if msg:
9343
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
9344
                            " continuing anyway", device_idx, node, msg)
9345
        result.append(("disk/%d" % device_idx, "remove"))
9346
      elif disk_op == constants.DDM_ADD:
9347
        # add a new disk
9348
        if instance.disk_template == constants.DT_FILE:
9349
          file_driver, file_path = instance.disks[0].logical_id
9350
          file_path = os.path.dirname(file_path)
9351
        else:
9352
          file_driver = file_path = None
9353
        disk_idx_base = len(instance.disks)
9354
        new_disk = _GenerateDiskTemplate(self,
9355
                                         instance.disk_template,
9356
                                         instance.name, instance.primary_node,
9357
                                         instance.secondary_nodes,
9358
                                         [disk_dict],
9359
                                         file_path,
9360
                                         file_driver,
9361
                                         disk_idx_base, feedback_fn)[0]
9362
        instance.disks.append(new_disk)
9363
        info = _GetInstanceInfoText(instance)
9364

    
9365
        logging.info("Creating volume %s for instance %s",
9366
                     new_disk.iv_name, instance.name)
9367
        # Note: this needs to be kept in sync with _CreateDisks
9368
        #HARDCODE
9369
        for node in instance.all_nodes:
9370
          f_create = node == instance.primary_node
9371
          try:
9372
            _CreateBlockDev(self, node, instance, new_disk,
9373
                            f_create, info, f_create)
9374
          except errors.OpExecError, err:
9375
            self.LogWarning("Failed to create volume %s (%s) on"
9376
                            " node %s: %s",
9377
                            new_disk.iv_name, new_disk, node, err)
9378
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
9379
                       (new_disk.size, new_disk.mode)))
9380
      else:
9381
        # change a given disk
9382
        instance.disks[disk_op].mode = disk_dict['mode']
9383
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
9384

    
9385
    if self.op.disk_template:
9386
      r_shut = _ShutdownInstanceDisks(self, instance)
9387
      if not r_shut:
9388
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
9389
                                 " proceed with disk template conversion")
9390
      mode = (instance.disk_template, self.op.disk_template)
9391
      try:
9392
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
9393
      except:
9394
        self.cfg.ReleaseDRBDMinors(instance.name)
9395
        raise
9396
      result.append(("disk_template", self.op.disk_template))
9397

    
9398
    # NIC changes
9399
    for nic_op, nic_dict in self.op.nics:
9400
      if nic_op == constants.DDM_REMOVE:
9401
        # remove the last nic
9402
        del instance.nics[-1]
9403
        result.append(("nic.%d" % len(instance.nics), "remove"))
9404
      elif nic_op == constants.DDM_ADD:
9405
        # mac and bridge should be set, by now
9406
        mac = nic_dict['mac']
9407
        ip = nic_dict.get('ip', None)
9408
        nicparams = self.nic_pinst[constants.DDM_ADD]
9409
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9410
        instance.nics.append(new_nic)
9411
        result.append(("nic.%d" % (len(instance.nics) - 1),
9412
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
9413
                       (new_nic.mac, new_nic.ip,
9414
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
9415
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
9416
                       )))
9417
      else:
9418
        for key in 'mac', 'ip':
9419
          if key in nic_dict:
9420
            setattr(instance.nics[nic_op], key, nic_dict[key])
9421
        if nic_op in self.nic_pinst:
9422
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9423
        for key, val in nic_dict.iteritems():
9424
          result.append(("nic.%s/%d" % (key, nic_op), val))
9425

    
9426
    # hvparams changes
9427
    if self.op.hvparams:
9428
      instance.hvparams = self.hv_inst
9429
      for key, val in self.op.hvparams.iteritems():
9430
        result.append(("hv/%s" % key, val))
9431

    
9432
    # beparams changes
9433
    if self.op.beparams:
9434
      instance.beparams = self.be_inst
9435
      for key, val in self.op.beparams.iteritems():
9436
        result.append(("be/%s" % key, val))
9437

    
9438
    # OS change
9439
    if self.op.os_name:
9440
      instance.os = self.op.os_name
9441

    
9442
    # osparams changes
9443
    if self.op.osparams:
9444
      instance.osparams = self.os_inst
9445
      for key, val in self.op.osparams.iteritems():
9446
        result.append(("os/%s" % key, val))
9447

    
9448
    self.cfg.Update(instance, feedback_fn)
9449

    
9450
    return result
9451

    
9452
  _DISK_CONVERSIONS = {
9453
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
9454
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
9455
    }
9456

    
9457

    
9458
class LUQueryExports(NoHooksLU):
9459
  """Query the exports list
9460

9461
  """
9462
  REQ_BGL = False
9463

    
9464
  def ExpandNames(self):
9465
    self.needed_locks = {}
9466
    self.share_locks[locking.LEVEL_NODE] = 1
9467
    if not self.op.nodes:
9468
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9469
    else:
9470
      self.needed_locks[locking.LEVEL_NODE] = \
9471
        _GetWantedNodes(self, self.op.nodes)
9472

    
9473
  def Exec(self, feedback_fn):
9474
    """Compute the list of all the exported system images.
9475

9476
    @rtype: dict
9477
    @return: a dictionary with the structure node->(export-list)
9478
        where export-list is a list of the instances exported on
9479
        that node.
9480

9481
    """
9482
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
9483
    rpcresult = self.rpc.call_export_list(self.nodes)
9484
    result = {}
9485
    for node in rpcresult:
9486
      if rpcresult[node].fail_msg:
9487
        result[node] = False
9488
      else:
9489
        result[node] = rpcresult[node].payload
9490

    
9491
    return result
9492

    
9493

    
9494
class LUPrepareExport(NoHooksLU):
  """Prepares an instance for an export and returns useful information.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self._cds = _GetClusterDomainSecret()

  def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    """
    instance = self.instance

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      salt = utils.GenerateSecret(8)

      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
      result = self.rpc.call_x509_cert_create(instance.primary_node,
                                              constants.RIE_CERT_VALIDITY)
      result.Raise("Can't create X509 key and certificate on %s" % result.node)

      (name, cert_pem) = result.payload

      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                             cert_pem)

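      # Both the handshake and the HMAC-signed key name/CA below are derived
      # from the cluster domain secret, so the receiving side can check that
      # it talks to a cluster sharing the same secret.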
      return {
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
                          salt),
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
        }

    return None


class LUExportInstance(LogicalUnit):
  """Export an instance to an image in the cluster.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.x509_key_name = self.op.x509_key_name
    self.dest_x509_ca_pem = self.op.destination_x509_ca

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      if not self.x509_key_name:
        raise errors.OpPrereqError("Missing X509 key name for encryption",
                                   errors.ECODE_INVAL)

      if not self.dest_x509_ca_pem:
        raise errors.OpPrereqError("Missing destination X509 CA",
                                   errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    # Lock all nodes for local exports
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      # FIXME: lock only instance primary and destination node
      #
      # Sad but true, for now we have to lock all nodes, as we don't know where
      # the previous export might be, and in this LU we search for it and
      # remove it from its current node. In the future we could fix this by:
      #  - making a tasklet to search (share-lock all), then create the
      #    new one, then one to remove, after
      #  - removing the removal operation altogether
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # All nodes are locked anyway, so nothing to do here.

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    """
    env = {
      "EXPORT_MODE": self.op.mode,
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      # TODO: Generic function for boolean env variables
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      nl.append(self.op.target_node)

    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and node names are valid.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    if (self.op.remove_instance and self.instance.admin_up and
        not self.op.shutdown):
      raise errors.OpPrereqError("Can not remove instance without shutting it"
                                 " down before", errors.ECODE_INVAL)

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
      assert self.dst_node is not None

      _CheckNodeOnline(self, self.dst_node.name)
      _CheckNodeNotDrained(self, self.dst_node.name)

      self._cds = None
      self.dest_disk_info = None
      self.dest_x509_ca = None

    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
      self.dst_node = None

      if len(self.op.target_node) != len(self.instance.disks):
        raise errors.OpPrereqError(("Received destination information for %s"
                                    " disks, but instance %s has %s disks") %
                                   (len(self.op.target_node), instance_name,
                                    len(self.instance.disks)),
                                   errors.ECODE_INVAL)

      cds = _GetClusterDomainSecret()

      # Check X509 key name
      try:
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
                                   errors.ECODE_INVAL)

      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
                                   errors.ECODE_INVAL)

      # Load and verify CA
      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
                                   (msg, ), errors.ECODE_INVAL)

      self.dest_x509_ca = cert

      # Verify target information
      disk_info = []
      for idx, disk_data in enumerate(self.op.target_node):
        try:
          (host, port, magic) = \
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
        except errors.GenericError, err:
          raise errors.OpPrereqError("Target info for disk %s: %s" %
                                     (idx, err), errors.ECODE_INVAL)

        disk_info.append((host, port, magic))

      assert len(disk_info) == len(self.op.target_node)
      self.dest_disk_info = disk_info

    else:
      raise errors.ProgrammerError("Unhandled export mode %r" %
                                   self.op.mode)

    # instance disk type verification
    # TODO: Implement export support for file-based disks
    for disk in self.instance.disks:
      if disk.dev_type == constants.LD_FILE:
        raise errors.OpPrereqError("Export not supported for instances with"
                                   " file-based disks", errors.ECODE_INVAL)

  def _CleanupExports(self, feedback_fn):
    """Removes exports of current instance from all other nodes.

    If an instance in a cluster with nodes A..D was exported to node C, its
    exports will be removed from the nodes A, B and D.

    """
    assert self.op.mode != constants.EXPORT_MODE_REMOTE

    nodelist = self.cfg.GetNodeList()
    nodelist.remove(self.dst_node.name)

    # on one-node clusters nodelist will be empty after the removal
    # if we proceed the backup would be removed because OpQueryExports
    # substitutes an empty list with the full cluster node list.
    iname = self.instance.name
    if nodelist:
      feedback_fn("Removing old exports for instance %s" % iname)
      exportlist = self.rpc.call_export_list(nodelist)
      for node in exportlist:
        if exportlist[node].fail_msg:
          continue
        if iname in exportlist[node].payload:
          msg = self.rpc.call_export_remove(node, iname).fail_msg
          if msg:
            self.LogWarning("Could not remove older export for instance %s"
                            " on node %s: %s", iname, node, msg)

  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    """
    assert self.op.mode in constants.EXPORT_MODES

    instance = self.instance
    src_node = instance.primary_node

    if self.op.shutdown:
      # shutdown the instance, but not the disks
      feedback_fn("Shutting down instance %s" % instance.name)
      result = self.rpc.call_instance_shutdown(src_node, instance,
                                               self.op.shutdown_timeout)
      # TODO: Maybe ignore failures if ignore_remove_failures is set
      result.Raise("Could not shutdown instance %s on"
                   " node %s" % (instance.name, src_node))

    # set the disks ID correctly since call_instance_start needs the
    # correct drbd minor to create the symlinks
    for disk in instance.disks:
      self.cfg.SetDiskID(disk, src_node)

    activate_disks = (not instance.admin_up)

    if activate_disks:
      # Activate the instance disks if we're exporting a stopped instance
      feedback_fn("Activating disks for %s" % instance.name)
      _StartInstanceDisks(self, instance, None)

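    # The export works on disk snapshots, so (unless the instance is being
    # removed) it can be restarted right after the snapshots are taken,
    # before the actual data copy happens.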
    try:
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
                                                     instance)

      helper.CreateSnapshots()
      try:
        if (self.op.shutdown and instance.admin_up and
            not self.op.remove_instance):
          assert not activate_disks
          feedback_fn("Starting instance %s" % instance.name)
          result = self.rpc.call_instance_start(src_node, instance, None, None)
          msg = result.fail_msg
          if msg:
            feedback_fn("Failed to start instance: %s" % msg)
            _ShutdownInstanceDisks(self, instance)
            raise errors.OpExecError("Could not start instance: %s" % msg)

        if self.op.mode == constants.EXPORT_MODE_LOCAL:
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

          (key_name, _, _) = self.x509_key_name

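          # The destination CA was loaded into an X509 object in CheckPrereq;
          # dump it back to PEM text for the export helper.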
          dest_ca_pem = \
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                            self.dest_x509_ca)

          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
                                                     key_name, dest_ca_pem,
                                                     timeouts)
      finally:
        helper.Cleanup()

      # Check for backwards compatibility
      assert len(dresults) == len(instance.disks)
      assert compat.all(isinstance(i, bool) for i in dresults), \
             "Not all results are boolean: %r" % dresults

    finally:
      if activate_disks:
        feedback_fn("Deactivating disks for %s" % instance.name)
        _ShutdownInstanceDisks(self, instance)

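    # fin_resu is the overall finalization status, dresults holds one boolean
    # per disk; aggregate everything that failed into a single error.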
    if not (compat.all(dresults) and fin_resu):
      failures = []
      if not fin_resu:
        failures.append("export finalization")
      if not compat.all(dresults):
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
                               if not dsk)
        failures.append("disk export: disk(s) %s" % fdsk)

      raise errors.OpExecError("Export failed, errors in %s" %
                               utils.CommaJoin(failures))

    # At this point, the export was successful, we can cleanup/finish

    # Remove instance if requested
    if self.op.remove_instance:
      feedback_fn("Removing instance %s" % instance.name)
      _RemoveInstance(self, feedback_fn, instance,
                      self.op.ignore_remove_failures)

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self._CleanupExports(feedback_fn)

    return fin_resu, dresults


class LURemoveExport(NoHooksLU):
  """Remove exports related to the named instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name

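    # Query every locked node for its exports and remove any matching one;
    # RPC failures only produce a warning so a single bad node does not
    # abort the removal on the others.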
    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")


class LUAddGroup(LogicalUnit):
  """Logical unit for creating node groups.

  """
  HPATH = "group-add"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # We need the new group's UUID here so that we can create and acquire the
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
    # that it should not check whether the UUID exists in the configuration.
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
    self.needed_locks = {}
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name is not an existing node group
    already.

    """
    try:
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    except errors.OpPrereqError:
      pass
    else:
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
                                 " node group (UUID: %s)" %
                                 (self.op.group_name, existing_uuid),
                                 errors.ECODE_EXISTS)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "GROUP_NAME": self.op.group_name,
      }
    mn = self.cfg.GetMasterNode()
    return env, [mn], [mn]

  def Exec(self, feedback_fn):
    """Add the node group to the cluster.

    """
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
                                  uuid=self.group_uuid,
                                  alloc_policy=self.op.alloc_policy,
                                  ndparams=self.op.ndparams)

    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
    del self.remove_locks[locking.LEVEL_NODEGROUP]


class LUQueryGroups(NoHooksLU):
  """Logical unit for querying node groups.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _SIMPLE_FIELDS = ["name", "uuid", "alloc_policy",
                    "ctime", "mtime", "serial_no"]
  _FIELDS_STATIC = utils.FieldSet("node_cnt", "node_list", "pinst_cnt",
                                  "pinst_list", *_SIMPLE_FIELDS)

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Computes the list of groups and their attributes.

    """
    all_groups = self.cfg.GetAllNodeGroupsInfo()
    name_to_uuid = dict((g.name, g.uuid) for g in all_groups.values())

    if not self.op.names:
      sorted_names = utils.NiceSort(name_to_uuid.keys())
      my_groups = [name_to_uuid[n] for n in sorted_names]
    else:
      # Accept names to be either names or UUIDs.
      all_uuid = frozenset(all_groups.keys())
      my_groups = []
      missing = []

      for name in self.op.names:
        if name in all_uuid:
          my_groups.append(name)
        elif name in name_to_uuid:
          my_groups.append(name_to_uuid[name])
        else:
          missing.append(name)

      if missing:
        raise errors.OpPrereqError("Some groups do not exist: %s" % missing,
                                   errors.ECODE_NOENT)

    do_nodes = bool(frozenset(["node_cnt", "node_list"]).
                    intersection(self.op.output_fields))

    do_instances = bool(frozenset(["pinst_cnt", "pinst_list"]).
                        intersection(self.op.output_fields))

    # We need to map group->[nodes], and group->[instances]. The former is
    # directly attainable, but the latter we have to do through instance->node,
    # hence we need to process nodes even if we only need instance information.
    if do_nodes or do_instances:
      all_nodes = self.cfg.GetAllNodesInfo()
      group_to_nodes = dict((all_groups[name].uuid, []) for name in my_groups)
      node_to_group = {}

      for node in all_nodes.values():
        if node.group in group_to_nodes:
          group_to_nodes[node.group].append(node.name)
          node_to_group[node.name] = node.group

      if do_instances:
        all_instances = self.cfg.GetAllInstancesInfo()
        group_to_instances = dict((all_groups[name].uuid, [])
                                  for name in my_groups)
        for instance in all_instances.values():
          node = instance.primary_node
          if node in node_to_group:
            group_to_instances[node_to_group[node]].append(instance.name)

    output = []

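    # Build one output row per requested group, in the order of my_groups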
    for uuid in my_groups:
      group = all_groups[uuid]
      group_output = []

      for field in self.op.output_fields:
        if field in self._SIMPLE_FIELDS:
          val = getattr(group, field)
        elif field == "node_list":
          val = utils.NiceSort(group_to_nodes[group.uuid])
        elif field == "node_cnt":
          val = len(group_to_nodes[group.uuid])
        elif field == "pinst_list":
          val = utils.NiceSort(group_to_instances[group.uuid])
        elif field == "pinst_cnt":
          val = len(group_to_instances[group.uuid])
        else:
          raise errors.ParameterError(field)
        group_output.append(val)
      output.append(group_output)

    return output


class LUSetGroupParams(LogicalUnit):
  """Modifies the parameters of a node group.

  """
  HPATH = "group-modify"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def CheckArguments(self):
    all_changes = [
      self.op.ndparams,
      self.op.alloc_policy,
      ]

    if all_changes.count(None) == len(all_changes):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self.group = self.cfg.GetNodeGroup(self.group_uuid)

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = self.group.SimpleFillND(self.op.ndparams)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "GROUP_NAME": self.op.group_name,
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
      }
    mn = self.cfg.GetMasterNode()
    return env, [mn], [mn]

  def Exec(self, feedback_fn):
    """Modifies the node group.

    """
    result = []

    if self.op.ndparams:
      self.group.ndparams = self.new_ndparams
      result.append(("ndparams", str(self.group.ndparams)))

    if self.op.alloc_policy:
      self.group.alloc_policy = self.op.alloc_policy

    self.cfg.Update(self.group, feedback_fn)
    return result



class LURemoveGroup(LogicalUnit):
  HPATH = "group-remove"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name exists as a node group, that it is
    empty (i.e., contains no nodes), and that it is not the last group of the
    cluster.

    """
    # Verify that the group is empty.
    group_nodes = [node.name
                   for node in self.cfg.GetAllNodesInfo().values()
                   if node.group == self.group_uuid]

    if group_nodes:
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
                                 " nodes: %s" %
                                 (self.op.group_name,
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
                                 errors.ECODE_STATE)

    # Verify the cluster would not be left group-less.
    if len(self.cfg.GetNodeGroupList()) == 1:
      raise errors.OpPrereqError("Group '%s' is the last group in the cluster,"
                                 " which cannot be left without at least one"
                                 " group" % self.op.group_name,
                                 errors.ECODE_STATE)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "GROUP_NAME": self.op.group_name,
      }
    mn = self.cfg.GetMasterNode()
    return env, [mn], [mn]

  def Exec(self, feedback_fn):
    """Remove the node group.

    """
    try:
      self.cfg.RemoveNodeGroup(self.group_uuid)
    except errors.ConfigurationError:
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
                               (self.op.group_name, self.group_uuid))

    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid


class LURenameGroup(LogicalUnit):
  HPATH = "group-rename"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.old_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given old_name exists as a node group, and that
    new_name doesn't.

    """
    try:
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
    except errors.OpPrereqError:
      pass
    else:
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
                                 " node group (UUID: %s)" %
                                 (self.op.new_name, new_name_uuid),
                                 errors.ECODE_EXISTS)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "OLD_NAME": self.op.old_name,
      "NEW_NAME": self.op.new_name,
      }

    mn = self.cfg.GetMasterNode()
    all_nodes = self.cfg.GetAllNodesInfo()
    run_nodes = [mn]
    all_nodes.pop(mn, None)

    for node in all_nodes.values():
      if node.group == self.group_uuid:
        run_nodes.append(node.name)

    return env, run_nodes, run_nodes

  def Exec(self, feedback_fn):
    """Rename the node group.

    """
    group = self.cfg.GetNodeGroup(self.group_uuid)

    if group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.old_name, self.group_uuid))

    group.name = self.op.new_name
    self.cfg.Update(group, feedback_fn)

    return self.op.new_name


class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """

  def ExpandNames(self):
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)


class LUGetTags(TagsLU):
  """Returns the tags of a given object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    TagsLU.ExpandNames(self)

    # Share locks as this is only a read operation
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())


class LUSearchTags(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err), errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results


class LUAddTags(TagsLU):
  """Sets a tag on a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)


class LUDelTags(TagsLU):
  """Delete a list of tags from a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()

    diff_tags = del_tags - cur_tags
    if diff_tags:
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (utils.CommaJoin(diff_names), ),
                                 errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)


class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()


class LUTestJobqueue(NoHooksLU):
  """Utility LU to test some aspects of the job queue.

  """
  REQ_BGL = False

  # Must be lower than default timeout for WaitForJobChange to see whether it
  # notices changed jobs
  _CLIENT_CONNECT_TIMEOUT = 20.0
  _CLIENT_CONFIRM_TIMEOUT = 60.0

  @classmethod
  def _NotifyUsingSocket(cls, cb, errcls):
    """Opens a Unix socket and waits for another program to connect.

    @type cb: callable
    @param cb: Callback to send socket name to client
    @type errcls: class
    @param errcls: Exception class to use for errors

    """
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
    try:
      tmpsock = utils.PathJoin(tmpdir, "sock")

      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        sock.bind(tmpsock)
        sock.listen(1)

        # Send details to client
        cb(tmpsock)

        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
        try:
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)
      finally:
        sock.close()
    finally:
      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)

    # Wait for client to close
    try:
      try:
        # pylint: disable-msg=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
        conn.recv(1)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)
    finally:
      conn.close()

  def _SendNotification(self, test, arg, sockname):
    """Sends a notification to the client.

    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @type sockname: string
    @param sockname: Socket path

    """
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))

  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    @type prereq: bool
    @param prereq: Whether this is a prereq-phase test
    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)

    """
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError

    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg),
                                   errcls)

  def CheckArguments(self):
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
    self.expandnames_calls = 0

  def ExpandNames(self):
    checkargs_calls = getattr(self, "checkargs_calls", 0)
    if checkargs_calls < 1:
      raise errors.ProgrammerError("CheckArguments was not called")

    self.expandnames_calls += 1

    if self.op.notify_waitlock:
      self._Notify(True, constants.JQT_EXPANDNAMES, None)

    self.LogInfo("Expanding names")

    # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }

  def Exec(self, feedback_fn):
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
        # Report how many test messages have been sent
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True


class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has three sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, nodes) for
      easy usage

  """
  # pylint: disable-msg=R0902
  # lots of instance attributes
  _ALLO_KEYS = [
    "name", "mem_size", "disks", "disk_template",
    "os", "tags", "nics", "vcpus", "hypervisor",
    ]
  _RELO_KEYS = [
    "name", "relocate_from",
    ]
  _EVAC_KEYS = [
    "evac_nodes",
    ]

  def __init__(self, cfg, rpc, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.mem_size = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.evac_nodes = None
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      keyset = self._ALLO_KEYS
      fn = self._AddNewInstance
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      keyset = self._RELO_KEYS
      fn = self._AddRelocateInstance
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      keyset = self._EVAC_KEYS
      fn = self._AddEvacuateNodes
    else:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)
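    # Copy the mode-specific input parameters onto the object; anything not
    # expected by the chosen mode is a programming error.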
    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(fn)

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_list = cfg.GetNodeList()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)

    data["nodegroups"] = self._ComputeNodeGroupData(cfg)

    data["nodes"] = self._ComputeNodeData(cfg, node_data, node_iinfo, i_list)

    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)

    self.in_data = data

  @staticmethod
  def _ComputeNodeGroupData(cfg):
    """Compute node groups data.

    """
    ng = {}
    for guuid, gdata in cfg.GetAllNodeGroupsInfo().items():
      ng[guuid] = {
        "name": gdata.name,
        "alloc_policy": gdata.alloc_policy,
        }
    return ng

  @staticmethod
  def _ComputeNodeData(cfg, node_data, node_iinfo, i_list):
    """Compute global node data.

    """
    node_results = {}
    for nname, nresult in node_data.items():
      # first fill in static (config-based) values
      ninfo = cfg.GetNodeInfo(nname)
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        "group": ninfo.group,
        "master_capable": ninfo.master_capable,
        "vm_capable": ninfo.vm_capable,
        }

10720
        nresult.Raise("Can't get data for node %s" % nname)
10721
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
10722
                                nname)
10723
        remote_info = nresult.payload
10724

    
10725
        for attr in ['memory_total', 'memory_free', 'memory_dom0',
10726
                     'vg_size', 'vg_free', 'cpu_total']:
10727
          if attr not in remote_info:
10728
            raise errors.OpExecError("Node '%s' didn't return attribute"
10729
                                     " '%s'" % (nname, attr))
10730
          if not isinstance(remote_info[attr], int):
10731
            raise errors.OpExecError("Node '%s' returned invalid value"
10732
                                     " for '%s': %s" %
10733
                                     (nname, attr, remote_info[attr]))
10734
        # compute memory used by primary instances
10735
        i_p_mem = i_p_up_mem = 0
10736
        for iinfo, beinfo in i_list:
10737
          if iinfo.primary_node == nname:
10738
            i_p_mem += beinfo[constants.BE_MEMORY]
10739
            if iinfo.name not in node_iinfo[nname].payload:
10740
              i_used_mem = 0
10741
            else:
10742
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
10743
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
10744
            remote_info['memory_free'] -= max(0, i_mem_diff)
10745

    
10746
            if iinfo.admin_up:
10747
              i_p_up_mem += beinfo[constants.BE_MEMORY]
10748

    
10749
        # compute memory used by instances
10750
        pnr_dyn = {
10751
          "total_memory": remote_info['memory_total'],
10752
          "reserved_memory": remote_info['memory_dom0'],
10753
          "free_memory": remote_info['memory_free'],
10754
          "total_disk": remote_info['vg_size'],
10755
          "free_disk": remote_info['vg_free'],
10756
          "total_cpus": remote_info['cpu_total'],
10757
          "i_pri_memory": i_p_mem,
10758
          "i_pri_up_memory": i_p_up_mem,
10759
          }
10760
        pnr.update(pnr_dyn)
10761

    
10762
      node_results[nname] = pnr
10763

    
10764
    return node_results
10765

    
10766
  @staticmethod
10767
  def _ComputeInstanceData(cluster_info, i_list):
10768
    """Compute global instance data.
10769

10770
    """
10771
    instance_data = {}
10772
    for iinfo, beinfo in i_list:
10773
      nic_data = []
10774
      for nic in iinfo.nics:
10775
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
10776
        nic_dict = {"mac": nic.mac,
10777
                    "ip": nic.ip,
10778
                    "mode": filled_params[constants.NIC_MODE],
10779
                    "link": filled_params[constants.NIC_LINK],
10780
                   }
10781
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
10782
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
10783
        nic_data.append(nic_dict)
10784
      pir = {
10785
        "tags": list(iinfo.GetTags()),
10786
        "admin_up": iinfo.admin_up,
10787
        "vcpus": beinfo[constants.BE_VCPUS],
10788
        "memory": beinfo[constants.BE_MEMORY],
10789
        "os": iinfo.os,
10790
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
10791
        "nics": nic_data,
10792
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
10793
        "disk_template": iinfo.disk_template,
10794
        "hypervisor": iinfo.hypervisor,
10795
        }
10796
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
10797
                                                 pir["disks"])
10798
      instance_data[iinfo.name] = pir
10799

    
10800
    return instance_data
10801

    
10802
  def _AddNewInstance(self):
10803
    """Add new instance data to allocator structure.
10804

10805
    This in combination with _AllocatorGetClusterData will create the
10806
    correct structure needed as input for the allocator.
10807

10808
    The checks for the completeness of the opcode must have already been
10809
    done.
10810

10811
    """
10812
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)
10813

    
10814
    if self.disk_template in constants.DTS_NET_MIRROR:
10815
      self.required_nodes = 2
10816
    else:
10817
      self.required_nodes = 1
10818
    request = {
10819
      "name": self.name,
10820
      "disk_template": self.disk_template,
10821
      "tags": self.tags,
10822
      "os": self.os,
10823
      "vcpus": self.vcpus,
10824
      "memory": self.mem_size,
10825
      "disks": self.disks,
10826
      "disk_space_total": disk_space,
10827
      "nics": self.nics,
10828
      "required_nodes": self.required_nodes,
10829
      }
10830
    return request
10831

    
10832
  def _AddRelocateInstance(self):
10833
    """Add relocate instance data to allocator structure.
10834

10835
    This in combination with _IAllocatorGetClusterData will create the
10836
    correct structure needed as input for the allocator.
10837

10838
    The checks for the completeness of the opcode must have already been
10839
    done.
10840

10841
    """
10842
    instance = self.cfg.GetInstanceInfo(self.name)
10843
    if instance is None:
10844
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
10845
                                   " IAllocator" % self.name)
10846

    
10847
    if instance.disk_template not in constants.DTS_NET_MIRROR:
10848
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
10849
                                 errors.ECODE_INVAL)
10850

    
10851
    if len(instance.secondary_nodes) != 1:
10852
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
10853
                                 errors.ECODE_STATE)
10854

    
10855
    self.required_nodes = 1
10856
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
10857
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
10858

    
10859
    request = {
10860
      "name": self.name,
10861
      "disk_space_total": disk_space,
10862
      "required_nodes": self.required_nodes,
10863
      "relocate_from": self.relocate_from,
10864
      }
10865
    return request
10866

    
10867
  def _AddEvacuateNodes(self):
10868
    """Add evacuate nodes data to allocator structure.
10869

10870
    """
10871
    request = {
10872
      "evac_nodes": self.evac_nodes
10873
      }
10874
    return request
10875

    
10876
  def _BuildInputData(self, fn):
10877
    """Build input data structures.
10878

10879
    """
10880
    self._ComputeClusterData()
10881

    
10882
    request = fn()
10883
    request["type"] = self.mode
10884
    self.in_data["request"] = request
10885

    
10886
    self.in_text = serializer.Dump(self.in_data)
10887

    
10888
  def Run(self, name, validate=True, call_fn=None):
10889
    """Run an instance allocator and return the results.
10890

10891
    """
10892
    if call_fn is None:
10893
      call_fn = self.rpc.call_iallocator_runner
10894

    
10895
    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
10896
    result.Raise("Failure while running the iallocator script")
10897

    
10898
    self.out_text = result.payload
10899
    if validate:
10900
      self._ValidateResult()
10901

    
10902
  def _ValidateResult(self):
10903
    """Process the allocator results.
10904

10905
    This will process and if successful save the result in
10906
    self.out_data and the other parameters.
10907

10908
    """
10909
    try:
10910
      rdict = serializer.Load(self.out_text)
10911
    except Exception, err:
10912
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
10913

    
10914
    if not isinstance(rdict, dict):
10915
      raise errors.OpExecError("Can't parse iallocator results: not a dict")
10916

    
10917
    # TODO: remove backwards compatiblity in later versions
10918
    if "nodes" in rdict and "result" not in rdict:
10919
      rdict["result"] = rdict["nodes"]
10920
      del rdict["nodes"]
10921

    
10922
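    # "success", "info" and "result" are mandatory in the allocator's reply;
    # copy them onto this object so callers can use them directly.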
    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["result"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
                               " is not a list")
    self.out_data = rdict


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    of the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
10963
        if (not isinstance(row, dict) or
10964
            "size" not in row or
10965
            not isinstance(row["size"], int) or
10966
            "mode" not in row or
10967
            row["mode"] not in ['r', 'w']):
10968
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
10969
                                     " parameter", errors.ECODE_INVAL)
10970
      if self.op.hypervisor is None:
10971
        self.op.hypervisor = self.cfg.GetHypervisorType()
10972
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
10973
      fname = _ExpandInstanceName(self.cfg, self.op.name)
10974
      self.op.name = fname
10975
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
10976
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
10977
      if not hasattr(self.op, "evac_nodes"):
10978
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
10979
                                   " opcode input", errors.ECODE_INVAL)
10980
    else:
10981
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
10982
                                 self.op.mode, errors.ECODE_INVAL)
10983

    
10984
    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
10985
      if self.op.allocator is None:
10986
        raise errors.OpPrereqError("Missing allocator name",
10987
                                   errors.ECODE_INVAL)
10988
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
10989
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
10990
                                 self.op.direction, errors.ECODE_INVAL)
10991

    
10992
  def Exec(self, feedback_fn):
10993
    """Run the allocator test.
10994

10995
    """
10996
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
10997
      ial = IAllocator(self.cfg, self.rpc,
10998
                       mode=self.op.mode,
10999
                       name=self.op.name,
11000
                       mem_size=self.op.mem_size,
11001
                       disks=self.op.disks,
11002
                       disk_template=self.op.disk_template,
11003
                       os=self.op.os,
11004
                       tags=self.op.tags,
11005
                       nics=self.op.nics,
11006
                       vcpus=self.op.vcpus,
11007
                       hypervisor=self.op.hypervisor,
11008
                       )
11009
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
11010
      ial = IAllocator(self.cfg, self.rpc,
11011
                       mode=self.op.mode,
11012
                       name=self.op.name,
11013
                       relocate_from=list(self.relocate_from),
11014
                       )
11015
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
11016
      ial = IAllocator(self.cfg, self.rpc,
11017
                       mode=self.op.mode,
11018
                       evac_nodes=self.op.evac_nodes)
11019
    else:
11020
      raise errors.ProgrammerError("Uncatched mode %s in"
11021
                                   " LUTestAllocator.Exec", self.op.mode)
11022

    
11023
    if self.op.direction == constants.IALLOCATOR_DIR_IN:
11024
      result = ial.in_text
11025
    else:
11026
      ial.Run(self.op.allocator, validate=False)
11027
      result = ial.out_text
11028
    return result