
root / lib / cmdlib.py @ 4e272d8c


1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable-msg=W0201,C0302
25

    
26
# W0201 since most LU attributes are defined in CheckPrereq or similar
27
# functions
28

    
29
# C0302: since we have waaaay too many lines in this module
30

    
31
import os
32
import os.path
33
import time
34
import re
35
import platform
36
import logging
37
import copy
38
import OpenSSL
39
import socket
40
import tempfile
41
import shutil
42
import itertools
43

    
44
from ganeti import ssh
45
from ganeti import utils
46
from ganeti import errors
47
from ganeti import hypervisor
48
from ganeti import locking
49
from ganeti import constants
50
from ganeti import objects
51
from ganeti import serializer
52
from ganeti import ssconf
53
from ganeti import uidpool
54
from ganeti import compat
55
from ganeti import masterd
56
from ganeti import netutils
57
from ganeti import query
58
from ganeti import qlang
59
from ganeti import opcodes
60
from ganeti import ht
61

    
62
import ganeti.masterd.instance # pylint: disable-msg=W0611
63

    
64

    
65
def _SupportsOob(cfg, node):
66
  """Tells if node supports OOB.
67

68
  @type cfg: L{config.ConfigWriter}
69
  @param cfg: The cluster configuration
70
  @type node: L{objects.Node}
71
  @param node: The node
72
  @return: The OOB script if supported or an empty string otherwise
73

74
  """
75
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
76

    
77

    
78
class ResultWithJobs:
79
  """Data container for LU results with jobs.
80

81
  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
82
  by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
83
  contained in the C{jobs} attribute and include the job IDs in the opcode
84
  result.
85

86
  """
87
  def __init__(self, jobs, **kwargs):
88
    """Initializes this class.
89

90
    Additional return values can be specified as keyword arguments.
91

92
    @type jobs: list of lists of L{opcodes.OpCode}
93
    @param jobs: A list of lists of opcode objects
94

95
    """
96
    self.jobs = jobs
97
    self.other = kwargs
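  # Illustrative note (assumption, not part of the original file): an LU's
  # Exec could hand work off to the job queue by returning e.g.
  #   return ResultWithJobs([[opcodes.OpTestDelay(duration=0)]], custom="x")
  # mcpu.Processor._ProcessResult would then submit the job and include its
  # ID (together with the extra "custom" value) in the opcode result.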
98

    
99

    
100
class LogicalUnit(object):
101
  """Logical Unit base class.
102

103
  Subclasses must follow these rules:
104
    - implement ExpandNames
105
    - implement CheckPrereq (except when tasklets are used)
106
    - implement Exec (except when tasklets are used)
107
    - implement BuildHooksEnv
108
    - implement BuildHooksNodes
109
    - redefine HPATH and HTYPE
110
    - optionally redefine their run requirements:
111
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
112

113
  Note that all commands require root permissions.
114

115
  @ivar dry_run_result: the value (if any) that will be returned to the caller
116
      in dry-run mode (signalled by opcode dry_run parameter)
117

118
  """
119
  HPATH = None
120
  HTYPE = None
121
  REQ_BGL = True
122

    
123
  def __init__(self, processor, op, context, rpc):
124
    """Constructor for LogicalUnit.
125

126
    This needs to be overridden in derived classes in order to check op
127
    validity.
128

129
    """
130
    self.proc = processor
131
    self.op = op
132
    self.cfg = context.cfg
133
    self.glm = context.glm
134
    self.context = context
135
    self.rpc = rpc
136
    # Dicts used to declare locking needs to mcpu
137
    self.needed_locks = None
138
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
139
    self.add_locks = {}
140
    self.remove_locks = {}
141
    # Used to force good behavior when calling helper functions
142
    self.recalculate_locks = {}
143
    # logging
144
    self.Log = processor.Log # pylint: disable-msg=C0103
145
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
146
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
147
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
148
    # support for dry-run
149
    self.dry_run_result = None
150
    # support for generic debug attribute
151
    if (not hasattr(self.op, "debug_level") or
152
        not isinstance(self.op.debug_level, int)):
153
      self.op.debug_level = 0
154

    
155
    # Tasklets
156
    self.tasklets = None
157

    
158
    # Validate opcode parameters and set defaults
159
    self.op.Validate(True)
160

    
161
    self.CheckArguments()
162

    
163
  def CheckArguments(self):
164
    """Check syntactic validity for the opcode arguments.
165

166
    This method is for doing a simple syntactic check and ensuring
167
    validity of opcode parameters, without any cluster-related
168
    checks. While the same can be accomplished in ExpandNames and/or
169
    CheckPrereq, doing these separately is better because:
170

171
      - ExpandNames is left as purely a lock-related function
172
      - CheckPrereq is run after we have acquired locks (and possibly
173
        waited for them)
174

175
    The function is allowed to change the self.op attribute so that
176
    later methods can no longer worry about missing parameters.
177

178
    """
179
    pass
180

    
181
  def ExpandNames(self):
182
    """Expand names for this LU.
183

184
    This method is called before starting to execute the opcode, and it should
185
    update all the parameters of the opcode to their canonical form (e.g. a
186
    short node name must be fully expanded after this method has successfully
187
    completed). This way locking, hooks, logging, etc. can work correctly.
188

189
    LUs which implement this method must also populate the self.needed_locks
190
    member, as a dict with lock levels as keys, and a list of needed lock names
191
    as values. Rules:
192

193
      - use an empty dict if you don't need any lock
194
      - if you don't need any lock at a particular level omit that level
195
      - don't put anything for the BGL level
196
      - if you want all locks at a level use locking.ALL_SET as a value
197

198
    If you need to share locks (rather than acquire them exclusively) at one
199
    level you can modify self.share_locks, setting a true value (usually 1) for
200
    that level. By default locks are not shared.
201

202
    This function can also define a list of tasklets, which then will be
203
    executed in order instead of the usual LU-level CheckPrereq and Exec
204
    functions, if those are not defined by the LU.
205

206
    Examples::
207

208
      # Acquire all nodes and one instance
209
      self.needed_locks = {
210
        locking.LEVEL_NODE: locking.ALL_SET,
211
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
212
      }
213
      # Acquire just two nodes
214
      self.needed_locks = {
215
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
216
      }
217
      # Acquire no locks
218
      self.needed_locks = {} # No, you can't leave it to the default value None
219

220
    """
221
    # The implementation of this method is mandatory only if the new LU is
222
    # concurrent, so that old LUs don't need to be changed all at the same
223
    # time.
224
    if self.REQ_BGL:
225
      self.needed_locks = {} # Exclusive LUs don't need locks.
226
    else:
227
      raise NotImplementedError
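  # Illustrative sketch (assumption, not from the original file): a concurrent
  # LU (REQ_BGL = False) would typically implement ExpandNames along these
  # lines:
  #   def ExpandNames(self):
  #     self.op.instance_name = _ExpandInstanceName(self.cfg,
  #                                                 self.op.instance_name)
  #     self.needed_locks = {locking.LEVEL_INSTANCE: [self.op.instance_name]}
  #     self.share_locks[locking.LEVEL_NODE] = 1  # share, don't hold exclusively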
228

    
229
  def DeclareLocks(self, level):
230
    """Declare LU locking needs for a level
231

232
    While most LUs can just declare their locking needs at ExpandNames time,
233
    sometimes there's the need to calculate some locks after having acquired
234
    the ones before. This function is called just before acquiring locks at a
235
    particular level, but after acquiring the ones at lower levels, and permits
236
    such calculations. It can be used to modify self.needed_locks, and by
237
    default it does nothing.
238

239
    This function is only called if you have something already set in
240
    self.needed_locks for the level.
241

242
    @param level: Locking level which is going to be locked
243
    @type level: member of ganeti.locking.LEVELS
244

245
    """
246

    
247
  def CheckPrereq(self):
248
    """Check prerequisites for this LU.
249

250
    This method should check that the prerequisites for the execution
251
    of this LU are fulfilled. It can do internode communication, but
252
    it should be idempotent - no cluster or system changes are
253
    allowed.
254

255
    The method should raise errors.OpPrereqError in case something is
256
    not fulfilled. Its return value is ignored.
257

258
    This method should also update all the parameters of the opcode to
259
    their canonical form if it hasn't been done by ExpandNames before.
260

261
    """
262
    if self.tasklets is not None:
263
      for (idx, tl) in enumerate(self.tasklets):
264
        logging.debug("Checking prerequisites for tasklet %s/%s",
265
                      idx + 1, len(self.tasklets))
266
        tl.CheckPrereq()
267
    else:
268
      pass
269

    
270
  def Exec(self, feedback_fn):
271
    """Execute the LU.
272

273
    This method should implement the actual work. It should raise
274
    errors.OpExecError for failures that are somewhat dealt with in
275
    code, or expected.
276

277
    """
278
    if self.tasklets is not None:
279
      for (idx, tl) in enumerate(self.tasklets):
280
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
281
        tl.Exec(feedback_fn)
282
    else:
283
      raise NotImplementedError
284

    
285
  def BuildHooksEnv(self):
286
    """Build hooks environment for this LU.
287

288
    @rtype: dict
289
    @return: Dictionary containing the environment that will be used for
290
      running the hooks for this LU. The keys of the dict must not be prefixed
291
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
292
      will extend the environment with additional variables. If no environment
293
      should be defined, an empty dictionary should be returned (not C{None}).
294
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
295
      will not be called.
296

297
    """
298
    raise NotImplementedError
299

    
300
  def BuildHooksNodes(self):
301
    """Build list of nodes to run LU's hooks.
302

303
    @rtype: tuple; (list, list)
304
    @return: Tuple containing a list of node names on which the hook
305
      should run before the execution and a list of node names on which the
306
      hook should run after the execution. If there are no nodes, an empty
307
      list should be returned (not None).
308
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
309
      will not be called.
310

311
    """
312
    raise NotImplementedError
313

    
314
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
315
    """Notify the LU about the results of its hooks.
316

317
    This method is called every time a hooks phase is executed, and notifies
318
    the Logical Unit about the hooks' result. The LU can then use it to alter
319
    its result based on the hooks.  By default the method does nothing and the
320
    previous result is passed back unchanged but any LU can define it if it
321
    wants to use the local cluster hook-scripts somehow.
322

323
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
324
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
325
    @param hook_results: the results of the multi-node hooks rpc call
326
    @param feedback_fn: function used to send feedback back to the caller
327
    @param lu_result: the previous Exec result this LU had, or None
328
        in the PRE phase
329
    @return: the new Exec result, based on the previous result
330
        and hook results
331

332
    """
333
    # API must be kept, thus we ignore the "unused argument" and
334
    # "could be a function" warnings
335
    # pylint: disable-msg=W0613,R0201
336
    return lu_result
337

    
338
  def _ExpandAndLockInstance(self):
339
    """Helper function to expand and lock an instance.
340

341
    Many LUs that work on an instance take its name in self.op.instance_name
342
    and need to expand it and then declare the expanded name for locking. This
343
    function does it, and then updates self.op.instance_name to the expanded
344
    name. It also initializes needed_locks as a dict, if this hasn't been done
345
    before.
346

347
    """
348
    if self.needed_locks is None:
349
      self.needed_locks = {}
350
    else:
351
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
352
        "_ExpandAndLockInstance called with instance-level locks set"
353
    self.op.instance_name = _ExpandInstanceName(self.cfg,
354
                                                self.op.instance_name)
355
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
356

    
357
  def _LockInstancesNodes(self, primary_only=False):
358
    """Helper function to declare instances' nodes for locking.
359

360
    This function should be called after locking one or more instances to lock
361
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
362
    with all primary or secondary nodes for instances already locked and
363
    present in self.needed_locks[locking.LEVEL_INSTANCE].
364

365
    It should be called from DeclareLocks, and for safety only works if
366
    self.recalculate_locks[locking.LEVEL_NODE] is set.
367

368
    In the future it may grow parameters to just lock some instance's nodes, or
369
    to just lock primary or secondary nodes, if needed.
370

371
    It should be called in DeclareLocks in a way similar to::
372

373
      if level == locking.LEVEL_NODE:
374
        self._LockInstancesNodes()
375

376
    @type primary_only: boolean
377
    @param primary_only: only lock primary nodes of locked instances
378

379
    """
380
    assert locking.LEVEL_NODE in self.recalculate_locks, \
381
      "_LockInstancesNodes helper function called with no nodes to recalculate"
382

    
383
    # TODO: check if we've really been called with the instance locks held
384

    
385
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
386
    # future we might want to have different behaviors depending on the value
387
    # of self.recalculate_locks[locking.LEVEL_NODE]
388
    wanted_nodes = []
389
    for instance_name in self.glm.list_owned(locking.LEVEL_INSTANCE):
390
      instance = self.context.cfg.GetInstanceInfo(instance_name)
391
      wanted_nodes.append(instance.primary_node)
392
      if not primary_only:
393
        wanted_nodes.extend(instance.secondary_nodes)
394

    
395
    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
396
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
397
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
398
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
399

    
400
    del self.recalculate_locks[locking.LEVEL_NODE]
401

    
402

    
403
class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
404
  """Simple LU which runs no hooks.
405

406
  This LU is intended as a parent for other LogicalUnits which will
407
  run no hooks, in order to reduce duplicate code.
408

409
  """
410
  HPATH = None
411
  HTYPE = None
412

    
413
  def BuildHooksEnv(self):
414
    """Empty BuildHooksEnv for NoHooksLu.
415

416
    This just raises an error.
417

418
    """
419
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")
420

    
421
  def BuildHooksNodes(self):
422
    """Empty BuildHooksNodes for NoHooksLU.
423

424
    """
425
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
426

    
427

    
428
class Tasklet:
429
  """Tasklet base class.
430

431
  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
432
  they can mix legacy code with tasklets. Locking needs to be done in the LU;
433
  tasklets know nothing about locks.
434

435
  Subclasses must follow these rules:
436
    - Implement CheckPrereq
437
    - Implement Exec
438

439
  """
440
  def __init__(self, lu):
441
    self.lu = lu
442

    
443
    # Shortcuts
444
    self.cfg = lu.cfg
445
    self.rpc = lu.rpc
446

    
447
  def CheckPrereq(self):
448
    """Check prerequisites for this tasklets.
449

450
    This method should check whether the prerequisites for the execution of
451
    this tasklet are fulfilled. It can do internode communication, but it
452
    should be idempotent - no cluster or system changes are allowed.
453

454
    The method should raise errors.OpPrereqError in case something is not
455
    fulfilled. Its return value is ignored.
456

457
    This method should also update all parameters to their canonical form if it
458
    hasn't been done before.
459

460
    """
461
    pass
462

    
463
  def Exec(self, feedback_fn):
464
    """Execute the tasklet.
465

466
    This method should implement the actual work. It should raise
467
    errors.OpExecError for failures that are somewhat dealt with in code, or
468
    expected.
469

470
    """
471
    raise NotImplementedError
472

    
473

    
474
class _QueryBase:
475
  """Base for query utility classes.
476

477
  """
478
  #: Attribute holding field definitions
479
  FIELDS = None
480

    
481
  def __init__(self, filter_, fields, use_locking):
482
    """Initializes this class.
483

484
    """
485
    self.use_locking = use_locking
486

    
487
    self.query = query.Query(self.FIELDS, fields, filter_=filter_,
488
                             namefield="name")
489
    self.requested_data = self.query.RequestedData()
490
    self.names = self.query.RequestedNames()
491

    
492
    # Sort only if no names were requested
493
    self.sort_by_name = not self.names
494

    
495
    self.do_locking = None
496
    self.wanted = None
497

    
498
  def _GetNames(self, lu, all_names, lock_level):
499
    """Helper function to determine names asked for in the query.
500

501
    """
502
    if self.do_locking:
503
      names = lu.glm.list_owned(lock_level)
504
    else:
505
      names = all_names
506

    
507
    if self.wanted == locking.ALL_SET:
508
      assert not self.names
509
      # caller didn't specify names, so ordering is not important
510
      return utils.NiceSort(names)
511

    
512
    # caller specified names and we must keep the same order
513
    assert self.names
514
    assert not self.do_locking or lu.glm.is_owned(lock_level)
515

    
516
    missing = set(self.wanted).difference(names)
517
    if missing:
518
      raise errors.OpExecError("Some items were removed before retrieving"
519
                               " their data: %s" % missing)
520

    
521
    # Return expanded names
522
    return self.wanted
523

    
524
  def ExpandNames(self, lu):
525
    """Expand names for this query.
526

527
    See L{LogicalUnit.ExpandNames}.
528

529
    """
530
    raise NotImplementedError()
531

    
532
  def DeclareLocks(self, lu, level):
533
    """Declare locks for this query.
534

535
    See L{LogicalUnit.DeclareLocks}.
536

537
    """
538
    raise NotImplementedError()
539

    
540
  def _GetQueryData(self, lu):
541
    """Collects all data for this query.
542

543
    @return: Query data object
544

545
    """
546
    raise NotImplementedError()
547

    
548
  def NewStyleQuery(self, lu):
549
    """Collect data and execute query.
550

551
    """
552
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
553
                                  sort_by_name=self.sort_by_name)
554

    
555
  def OldStyleQuery(self, lu):
556
    """Collect data and execute query.
557

558
    """
559
    return self.query.OldStyleQuery(self._GetQueryData(lu),
560
                                    sort_by_name=self.sort_by_name)
561

    
562

    
563
def _GetWantedNodes(lu, nodes):
564
  """Returns list of checked and expanded node names.
565

566
  @type lu: L{LogicalUnit}
567
  @param lu: the logical unit on whose behalf we execute
568
  @type nodes: list
569
  @param nodes: list of node names or None for all nodes
570
  @rtype: list
571
  @return: the list of nodes, sorted
572
  @raise errors.ProgrammerError: if the nodes parameter is of the wrong type
573

574
  """
575
  if nodes:
576
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]
577

    
578
  return utils.NiceSort(lu.cfg.GetNodeList())
579

    
580

    
581
def _GetWantedInstances(lu, instances):
582
  """Returns list of checked and expanded instance names.
583

584
  @type lu: L{LogicalUnit}
585
  @param lu: the logical unit on whose behalf we execute
586
  @type instances: list
587
  @param instances: list of instance names or None for all instances
588
  @rtype: list
589
  @return: the list of instances, sorted
590
  @raise errors.OpPrereqError: if the instances parameter is of the wrong type
591
  @raise errors.OpPrereqError: if any of the passed instances is not found
592

593
  """
594
  if instances:
595
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
596
  else:
597
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
598
  return wanted
599

    
600

    
601
def _GetUpdatedParams(old_params, update_dict,
602
                      use_default=True, use_none=False):
603
  """Return the new version of a parameter dictionary.
604

605
  @type old_params: dict
606
  @param old_params: old parameters
607
  @type update_dict: dict
608
  @param update_dict: dict containing new parameter values, or
609
      constants.VALUE_DEFAULT to reset the parameter to its default
610
      value
611
  @type use_default: boolean
612
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
613
      values as 'to be deleted' values
614
  @type use_none: boolean
615
  @param use_none: whether to recognise C{None} values as 'to be
616
      deleted' values
617
  @rtype: dict
618
  @return: the new parameter dictionary
619

620
  """
621
  params_copy = copy.deepcopy(old_params)
622
  for key, val in update_dict.iteritems():
623
    if ((use_default and val == constants.VALUE_DEFAULT) or
624
        (use_none and val is None)):
625
      try:
626
        del params_copy[key]
627
      except KeyError:
628
        pass
629
    else:
630
      params_copy[key] = val
631
  return params_copy
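# Illustrative example (assumption, not from the original file): with
# old_params={"a": 1, "b": 2} and update_dict={"a": constants.VALUE_DEFAULT,
# "c": 3}, the default use_default=True removes "a" and the result is
# {"b": 2, "c": 3}; with use_none=True a C{None} value deletes a key in the
# same way.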
632

    
633

    
634
def _ReleaseLocks(lu, level, names=None, keep=None):
635
  """Releases locks owned by an LU.
636

637
  @type lu: L{LogicalUnit}
638
  @param level: Lock level
639
  @type names: list or None
640
  @param names: Names of locks to release
641
  @type keep: list or None
642
  @param keep: Names of locks to retain
643

644
  """
645
  assert not (keep is not None and names is not None), \
646
         "Only one of the 'names' and the 'keep' parameters can be given"
647

    
648
  if names is not None:
649
    should_release = names.__contains__
650
  elif keep:
651
    should_release = lambda name: name not in keep
652
  else:
653
    should_release = None
654

    
655
  if should_release:
656
    retain = []
657
    release = []
658

    
659
    # Determine which locks to release
660
    for name in lu.glm.list_owned(level):
661
      if should_release(name):
662
        release.append(name)
663
      else:
664
        retain.append(name)
665

    
666
    assert len(lu.glm.list_owned(level)) == (len(retain) + len(release))
667

    
668
    # Release just some locks
669
    lu.glm.release(level, names=release)
670

    
671
    assert frozenset(lu.glm.list_owned(level)) == frozenset(retain)
672
  else:
673
    # Release everything
674
    lu.glm.release(level)
675

    
676
    assert not lu.glm.is_owned(level), "No locks should be owned"
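# Illustrative usage (assumption, not from the original file): an LU that has
# narrowed its work down to a single node could keep just that lock with
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.node_name])
# or release the whole level by passing neither "names" nor "keep".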
677

    
678

    
679
def _RunPostHook(lu, node_name):
680
  """Runs the post-hook for an opcode on a single node.
681

682
  """
683
  hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
684
  try:
685
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
686
  except:
687
    # pylint: disable-msg=W0702
688
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)
689

    
690

    
691
def _CheckOutputFields(static, dynamic, selected):
692
  """Checks whether all selected fields are valid.
693

694
  @type static: L{utils.FieldSet}
695
  @param static: static fields set
696
  @type dynamic: L{utils.FieldSet}
697
  @param dynamic: dynamic fields set
698

699
  """
700
  f = utils.FieldSet()
701
  f.Extend(static)
702
  f.Extend(dynamic)
703

    
704
  delta = f.NonMatching(selected)
705
  if delta:
706
    raise errors.OpPrereqError("Unknown output fields selected: %s"
707
                               % ",".join(delta), errors.ECODE_INVAL)
708

    
709

    
710
def _CheckGlobalHvParams(params):
711
  """Validates that given hypervisor params are not global ones.
712

713
  This will ensure that instances don't get customised versions of
714
  global params.
715

716
  """
717
  used_globals = constants.HVC_GLOBALS.intersection(params)
718
  if used_globals:
719
    msg = ("The following hypervisor parameters are global and cannot"
720
           " be customized at instance level, please modify them at"
721
           " cluster level: %s" % utils.CommaJoin(used_globals))
722
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
723

    
724

    
725
def _CheckNodeOnline(lu, node, msg=None):
726
  """Ensure that a given node is online.
727

728
  @param lu: the LU on behalf of which we make the check
729
  @param node: the node to check
730
  @param msg: if passed, should be a message to replace the default one
731
  @raise errors.OpPrereqError: if the node is offline
732

733
  """
734
  if msg is None:
735
    msg = "Can't use offline node"
736
  if lu.cfg.GetNodeInfo(node).offline:
737
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
738

    
739

    
740
def _CheckNodeNotDrained(lu, node):
741
  """Ensure that a given node is not drained.
742

743
  @param lu: the LU on behalf of which we make the check
744
  @param node: the node to check
745
  @raise errors.OpPrereqError: if the node is drained
746

747
  """
748
  if lu.cfg.GetNodeInfo(node).drained:
749
    raise errors.OpPrereqError("Can't use drained node %s" % node,
750
                               errors.ECODE_STATE)
751

    
752

    
753
def _CheckNodeVmCapable(lu, node):
754
  """Ensure that a given node is vm capable.
755

756
  @param lu: the LU on behalf of which we make the check
757
  @param node: the node to check
758
  @raise errors.OpPrereqError: if the node is not vm capable
759

760
  """
761
  if not lu.cfg.GetNodeInfo(node).vm_capable:
762
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
763
                               errors.ECODE_STATE)
764

    
765

    
766
def _CheckNodeHasOS(lu, node, os_name, force_variant):
767
  """Ensure that a node supports a given OS.
768

769
  @param lu: the LU on behalf of which we make the check
770
  @param node: the node to check
771
  @param os_name: the OS to query about
772
  @param force_variant: whether to ignore variant errors
773
  @raise errors.OpPrereqError: if the node does not support the OS
774

775
  """
776
  result = lu.rpc.call_os_get(node, os_name)
777
  result.Raise("OS '%s' not in supported OS list for node %s" %
778
               (os_name, node),
779
               prereq=True, ecode=errors.ECODE_INVAL)
780
  if not force_variant:
781
    _CheckOSVariant(result.payload, os_name)
782

    
783

    
784
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
785
  """Ensure that a node has the given secondary ip.
786

787
  @type lu: L{LogicalUnit}
788
  @param lu: the LU on behalf of which we make the check
789
  @type node: string
790
  @param node: the node to check
791
  @type secondary_ip: string
792
  @param secondary_ip: the ip to check
793
  @type prereq: boolean
794
  @param prereq: whether to throw a prerequisite or an execute error
795
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
796
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
797

798
  """
799
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
800
  result.Raise("Failure checking secondary ip on node %s" % node,
801
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
802
  if not result.payload:
803
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
804
           " please fix and re-run this command" % secondary_ip)
805
    if prereq:
806
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
807
    else:
808
      raise errors.OpExecError(msg)
809

    
810

    
811
def _GetClusterDomainSecret():
812
  """Reads the cluster domain secret.
813

814
  """
815
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
816
                               strict=True)
817

    
818

    
819
def _CheckInstanceDown(lu, instance, reason):
820
  """Ensure that an instance is not running."""
821
  if instance.admin_up:
822
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
823
                               (instance.name, reason), errors.ECODE_STATE)
824

    
825
  pnode = instance.primary_node
826
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
827
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
828
              prereq=True, ecode=errors.ECODE_ENVIRON)
829

    
830
  if instance.name in ins_l.payload:
831
    raise errors.OpPrereqError("Instance %s is running, %s" %
832
                               (instance.name, reason), errors.ECODE_STATE)
833

    
834

    
835
def _ExpandItemName(fn, name, kind):
836
  """Expand an item name.
837

838
  @param fn: the function to use for expansion
839
  @param name: requested item name
840
  @param kind: text description ('Node' or 'Instance')
841
  @return: the resolved (full) name
842
  @raise errors.OpPrereqError: if the item is not found
843

844
  """
845
  full_name = fn(name)
846
  if full_name is None:
847
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
848
                               errors.ECODE_NOENT)
849
  return full_name
850

    
851

    
852
def _ExpandNodeName(cfg, name):
853
  """Wrapper over L{_ExpandItemName} for nodes."""
854
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
855

    
856

    
857
def _ExpandInstanceName(cfg, name):
858
  """Wrapper over L{_ExpandItemName} for instance."""
859
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
860

    
861

    
862
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
863
                          memory, vcpus, nics, disk_template, disks,
864
                          bep, hvp, hypervisor_name):
865
  """Builds instance related env variables for hooks
866

867
  This builds the hook environment from individual variables.
868

869
  @type name: string
870
  @param name: the name of the instance
871
  @type primary_node: string
872
  @param primary_node: the name of the instance's primary node
873
  @type secondary_nodes: list
874
  @param secondary_nodes: list of secondary nodes as strings
875
  @type os_type: string
876
  @param os_type: the name of the instance's OS
877
  @type status: boolean
878
  @param status: the should_run status of the instance
879
  @type memory: string
880
  @param memory: the memory size of the instance
881
  @type vcpus: string
882
  @param vcpus: the count of VCPUs the instance has
883
  @type nics: list
884
  @param nics: list of tuples (ip, mac, mode, link) representing
885
      the NICs the instance has
886
  @type disk_template: string
887
  @param disk_template: the disk template of the instance
888
  @type disks: list
889
  @param disks: the list of (size, mode) pairs
890
  @type bep: dict
891
  @param bep: the backend parameters for the instance
892
  @type hvp: dict
893
  @param hvp: the hypervisor parameters for the instance
894
  @type hypervisor_name: string
895
  @param hypervisor_name: the hypervisor for the instance
896
  @rtype: dict
897
  @return: the hook environment for this instance
898

899
  """
900
  if status:
901
    str_status = "up"
902
  else:
903
    str_status = "down"
904
  env = {
905
    "OP_TARGET": name,
906
    "INSTANCE_NAME": name,
907
    "INSTANCE_PRIMARY": primary_node,
908
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
909
    "INSTANCE_OS_TYPE": os_type,
910
    "INSTANCE_STATUS": str_status,
911
    "INSTANCE_MEMORY": memory,
912
    "INSTANCE_VCPUS": vcpus,
913
    "INSTANCE_DISK_TEMPLATE": disk_template,
914
    "INSTANCE_HYPERVISOR": hypervisor_name,
915
  }
916

    
917
  if nics:
918
    nic_count = len(nics)
919
    for idx, (ip, mac, mode, link) in enumerate(nics):
920
      if ip is None:
921
        ip = ""
922
      env["INSTANCE_NIC%d_IP" % idx] = ip
923
      env["INSTANCE_NIC%d_MAC" % idx] = mac
924
      env["INSTANCE_NIC%d_MODE" % idx] = mode
925
      env["INSTANCE_NIC%d_LINK" % idx] = link
926
      if mode == constants.NIC_MODE_BRIDGED:
927
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
928
  else:
929
    nic_count = 0
930

    
931
  env["INSTANCE_NIC_COUNT"] = nic_count
932

    
933
  if disks:
934
    disk_count = len(disks)
935
    for idx, (size, mode) in enumerate(disks):
936
      env["INSTANCE_DISK%d_SIZE" % idx] = size
937
      env["INSTANCE_DISK%d_MODE" % idx] = mode
938
  else:
939
    disk_count = 0
940

    
941
  env["INSTANCE_DISK_COUNT"] = disk_count
942

    
943
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
944
    for key, value in source.items():
945
      env["INSTANCE_%s_%s" % (kind, key)] = value
946

    
947
  return env
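# Illustrative example (assumption, not from the original file): for an
# instance with one NIC and one disk the returned dict contains keys such as
# INSTANCE_NAME, INSTANCE_PRIMARY, INSTANCE_NIC_COUNT, INSTANCE_NIC0_MAC,
# INSTANCE_DISK_COUNT, INSTANCE_DISK0_SIZE plus INSTANCE_BE_*/INSTANCE_HV_*
# entries; the hooks runner later prefixes every key with "GANETI_".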
948

    
949

    
950
def _NICListToTuple(lu, nics):
951
  """Build a list of nic information tuples.
952

953
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
954
  value in LUInstanceQueryData.
955

956
  @type lu:  L{LogicalUnit}
957
  @param lu: the logical unit on whose behalf we execute
958
  @type nics: list of L{objects.NIC}
959
  @param nics: list of nics to convert to hooks tuples
960

961
  """
962
  hooks_nics = []
963
  cluster = lu.cfg.GetClusterInfo()
964
  for nic in nics:
965
    ip = nic.ip
966
    mac = nic.mac
967
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
968
    mode = filled_params[constants.NIC_MODE]
969
    link = filled_params[constants.NIC_LINK]
970
    hooks_nics.append((ip, mac, mode, link))
971
  return hooks_nics
972

    
973

    
974
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
975
  """Builds instance related env variables for hooks from an object.
976

977
  @type lu: L{LogicalUnit}
978
  @param lu: the logical unit on whose behalf we execute
979
  @type instance: L{objects.Instance}
980
  @param instance: the instance for which we should build the
981
      environment
982
  @type override: dict
983
  @param override: dictionary with key/values that will override
984
      our values
985
  @rtype: dict
986
  @return: the hook environment dictionary
987

988
  """
989
  cluster = lu.cfg.GetClusterInfo()
990
  bep = cluster.FillBE(instance)
991
  hvp = cluster.FillHV(instance)
992
  args = {
993
    'name': instance.name,
994
    'primary_node': instance.primary_node,
995
    'secondary_nodes': instance.secondary_nodes,
996
    'os_type': instance.os,
997
    'status': instance.admin_up,
998
    'memory': bep[constants.BE_MEMORY],
999
    'vcpus': bep[constants.BE_VCPUS],
1000
    'nics': _NICListToTuple(lu, instance.nics),
1001
    'disk_template': instance.disk_template,
1002
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
1003
    'bep': bep,
1004
    'hvp': hvp,
1005
    'hypervisor_name': instance.hypervisor,
1006
  }
1007
  if override:
1008
    args.update(override)
1009
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
1010

    
1011

    
1012
def _AdjustCandidatePool(lu, exceptions):
1013
  """Adjust the candidate pool after node operations.
1014

1015
  """
1016
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1017
  if mod_list:
1018
    lu.LogInfo("Promoted nodes to master candidate role: %s",
1019
               utils.CommaJoin(node.name for node in mod_list))
1020
    for name in mod_list:
1021
      lu.context.ReaddNode(name)
1022
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1023
  if mc_now > mc_max:
1024
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1025
               (mc_now, mc_max))
1026

    
1027

    
1028
def _DecideSelfPromotion(lu, exceptions=None):
1029
  """Decide whether I should promote myself as a master candidate.
1030

1031
  """
1032
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1033
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1034
  # the new node will increase mc_max by one, so:
1035
  mc_should = min(mc_should + 1, cp_size)
1036
  return mc_now < mc_should
1037

    
1038

    
1039
def _CheckNicsBridgesExist(lu, target_nics, target_node):
1040
  """Check that the brigdes needed by a list of nics exist.
1041

1042
  """
1043
  cluster = lu.cfg.GetClusterInfo()
1044
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1045
  brlist = [params[constants.NIC_LINK] for params in paramslist
1046
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1047
  if brlist:
1048
    result = lu.rpc.call_bridges_exist(target_node, brlist)
1049
    result.Raise("Error checking bridges on destination node '%s'" %
1050
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1051

    
1052

    
1053
def _CheckInstanceBridgesExist(lu, instance, node=None):
1054
  """Check that the brigdes needed by an instance exist.
1055

1056
  """
1057
  if node is None:
1058
    node = instance.primary_node
1059
  _CheckNicsBridgesExist(lu, instance.nics, node)
1060

    
1061

    
1062
def _CheckOSVariant(os_obj, name):
1063
  """Check whether an OS name conforms to the os variants specification.
1064

1065
  @type os_obj: L{objects.OS}
1066
  @param os_obj: OS object to check
1067
  @type name: string
1068
  @param name: OS name passed by the user, to check for validity
1069

1070
  """
1071
  if not os_obj.supported_variants:
1072
    return
1073
  variant = objects.OS.GetVariant(name)
1074
  if not variant:
1075
    raise errors.OpPrereqError("OS name must include a variant",
1076
                               errors.ECODE_INVAL)
1077

    
1078
  if variant not in os_obj.supported_variants:
1079
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1080

    
1081

    
1082
def _GetNodeInstancesInner(cfg, fn):
1083
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1084

    
1085

    
1086
def _GetNodeInstances(cfg, node_name):
1087
  """Returns a list of all primary and secondary instances on a node.
1088

1089
  """
1090

    
1091
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1092

    
1093

    
1094
def _GetNodePrimaryInstances(cfg, node_name):
1095
  """Returns primary instances on a node.
1096

1097
  """
1098
  return _GetNodeInstancesInner(cfg,
1099
                                lambda inst: node_name == inst.primary_node)
1100

    
1101

    
1102
def _GetNodeSecondaryInstances(cfg, node_name):
1103
  """Returns secondary instances on a node.
1104

1105
  """
1106
  return _GetNodeInstancesInner(cfg,
1107
                                lambda inst: node_name in inst.secondary_nodes)
1108

    
1109

    
1110
def _GetStorageTypeArgs(cfg, storage_type):
1111
  """Returns the arguments for a storage type.
1112

1113
  """
1114
  # Special case for file storage
1115
  if storage_type == constants.ST_FILE:
1116
    # storage.FileStorage wants a list of storage directories
1117
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1118

    
1119
  return []
1120

    
1121

    
1122
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1123
  faulty = []
1124

    
1125
  for dev in instance.disks:
1126
    cfg.SetDiskID(dev, node_name)
1127

    
1128
  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1129
  result.Raise("Failed to get disk status from node %s" % node_name,
1130
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
1131

    
1132
  for idx, bdev_status in enumerate(result.payload):
1133
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1134
      faulty.append(idx)
1135

    
1136
  return faulty
1137

    
1138

    
1139
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1140
  """Check the sanity of iallocator and node arguments and use the
1141
  cluster-wide iallocator if appropriate.
1142

1143
  Check that at most one of (iallocator, node) is specified. If none is
1144
  specified, then the LU's opcode's iallocator slot is filled with the
1145
  cluster-wide default iallocator.
1146

1147
  @type iallocator_slot: string
1148
  @param iallocator_slot: the name of the opcode iallocator slot
1149
  @type node_slot: string
1150
  @param node_slot: the name of the opcode target node slot
1151

1152
  """
1153
  node = getattr(lu.op, node_slot, None)
1154
  iallocator = getattr(lu.op, iallocator_slot, None)
1155

    
1156
  if node is not None and iallocator is not None:
1157
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
1158
                               errors.ECODE_INVAL)
1159
  elif node is None and iallocator is None:
1160
    default_iallocator = lu.cfg.GetDefaultIAllocator()
1161
    if default_iallocator:
1162
      setattr(lu.op, iallocator_slot, default_iallocator)
1163
    else:
1164
      raise errors.OpPrereqError("No iallocator or node given and no"
1165
                                 " cluster-wide default iallocator found;"
1166
                                 " please specify either an iallocator or a"
1167
                                 " node, or set a cluster-wide default"
1168
                                 " iallocator")
1169

    
1170

    
1171
class LUClusterPostInit(LogicalUnit):
1172
  """Logical unit for running hooks after cluster initialization.
1173

1174
  """
1175
  HPATH = "cluster-init"
1176
  HTYPE = constants.HTYPE_CLUSTER
1177

    
1178
  def BuildHooksEnv(self):
1179
    """Build hooks env.
1180

1181
    """
1182
    return {
1183
      "OP_TARGET": self.cfg.GetClusterName(),
1184
      }
1185

    
1186
  def BuildHooksNodes(self):
1187
    """Build hooks nodes.
1188

1189
    """
1190
    return ([], [self.cfg.GetMasterNode()])
1191

    
1192
  def Exec(self, feedback_fn):
1193
    """Nothing to do.
1194

1195
    """
1196
    return True
1197

    
1198

    
1199
class LUClusterDestroy(LogicalUnit):
1200
  """Logical unit for destroying the cluster.
1201

1202
  """
1203
  HPATH = "cluster-destroy"
1204
  HTYPE = constants.HTYPE_CLUSTER
1205

    
1206
  def BuildHooksEnv(self):
1207
    """Build hooks env.
1208

1209
    """
1210
    return {
1211
      "OP_TARGET": self.cfg.GetClusterName(),
1212
      }
1213

    
1214
  def BuildHooksNodes(self):
1215
    """Build hooks nodes.
1216

1217
    """
1218
    return ([], [])
1219

    
1220
  def CheckPrereq(self):
1221
    """Check prerequisites.
1222

1223
    This checks whether the cluster is empty.
1224

1225
    Any errors are signaled by raising errors.OpPrereqError.
1226

1227
    """
1228
    master = self.cfg.GetMasterNode()
1229

    
1230
    nodelist = self.cfg.GetNodeList()
1231
    if len(nodelist) != 1 or nodelist[0] != master:
1232
      raise errors.OpPrereqError("There are still %d node(s) in"
1233
                                 " this cluster." % (len(nodelist) - 1),
1234
                                 errors.ECODE_INVAL)
1235
    instancelist = self.cfg.GetInstanceList()
1236
    if instancelist:
1237
      raise errors.OpPrereqError("There are still %d instance(s) in"
1238
                                 " this cluster." % len(instancelist),
1239
                                 errors.ECODE_INVAL)
1240

    
1241
  def Exec(self, feedback_fn):
1242
    """Destroys the cluster.
1243

1244
    """
1245
    master = self.cfg.GetMasterNode()
1246

    
1247
    # Run post hooks on master node before it's removed
1248
    _RunPostHook(self, master)
1249

    
1250
    result = self.rpc.call_node_stop_master(master, False)
1251
    result.Raise("Could not disable the master role")
1252

    
1253
    return master
1254

    
1255

    
1256
def _VerifyCertificate(filename):
1257
  """Verifies a certificate for LUClusterVerify.
1258

1259
  @type filename: string
1260
  @param filename: Path to PEM file
1261

1262
  """
1263
  try:
1264
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1265
                                           utils.ReadFile(filename))
1266
  except Exception, err: # pylint: disable-msg=W0703
1267
    return (LUClusterVerify.ETYPE_ERROR,
1268
            "Failed to load X509 certificate %s: %s" % (filename, err))
1269

    
1270
  (errcode, msg) = \
1271
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1272
                                constants.SSL_CERT_EXPIRATION_ERROR)
1273

    
1274
  if msg:
1275
    fnamemsg = "While verifying %s: %s" % (filename, msg)
1276
  else:
1277
    fnamemsg = None
1278

    
1279
  if errcode is None:
1280
    return (None, fnamemsg)
1281
  elif errcode == utils.CERT_WARNING:
1282
    return (LUClusterVerify.ETYPE_WARNING, fnamemsg)
1283
  elif errcode == utils.CERT_ERROR:
1284
    return (LUClusterVerify.ETYPE_ERROR, fnamemsg)
1285

    
1286
  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1287

    
1288

    
1289
class LUClusterVerify(LogicalUnit):
1290
  """Verifies the cluster status.
1291

1292
  """
1293
  HPATH = "cluster-verify"
1294
  HTYPE = constants.HTYPE_CLUSTER
1295
  REQ_BGL = False
1296

    
1297
  TCLUSTER = "cluster"
1298
  TNODE = "node"
1299
  TINSTANCE = "instance"
1300

    
1301
  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1302
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1303
  ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
1304
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1305
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1306
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1307
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1308
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1309
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1310
  EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
1311
  ENODEDRBD = (TNODE, "ENODEDRBD")
1312
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1313
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1314
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
1315
  ENODEHV = (TNODE, "ENODEHV")
1316
  ENODELVM = (TNODE, "ENODELVM")
1317
  ENODEN1 = (TNODE, "ENODEN1")
1318
  ENODENET = (TNODE, "ENODENET")
1319
  ENODEOS = (TNODE, "ENODEOS")
1320
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1321
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1322
  ENODERPC = (TNODE, "ENODERPC")
1323
  ENODESSH = (TNODE, "ENODESSH")
1324
  ENODEVERSION = (TNODE, "ENODEVERSION")
1325
  ENODESETUP = (TNODE, "ENODESETUP")
1326
  ENODETIME = (TNODE, "ENODETIME")
1327
  ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
1328

    
1329
  ETYPE_FIELD = "code"
1330
  ETYPE_ERROR = "ERROR"
1331
  ETYPE_WARNING = "WARNING"
1332

    
1333
  _HOOKS_INDENT_RE = re.compile("^", re.M)
1334

    
1335
  class NodeImage(object):
1336
    """A class representing the logical and physical status of a node.
1337

1338
    @type name: string
1339
    @ivar name: the node name to which this object refers
1340
    @ivar volumes: a structure as returned from
1341
        L{ganeti.backend.GetVolumeList} (runtime)
1342
    @ivar instances: a list of running instances (runtime)
1343
    @ivar pinst: list of configured primary instances (config)
1344
    @ivar sinst: list of configured secondary instances (config)
1345
    @ivar sbp: dictionary of {primary-node: list of instances} for all
1346
        instances for which this node is secondary (config)
1347
    @ivar mfree: free memory, as reported by hypervisor (runtime)
1348
    @ivar dfree: free disk, as reported by the node (runtime)
1349
    @ivar offline: the offline status (config)
1350
    @type rpc_fail: boolean
1351
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
1352
        not whether the individual keys were correct) (runtime)
1353
    @type lvm_fail: boolean
1354
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1355
    @type hyp_fail: boolean
1356
    @ivar hyp_fail: whether the RPC call didn't return the instance list
1357
    @type ghost: boolean
1358
    @ivar ghost: whether this is a known node or not (config)
1359
    @type os_fail: boolean
1360
    @ivar os_fail: whether the RPC call didn't return valid OS data
1361
    @type oslist: list
1362
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1363
    @type vm_capable: boolean
1364
    @ivar vm_capable: whether the node can host instances
1365

1366
    """
1367
    def __init__(self, offline=False, name=None, vm_capable=True):
1368
      self.name = name
1369
      self.volumes = {}
1370
      self.instances = []
1371
      self.pinst = []
1372
      self.sinst = []
1373
      self.sbp = {}
1374
      self.mfree = 0
1375
      self.dfree = 0
1376
      self.offline = offline
1377
      self.vm_capable = vm_capable
1378
      self.rpc_fail = False
1379
      self.lvm_fail = False
1380
      self.hyp_fail = False
1381
      self.ghost = False
1382
      self.os_fail = False
1383
      self.oslist = {}
1384

    
1385
  def ExpandNames(self):
1386
    self.needed_locks = {
1387
      locking.LEVEL_NODE: locking.ALL_SET,
1388
      locking.LEVEL_INSTANCE: locking.ALL_SET,
1389
    }
1390
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1391

    
1392
  def CheckPrereq(self):
1393
    self.all_node_info = self.cfg.GetAllNodesInfo()
1394
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
1395
    self.my_node_names = utils.NiceSort(list(self.all_node_info))
1396
    self.my_node_info = self.all_node_info
1397
    self.my_inst_names = utils.NiceSort(list(self.all_inst_info))
1398
    self.my_inst_info = self.all_inst_info
1399

    
1400
  def _Error(self, ecode, item, msg, *args, **kwargs):
1401
    """Format an error message.
1402

1403
    Based on the opcode's error_codes parameter, either format a
1404
    parseable error code, or a simpler error string.
1405

1406
    This must be called only from Exec and functions called from Exec.
1407

1408
    """
1409
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1410
    itype, etxt = ecode
1411
    # first complete the msg
1412
    if args:
1413
      msg = msg % args
1414
    # then format the whole message
1415
    if self.op.error_codes:
1416
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1417
    else:
1418
      if item:
1419
        item = " " + item
1420
      else:
1421
        item = ""
1422
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1423
    # and finally report it via the feedback_fn
1424
    self._feedback_fn("  - %s" % msg)
1425

    
1426
  def _ErrorIf(self, cond, *args, **kwargs):
1427
    """Log an error message if the passed condition is True.
1428

1429
    """
1430
    cond = bool(cond) or self.op.debug_simulate_errors
1431
    if cond:
1432
      self._Error(*args, **kwargs)
1433
    # do not mark the operation as failed for WARN cases only
1434
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1435
      self.bad = self.bad or cond
1436

    
1437
  def _VerifyNode(self, ninfo, nresult):
1438
    """Perform some basic validation on data returned from a node.
1439

1440
      - check the result data structure is well formed and has all the
1441
        mandatory fields
1442
      - check ganeti version
1443

1444
    @type ninfo: L{objects.Node}
1445
    @param ninfo: the node to check
1446
    @param nresult: the results from the node
1447
    @rtype: boolean
1448
    @return: whether overall this call was successful (and we can expect
1449
         reasonable values in the response)
1450

1451
    """
1452
    node = ninfo.name
1453
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1454

    
1455
    # main result, nresult should be a non-empty dict
1456
    test = not nresult or not isinstance(nresult, dict)
1457
    _ErrorIf(test, self.ENODERPC, node,
1458
                  "unable to verify node: no data returned")
1459
    if test:
1460
      return False
1461

    
1462
    # compares ganeti version
1463
    local_version = constants.PROTOCOL_VERSION
1464
    remote_version = nresult.get("version", None)
1465
    test = not (remote_version and
1466
                isinstance(remote_version, (list, tuple)) and
1467
                len(remote_version) == 2)
1468
    _ErrorIf(test, self.ENODERPC, node,
1469
             "connection to node returned invalid data")
1470
    if test:
1471
      return False
1472

    
1473
    test = local_version != remote_version[0]
1474
    _ErrorIf(test, self.ENODEVERSION, node,
1475
             "incompatible protocol versions: master %s,"
1476
             " node %s", local_version, remote_version[0])
1477
    if test:
1478
      return False
1479

    
1480
    # node seems compatible, we can actually try to look into its results
1481

    
1482
    # full package version
1483
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1484
                  self.ENODEVERSION, node,
1485
                  "software version mismatch: master %s, node %s",
1486
                  constants.RELEASE_VERSION, remote_version[1],
1487
                  code=self.ETYPE_WARNING)
1488

    
1489
    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1490
    if ninfo.vm_capable and isinstance(hyp_result, dict):
1491
      for hv_name, hv_result in hyp_result.iteritems():
1492
        test = hv_result is not None
1493
        _ErrorIf(test, self.ENODEHV, node,
1494
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1495

    
1496
    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1497
    if ninfo.vm_capable and isinstance(hvp_result, list):
1498
      for item, hv_name, hv_result in hvp_result:
1499
        _ErrorIf(True, self.ENODEHV, node,
1500
                 "hypervisor %s parameter verify failure (source %s): %s",
1501
                 hv_name, item, hv_result)
1502

    
1503
    test = nresult.get(constants.NV_NODESETUP,
1504
                       ["Missing NODESETUP results"])
1505
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1506
             "; ".join(test))
1507

    
1508
    return True
1509

    
1510
  def _VerifyNodeTime(self, ninfo, nresult,
1511
                      nvinfo_starttime, nvinfo_endtime):
1512
    """Check the node time.
1513

1514
    @type ninfo: L{objects.Node}
1515
    @param ninfo: the node to check
1516
    @param nresult: the remote results for the node
1517
    @param nvinfo_starttime: the start time of the RPC call
1518
    @param nvinfo_endtime: the end time of the RPC call
1519

1520
    """
1521
    node = ninfo.name
1522
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1523

    
1524
    ntime = nresult.get(constants.NV_TIME, None)
1525
    try:
1526
      ntime_merged = utils.MergeTime(ntime)
1527
    except (ValueError, TypeError):
1528
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1529
      return
1530

    
1531
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1532
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1533
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1534
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1535
    else:
1536
      ntime_diff = None
1537

    
1538
    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1539
             "Node time diverges by at least %s from master node time",
1540
             ntime_diff)
1541

    
1542
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1543
    """Check the node LVM results.
1544

1545
    @type ninfo: L{objects.Node}
1546
    @param ninfo: the node to check
1547
    @param nresult: the remote results for the node
1548
    @param vg_name: the configured VG name
1549

1550
    """
1551
    if vg_name is None:
1552
      return
1553

    
1554
    node = ninfo.name
1555
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1556

    
1557
    # checks vg existence and size > 20G
1558
    vglist = nresult.get(constants.NV_VGLIST, None)
1559
    test = not vglist
1560
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1561
    if not test:
1562
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1563
                                            constants.MIN_VG_SIZE)
1564
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1565

    
1566
    # check pv names
1567
    pvlist = nresult.get(constants.NV_PVLIST, None)
1568
    test = pvlist is None
1569
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1570
    if not test:
1571
      # check that ':' is not present in PV names, since it's a
1572
      # special character for lvcreate (denotes the range of PEs to
1573
      # use on the PV)
1574
      for _, pvname, owner_vg in pvlist:
1575
        test = ":" in pvname
1576
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1577
                 " '%s' of VG '%s'", pvname, owner_vg)
1578

    
1579
  def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1580
    """Check the node bridges.
1581

1582
    @type ninfo: L{objects.Node}
1583
    @param ninfo: the node to check
1584
    @param nresult: the remote results for the node
1585
    @param bridges: the expected list of bridges
1586

1587
    """
1588
    if not bridges:
1589
      return
1590

    
1591
    node = ninfo.name
1592
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1593

    
1594
    missing = nresult.get(constants.NV_BRIDGES, None)
1595
    test = not isinstance(missing, list)
1596
    _ErrorIf(test, self.ENODENET, node,
1597
             "did not return valid bridge information")
1598
    if not test:
1599
      _ErrorIf(bool(missing), self.ENODENET, node, "missing bridges: %s" %
1600
               utils.CommaJoin(sorted(missing)))
1601

    
1602
  def _VerifyNodeNetwork(self, ninfo, nresult):
1603
    """Check the node network connectivity results.
1604

1605
    @type ninfo: L{objects.Node}
1606
    @param ninfo: the node to check
1607
    @param nresult: the remote results for the node
1608

1609
    """
1610
    node = ninfo.name
1611
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1612

    
1613
    test = constants.NV_NODELIST not in nresult
1614
    _ErrorIf(test, self.ENODESSH, node,
1615
             "node hasn't returned node ssh connectivity data")
1616
    if not test:
1617
      if nresult[constants.NV_NODELIST]:
1618
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1619
          _ErrorIf(True, self.ENODESSH, node,
1620
                   "ssh communication with node '%s': %s", a_node, a_msg)
1621

    
1622
    test = constants.NV_NODENETTEST not in nresult
1623
    _ErrorIf(test, self.ENODENET, node,
1624
             "node hasn't returned node tcp connectivity data")
1625
    if not test:
1626
      if nresult[constants.NV_NODENETTEST]:
1627
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1628
        for anode in nlist:
1629
          _ErrorIf(True, self.ENODENET, node,
1630
                   "tcp communication with node '%s': %s",
1631
                   anode, nresult[constants.NV_NODENETTEST][anode])
1632

    
1633
    test = constants.NV_MASTERIP not in nresult
1634
    _ErrorIf(test, self.ENODENET, node,
1635
             "node hasn't returned node master IP reachability data")
1636
    if not test:
1637
      if not nresult[constants.NV_MASTERIP]:
1638
        if node == self.master_node:
1639
          msg = "the master node cannot reach the master IP (not configured?)"
1640
        else:
1641
          msg = "cannot reach the master IP"
1642
        _ErrorIf(True, self.ENODENET, node, msg)
1643

    
1644
  def _VerifyInstance(self, instance, instanceconfig, node_image,
1645
                      diskstatus):
1646
    """Verify an instance.
1647

1648
    This function checks to see if the required block devices are
1649
    available on the instance's node.
1650

1651
    """
1652
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1653
    node_current = instanceconfig.primary_node
1654

    
1655
    node_vol_should = {}
1656
    instanceconfig.MapLVsByNode(node_vol_should)
1657

    
1658
    for node in node_vol_should:
1659
      n_img = node_image[node]
1660
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1661
        # ignore missing volumes on offline or broken nodes
1662
        continue
1663
      for volume in node_vol_should[node]:
1664
        test = volume not in n_img.volumes
1665
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1666
                 "volume %s missing on node %s", volume, node)
1667

    
1668
    if instanceconfig.admin_up:
1669
      pri_img = node_image[node_current]
1670
      test = instance not in pri_img.instances and not pri_img.offline
1671
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
1672
               "instance not running on its primary node %s",
1673
               node_current)
1674

    
1675
    for node, n_img in node_image.items():
1676
      if node != node_current:
1677
        test = instance in n_img.instances
1678
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1679
                 "instance should not run on node %s", node)
1680

    
1681
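    # flatten the per-node disk status into (node, success, status, disk_idx)
    # tuples so that each disk can be checked individually below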
    diskdata = [(nname, success, status, idx)
1682
                for (nname, disks) in diskstatus.items()
1683
                for idx, (success, status) in enumerate(disks)]
1684

    
1685
    for nname, success, bdev_status, idx in diskdata:
1686
      # the 'ghost node' construction in Exec() ensures that we have a
1687
      # node here
1688
      snode = node_image[nname]
1689
      bad_snode = snode.ghost or snode.offline
1690
      _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
1691
               self.EINSTANCEFAULTYDISK, instance,
1692
               "couldn't retrieve status for disk/%s on %s: %s",
1693
               idx, nname, bdev_status)
1694
      _ErrorIf((instanceconfig.admin_up and success and
1695
                bdev_status.ldisk_status == constants.LDS_FAULTY),
1696
               self.EINSTANCEFAULTYDISK, instance,
1697
               "disk/%s on %s is faulty", idx, nname)
1698

    
1699
  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1700
    """Verify if there are any unknown volumes in the cluster.
1701

1702
    The .os, .swap and backup volumes are ignored. All other volumes are
1703
    reported as unknown.
1704

1705
    @type reserved: L{ganeti.utils.FieldSet}
1706
    @param reserved: a FieldSet of reserved volume names
1707

1708
    """
1709
    for node, n_img in node_image.items():
1710
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1711
        # skip non-healthy nodes
1712
        continue
1713
      for volume in n_img.volumes:
1714
        test = ((node not in node_vol_should or
1715
                volume not in node_vol_should[node]) and
1716
                not reserved.Matches(volume))
1717
        self._ErrorIf(test, self.ENODEORPHANLV, node,
1718
                      "volume %s is unknown", volume)
1719

    
1720
  def _VerifyOrphanInstances(self, instancelist, node_image):
1721
    """Verify the list of running instances.
1722

1723
    This checks what instances are running but unknown to the cluster.
1724

1725
    """
1726
    for node, n_img in node_image.items():
1727
      for o_inst in n_img.instances:
1728
        test = o_inst not in instancelist
1729
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1730
                      "instance %s on node %s should not exist", o_inst, node)
1731

    
1732
  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1733
    """Verify N+1 Memory Resilience.
1734

1735
    Check that if one single node dies we can still start all the
1736
    instances it was primary for.
1737

1738
    """
1739
    cluster_info = self.cfg.GetClusterInfo()
1740
    for node, n_img in node_image.items():
1741
      # This code checks that every node currently listed as a secondary
1742
      # has enough memory to host all the instances it would have to run
1743
      # should a single other node in the cluster fail.
1744
      # FIXME: not ready for failover to an arbitrary node
1745
      # FIXME: does not support file-backed instances
1746
      # WARNING: we currently take into account down instances as well
1747
      # as up ones, considering that even if they're down someone
1748
      # might want to start them even in the event of a node failure.
1749
      if n_img.offline:
1750
        # we're skipping offline nodes from the N+1 warning, since
1751
        # most likely we don't have good memory infromation from them;
1752
        # we already list instances living on such nodes, and that's
1753
        # enough warning
1754
        continue
1755
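      # n_img.sbp maps a primary node name to the instances that use this
      # node as their secondary; their memory is what this node would have
      # to provide should that primary node fail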
      for prinode, instances in n_img.sbp.items():
1756
        needed_mem = 0
1757
        for instance in instances:
1758
          bep = cluster_info.FillBE(instance_cfg[instance])
1759
          if bep[constants.BE_AUTO_BALANCE]:
1760
            needed_mem += bep[constants.BE_MEMORY]
1761
        test = n_img.mfree < needed_mem
1762
        self._ErrorIf(test, self.ENODEN1, node,
1763
                      "not enough memory to accomodate instance failovers"
1764
                      " should node %s fail (%dMiB needed, %dMiB available)",
1765
                      prinode, needed_mem, n_img.mfree)
1766

    
1767
  @classmethod
1768
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
1769
                   (files_all, files_all_opt, files_mc, files_vm)):
1770
    """Verifies file checksums collected from all nodes.
1771

1772
    @param errorif: Callback for reporting errors
1773
    @param nodeinfo: List of L{objects.Node} objects
1774
    @param master_node: Name of master node
1775
    @param all_nvinfo: RPC results
1776

1777
    """
1778
    node_names = frozenset(node.name for node in nodeinfo)
1779

    
1780
    assert master_node in node_names
1781
    assert (len(files_all | files_all_opt | files_mc | files_vm) ==
1782
            sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
1783
           "Found file listed in more than one file list"
1784

    
1785
    # Define functions determining which nodes to consider for a file
1786
    file2nodefn = dict([(filename, fn)
1787
      for (files, fn) in [(files_all, None),
1788
                          (files_all_opt, None),
1789
                          (files_mc, lambda node: (node.master_candidate or
1790
                                                   node.name == master_node)),
1791
                          (files_vm, lambda node: node.vm_capable)]
1792
      for filename in files])
1793

    
1794
    fileinfo = dict((filename, {}) for filename in file2nodefn.keys())
1795

    
1796
    for node in nodeinfo:
1797
      nresult = all_nvinfo[node.name]
1798

    
1799
      if nresult.fail_msg or not nresult.payload:
1800
        node_files = None
1801
      else:
1802
        node_files = nresult.payload.get(constants.NV_FILELIST, None)
1803

    
1804
      test = not (node_files and isinstance(node_files, dict))
1805
      errorif(test, cls.ENODEFILECHECK, node.name,
1806
              "Node did not return file checksum data")
1807
      if test:
1808
        continue
1809

    
1810
      for (filename, checksum) in node_files.items():
1811
        # Check if the file should be considered for a node
1812
        fn = file2nodefn[filename]
1813
        if fn is None or fn(node):
1814
          fileinfo[filename].setdefault(checksum, set()).add(node.name)
1815

    
1816
    for (filename, checksums) in fileinfo.items():
1817
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
1818

    
1819
      # Nodes having the file
1820
      with_file = frozenset(node_name
1821
                            for nodes in fileinfo[filename].values()
1822
                            for node_name in nodes)
1823

    
1824
      # Nodes missing file
1825
      missing_file = node_names - with_file
1826

    
1827
      if filename in files_all_opt:
1828
        # All or no nodes
1829
        errorif(missing_file and missing_file != node_names,
1830
                cls.ECLUSTERFILECHECK, None,
1831
                "File %s is optional, but it must exist on all or no nodes (not"
1832
                " found on %s)",
1833
                filename, utils.CommaJoin(utils.NiceSort(missing_file)))
1834
      else:
1835
        errorif(missing_file, cls.ECLUSTERFILECHECK, None,
1836
                "File %s is missing from node(s) %s", filename,
1837
                utils.CommaJoin(utils.NiceSort(missing_file)))
1838

    
1839
      # See if there are multiple versions of the file
1840
      test = len(checksums) > 1
1841
      if test:
1842
        variants = ["variant %s on %s" %
1843
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
1844
                    for (idx, (checksum, nodes)) in
1845
                      enumerate(sorted(checksums.items()))]
1846
      else:
1847
        variants = []
1848

    
1849
      errorif(test, cls.ECLUSTERFILECHECK, None,
1850
              "File %s found with %s different checksums (%s)",
1851
              filename, len(checksums), "; ".join(variants))
1852

    
1853
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
1854
                      drbd_map):
1855
    """Verifies and the node DRBD status.
1856

1857
    @type ninfo: L{objects.Node}
1858
    @param ninfo: the node to check
1859
    @param nresult: the remote results for the node
1860
    @param instanceinfo: the dict of instances
1861
    @param drbd_helper: the configured DRBD usermode helper
1862
    @param drbd_map: the DRBD map as returned by
1863
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1864

1865
    """
1866
    node = ninfo.name
1867
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1868

    
1869
    if drbd_helper:
1870
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
1871
      test = (helper_result is None)
1872
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
1873
               "no drbd usermode helper returned")
1874
      if helper_result:
1875
        status, payload = helper_result
1876
        test = not status
1877
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
1878
                 "drbd usermode helper check unsuccessful: %s", payload)
1879
        test = status and (payload != drbd_helper)
1880
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
1881
                 "wrong drbd usermode helper: %s", payload)
1882

    
1883
    # compute the DRBD minors
1884
    node_drbd = {}
1885
    for minor, instance in drbd_map[node].items():
1886
      test = instance not in instanceinfo
1887
      _ErrorIf(test, self.ECLUSTERCFG, None,
1888
               "ghost instance '%s' in temporary DRBD map", instance)
1889
      # ghost instance should not be running, but otherwise we
1890
      # don't give double warnings (both ghost instance and
1891
      # unallocated minor in use)
1892
      if test:
1893
        node_drbd[minor] = (instance, False)
1894
      else:
1895
        instance = instanceinfo[instance]
1896
        node_drbd[minor] = (instance.name, instance.admin_up)
1897

    
1898
    # and now check them
1899
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
1900
    test = not isinstance(used_minors, (tuple, list))
1901
    _ErrorIf(test, self.ENODEDRBD, node,
1902
             "cannot parse drbd status file: %s", str(used_minors))
1903
    if test:
1904
      # we cannot check drbd status
1905
      return
1906

    
1907
    for minor, (iname, must_exist) in node_drbd.items():
1908
      test = minor not in used_minors and must_exist
1909
      _ErrorIf(test, self.ENODEDRBD, node,
1910
               "drbd minor %d of instance %s is not active", minor, iname)
1911
    for minor in used_minors:
1912
      test = minor not in node_drbd
1913
      _ErrorIf(test, self.ENODEDRBD, node,
1914
               "unallocated drbd minor %d is in use", minor)
1915

    
1916
  def _UpdateNodeOS(self, ninfo, nresult, nimg):
1917
    """Builds the node OS structures.
1918

1919
    @type ninfo: L{objects.Node}
1920
    @param ninfo: the node to check
1921
    @param nresult: the remote results for the node
1922
    @param nimg: the node image object
1923

1924
    """
1925
    node = ninfo.name
1926
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1927

    
1928
    remote_os = nresult.get(constants.NV_OSLIST, None)
1929
    test = (not isinstance(remote_os, list) or
1930
            not compat.all(isinstance(v, list) and len(v) == 7
1931
                           for v in remote_os))
1932

    
1933
    _ErrorIf(test, self.ENODEOS, node,
1934
             "node hasn't returned valid OS data")
1935

    
1936
    nimg.os_fail = test
1937

    
1938
    if test:
1939
      return
1940

    
1941
    os_dict = {}
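    # maps an OS name to the list of (path, status, diagnose, variants,
    # parameters, api_versions) tuples reported by the node for that OS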
1942

    
1943
    for (name, os_path, status, diagnose,
1944
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1945

    
1946
      if name not in os_dict:
1947
        os_dict[name] = []
1948

    
1949
      # parameters is a list of lists instead of list of tuples due to
1950
      # JSON lacking a real tuple type, fix it:
1951
      parameters = [tuple(v) for v in parameters]
1952
      os_dict[name].append((os_path, status, diagnose,
1953
                            set(variants), set(parameters), set(api_ver)))
1954

    
1955
    nimg.oslist = os_dict
1956

    
1957
  def _VerifyNodeOS(self, ninfo, nimg, base):
1958
    """Verifies the node OS list.
1959

1960
    @type ninfo: L{objects.Node}
1961
    @param ninfo: the node to check
1962
    @param nimg: the node image object
1963
    @param base: the 'template' node we match against (e.g. from the master)
1964

1965
    """
1966
    node = ninfo.name
1967
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1968

    
1969
    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1970

    
1971
    beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
1972
    for os_name, os_data in nimg.oslist.items():
1973
      assert os_data, "Empty OS status for OS %s?!" % os_name
1974
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1975
      _ErrorIf(not f_status, self.ENODEOS, node,
1976
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1977
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1978
               "OS '%s' has multiple entries (first one shadows the rest): %s",
1979
               os_name, utils.CommaJoin([v[0] for v in os_data]))
1980
      # this will be caught in the backend too
1981
      _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1982
               and not f_var, self.ENODEOS, node,
1983
               "OS %s with API at least %d does not declare any variant",
1984
               os_name, constants.OS_API_V15)
1985
      # comparisons with the 'base' image
1986
      test = os_name not in base.oslist
1987
      _ErrorIf(test, self.ENODEOS, node,
1988
               "Extra OS %s not present on reference node (%s)",
1989
               os_name, base.name)
1990
      if test:
1991
        continue
1992
      assert base.oslist[os_name], "Base node has empty OS status?"
1993
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1994
      if not b_status:
1995
        # base OS is invalid, skipping
1996
        continue
1997
      for kind, a, b in [("API version", f_api, b_api),
1998
                         ("variants list", f_var, b_var),
1999
                         ("parameters", beautify_params(f_param),
2000
                          beautify_params(b_param))]:
2001
        _ErrorIf(a != b, self.ENODEOS, node,
2002
                 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2003
                 kind, os_name, base.name,
2004
                 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2005

    
2006
    # check any missing OSes
2007
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2008
    _ErrorIf(missing, self.ENODEOS, node,
2009
             "OSes present on reference node %s but missing on this node: %s",
2010
             base.name, utils.CommaJoin(missing))
2011

    
2012
  def _VerifyOob(self, ninfo, nresult):
2013
    """Verifies out of band functionality of a node.
2014

2015
    @type ninfo: L{objects.Node}
2016
    @param ninfo: the node to check
2017
    @param nresult: the remote results for the node
2018

2019
    """
2020
    node = ninfo.name
2021
    # We just have to verify the paths on master and/or master candidates
2022
    # as the oob helper is invoked on the master
2023
    if ((ninfo.master_candidate or ninfo.master_capable) and
2024
        constants.NV_OOB_PATHS in nresult):
2025
      for path_result in nresult[constants.NV_OOB_PATHS]:
2026
        self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
2027

    
2028
  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2029
    """Verifies and updates the node volume data.
2030

2031
    This function will update a L{NodeImage}'s internal structures
2032
    with data from the remote call.
2033

2034
    @type ninfo: L{objects.Node}
2035
    @param ninfo: the node to check
2036
    @param nresult: the remote results for the node
2037
    @param nimg: the node image object
2038
    @param vg_name: the configured VG name
2039

2040
    """
2041
    node = ninfo.name
2042
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2043

    
2044
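    # assume LVM failure until the LV data returned by the node has been
    # validated below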
    nimg.lvm_fail = True
2045
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2046
    if vg_name is None:
2047
      pass
2048
    elif isinstance(lvdata, basestring):
2049
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
2050
               utils.SafeEncode(lvdata))
2051
    elif not isinstance(lvdata, dict):
2052
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
2053
    else:
2054
      nimg.volumes = lvdata
2055
      nimg.lvm_fail = False
2056

    
2057
  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2058
    """Verifies and updates the node instance list.
2059

2060
    If the listing was successful, then updates this node's instance
2061
    list. Otherwise, it marks the RPC call as failed for the instance
2062
    list key.
2063

2064
    @type ninfo: L{objects.Node}
2065
    @param ninfo: the node to check
2066
    @param nresult: the remote results for the node
2067
    @param nimg: the node image object
2068

2069
    """
2070
    idata = nresult.get(constants.NV_INSTANCELIST, None)
2071
    test = not isinstance(idata, list)
2072
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
2073
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
2074
    if test:
2075
      nimg.hyp_fail = True
2076
    else:
2077
      nimg.instances = idata
2078

    
2079
  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2080
    """Verifies and computes a node information map
2081

2082
    @type ninfo: L{objects.Node}
2083
    @param ninfo: the node to check
2084
    @param nresult: the remote results for the node
2085
    @param nimg: the node image object
2086
    @param vg_name: the configured VG name
2087

2088
    """
2089
    node = ninfo.name
2090
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2091

    
2092
    # try to read free memory (from the hypervisor)
2093
    hv_info = nresult.get(constants.NV_HVINFO, None)
2094
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2095
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
2096
    if not test:
2097
      try:
2098
        nimg.mfree = int(hv_info["memory_free"])
2099
      except (ValueError, TypeError):
2100
        _ErrorIf(True, self.ENODERPC, node,
2101
                 "node returned invalid nodeinfo, check hypervisor")
2102

    
2103
    # FIXME: devise a free space model for file based instances as well
2104
    if vg_name is not None:
2105
      test = (constants.NV_VGLIST not in nresult or
2106
              vg_name not in nresult[constants.NV_VGLIST])
2107
      _ErrorIf(test, self.ENODELVM, node,
2108
               "node didn't return data for the volume group '%s'"
2109
               " - it is either missing or broken", vg_name)
2110
      if not test:
2111
        try:
2112
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2113
        except (ValueError, TypeError):
2114
          _ErrorIf(True, self.ENODERPC, node,
2115
                   "node returned invalid LVM info, check LVM status")
2116

    
2117
  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2118
    """Gets per-disk status information for all instances.
2119

2120
    @type nodelist: list of strings
2121
    @param nodelist: Node names
2122
    @type node_image: dict of (name, L{objects.Node})
2123
    @param node_image: Node objects
2124
    @type instanceinfo: dict of (name, L{objects.Instance})
2125
    @param instanceinfo: Instance objects
2126
    @rtype: {instance: {node: [(success, payload)]}}
2127
    @return: a dictionary of per-instance dictionaries with nodes as
2128
        keys and disk information as values; the disk information is a
2129
        list of tuples (success, payload)
2130

2131
    """
2132
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2133

    
2134
    node_disks = {}
2135
    node_disks_devonly = {}
2136
    diskless_instances = set()
2137
    diskless = constants.DT_DISKLESS
2138

    
2139
    for nname in nodelist:
2140
      node_instances = list(itertools.chain(node_image[nname].pinst,
2141
                                            node_image[nname].sinst))
2142
      diskless_instances.update(inst for inst in node_instances
2143
                                if instanceinfo[inst].disk_template == diskless)
2144
      disks = [(inst, disk)
2145
               for inst in node_instances
2146
               for disk in instanceinfo[inst].disks]
2147

    
2148
      if not disks:
2149
        # No need to collect data
2150
        continue
2151

    
2152
      node_disks[nname] = disks
2153

    
2154
      # Creating copies as SetDiskID below will modify the objects and that can
2155
      # lead to incorrect data returned from nodes
2156
      devonly = [dev.Copy() for (_, dev) in disks]
2157

    
2158
      for dev in devonly:
2159
        self.cfg.SetDiskID(dev, nname)
2160

    
2161
      node_disks_devonly[nname] = devonly
2162

    
2163
    assert len(node_disks) == len(node_disks_devonly)
2164

    
2165
    # Collect data from all nodes with disks
2166
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2167
                                                          node_disks_devonly)
2168

    
2169
    assert len(result) == len(node_disks)
2170

    
2171
    instdisk = {}
2172

    
2173
    for (nname, nres) in result.items():
2174
      disks = node_disks[nname]
2175

    
2176
      if nres.offline:
2177
        # No data from this node
2178
        data = len(disks) * [(False, "node offline")]
2179
      else:
2180
        msg = nres.fail_msg
2181
        _ErrorIf(msg, self.ENODERPC, nname,
2182
                 "while getting disk information: %s", msg)
2183
        if msg:
2184
          # No data from this node
2185
          data = len(disks) * [(False, msg)]
2186
        else:
2187
          data = []
2188
          for idx, i in enumerate(nres.payload):
2189
            if isinstance(i, (tuple, list)) and len(i) == 2:
2190
              data.append(i)
2191
            else:
2192
              logging.warning("Invalid result from node %s, entry %d: %s",
2193
                              nname, idx, i)
2194
              data.append((False, "Invalid result from the remote node"))
2195

    
2196
      for ((inst, _), status) in zip(disks, data):
2197
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2198

    
2199
    # Add empty entries for diskless instances.
2200
    for inst in diskless_instances:
2201
      assert inst not in instdisk
2202
      instdisk[inst] = {}
2203

    
2204
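    # sanity check: every instance must have one (success, payload) status
    # entry per disk for each node that was queried about it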
    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2205
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
2206
                      compat.all(isinstance(s, (tuple, list)) and
2207
                                 len(s) == 2 for s in statuses)
2208
                      for inst, nnames in instdisk.items()
2209
                      for nname, statuses in nnames.items())
2210
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2211

    
2212
    return instdisk
2213

    
2214
  def _VerifyHVP(self, hvp_data):
2215
    """Verifies locally the syntax of the hypervisor parameters.
2216

2217
    """
2218
    for item, hv_name, hv_params in hvp_data:
2219
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2220
             (hv_name, item))
2221
      try:
2222
        hv_class = hypervisor.GetHypervisor(hv_name)
2223
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2224
        hv_class.CheckParameterSyntax(hv_params)
2225
      except errors.GenericError, err:
2226
        self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
2227

    
2228
  def BuildHooksEnv(self):
2229
    """Build hooks env.
2230

2231
    Cluster-Verify hooks are run only in the post phase; a hook failure makes
2232
    its output appear in the verify output and causes the verification to fail.
2233

2234
    """
2235
    env = {
2236
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2237
      }
2238

    
2239
    env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2240
               for node in self.my_node_info.values())
2241

    
2242
    return env
2243

    
2244
  def BuildHooksNodes(self):
2245
    """Build hooks nodes.
2246

2247
    """
2248
    assert self.my_node_names, ("Node list not gathered,"
2249
      " has CheckPrereq been executed?")
2250
    return ([], self.my_node_names)
2251

    
2252
  def Exec(self, feedback_fn):
2253
    """Verify integrity of cluster, performing various test on nodes.
2254

2255
    """
2256
    # This method has too many local variables. pylint: disable-msg=R0914
2257
    self.bad = False
2258
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2259
    verbose = self.op.verbose
2260
    self._feedback_fn = feedback_fn
2261
    feedback_fn("* Verifying global settings")
2262
    for msg in self.cfg.VerifyConfig():
2263
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2264

    
2265
    # Check the cluster certificates
2266
    for cert_filename in constants.ALL_CERT_FILES:
2267
      (errcode, msg) = _VerifyCertificate(cert_filename)
2268
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2269

    
2270
    vg_name = self.cfg.GetVGName()
2271
    drbd_helper = self.cfg.GetDRBDHelper()
2272
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2273
    cluster = self.cfg.GetClusterInfo()
2274
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
2275
    node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2276

    
2277
    i_non_redundant = [] # Non redundant instances
2278
    i_non_a_balanced = [] # Non auto-balanced instances
2279
    n_offline = 0 # Count of offline nodes
2280
    n_drained = 0 # Count of nodes being drained
2281
    node_vol_should = {}
2282

    
2283
    # FIXME: verify OS list
2284

    
2285
    # File verification
2286
    filemap = _ComputeAncillaryFiles(cluster, False)
2287

    
2288
    # do local checksums
2289
    master_node = self.master_node = self.cfg.GetMasterNode()
2290
    master_ip = self.cfg.GetMasterIP()
2291

    
2292
    # Compute the set of hypervisor parameters
2293
    hvp_data = []
2294
    for hv_name in hypervisors:
2295
      hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
2296
    for os_name, os_hvp in cluster.os_hvp.items():
2297
      for hv_name, hv_params in os_hvp.items():
2298
        if not hv_params:
2299
          continue
2300
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
2301
        hvp_data.append(("os %s" % os_name, hv_name, full_params))
2302
    # TODO: collapse identical parameter values in a single one
2303
    for instance in self.all_inst_info.values():
2304
      if not instance.hvparams:
2305
        continue
2306
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
2307
                       cluster.FillHV(instance)))
2308
    # and verify them locally
2309
    self._VerifyHVP(hvp_data)
2310

    
2311
    feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2312
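    # parameters for the node_verify RPC; each key enables one class of
    # checks to be run on the remote nodes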
    node_verify_param = {
2313
      constants.NV_FILELIST:
2314
        utils.UniqueSequence(filename
2315
                             for files in filemap
2316
                             for filename in files),
2317
      constants.NV_NODELIST: [node.name for node in self.all_node_info.values()
2318
                              if not node.offline],
2319
      constants.NV_HYPERVISOR: hypervisors,
2320
      constants.NV_HVPARAMS: hvp_data,
2321
      constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2322
                                 for node in node_data_list
2323
                                 if not node.offline],
2324
      constants.NV_INSTANCELIST: hypervisors,
2325
      constants.NV_VERSION: None,
2326
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2327
      constants.NV_NODESETUP: None,
2328
      constants.NV_TIME: None,
2329
      constants.NV_MASTERIP: (master_node, master_ip),
2330
      constants.NV_OSLIST: None,
2331
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2332
      }
2333

    
2334
    if vg_name is not None:
2335
      node_verify_param[constants.NV_VGLIST] = None
2336
      node_verify_param[constants.NV_LVLIST] = vg_name
2337
      node_verify_param[constants.NV_PVLIST] = [vg_name]
2338
      node_verify_param[constants.NV_DRBDLIST] = None
2339

    
2340
    if drbd_helper:
2341
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2342

    
2343
    # bridge checks
2344
    # FIXME: this needs to be changed per node-group, not cluster-wide
2345
    bridges = set()
2346
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2347
    if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2348
      bridges.add(default_nicpp[constants.NIC_LINK])
2349
    for instance in self.my_inst_info.values():
2350
      for nic in instance.nics:
2351
        full_nic = cluster.SimpleFillNIC(nic.nicparams)
2352
        if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2353
          bridges.add(full_nic[constants.NIC_LINK])
2354

    
2355
    if bridges:
2356
      node_verify_param[constants.NV_BRIDGES] = list(bridges)
2357

    
2358
    # Build our expected cluster state
2359
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
2360
                                                 name=node.name,
2361
                                                 vm_capable=node.vm_capable))
2362
                      for node in node_data_list)
2363

    
2364
    # Gather OOB paths
2365
    oob_paths = []
2366
    for node in self.all_node_info.values():
2367
      path = _SupportsOob(self.cfg, node)
2368
      if path and path not in oob_paths:
2369
        oob_paths.append(path)
2370

    
2371
    if oob_paths:
2372
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2373

    
2374
    for instance in self.my_inst_names:
2375
      inst_config = self.my_inst_info[instance]
2376

    
2377
      for nname in inst_config.all_nodes:
2378
        if nname not in node_image:
2379
          # ghost node
2380
          gnode = self.NodeImage(name=nname)
2381
          gnode.ghost = True
2382
          node_image[nname] = gnode
2383

    
2384
      inst_config.MapLVsByNode(node_vol_should)
2385

    
2386
      pnode = inst_config.primary_node
2387
      node_image[pnode].pinst.append(instance)
2388

    
2389
      for snode in inst_config.secondary_nodes:
2390
        nimg = node_image[snode]
2391
        nimg.sinst.append(instance)
2392
        if pnode not in nimg.sbp:
2393
          nimg.sbp[pnode] = []
2394
        nimg.sbp[pnode].append(instance)
2395

    
2396
    # At this point, we have the in-memory data structures complete,
2397
    # except for the runtime information, which we'll gather next
2398

    
2399
    # Due to the way our RPC system works, exact response times cannot be
2400
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2401
    # time before and after executing the request, we can at least have a time
2402
    # window.
2403
    nvinfo_starttime = time.time()
2404
    all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2405
                                           node_verify_param,
2406
                                           self.cfg.GetClusterName())
2407
    nvinfo_endtime = time.time()
2408

    
2409
    all_drbd_map = self.cfg.ComputeDRBDMap()
2410

    
2411
    feedback_fn("* Gathering disk information (%s nodes)" %
2412
                len(self.my_node_names))
2413
    instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2414
                                     self.my_inst_info)
2415

    
2416
    feedback_fn("* Verifying configuration file consistency")
2417

    
2418
    # If not all nodes are being checked, we need to make sure the master node
2419
    # and a non-checked vm_capable node are in the list.
2420
    absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2421
    if absent_nodes:
2422
      vf_nvinfo = all_nvinfo.copy()
2423
      vf_node_info = list(self.my_node_info.values())
2424
      additional_nodes = []
2425
      if master_node not in self.my_node_info:
2426
        additional_nodes.append(master_node)
2427
        vf_node_info.append(self.all_node_info[master_node])
2428
      # Add the first vm_capable node we find which is not included
2429
      for node in absent_nodes:
2430
        nodeinfo = self.all_node_info[node]
2431
        if nodeinfo.vm_capable and not nodeinfo.offline:
2432
          additional_nodes.append(node)
2433
          vf_node_info.append(self.all_node_info[node])
2434
          break
2435
      key = constants.NV_FILELIST
2436
      vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2437
                                                 {key: node_verify_param[key]},
2438
                                                 self.cfg.GetClusterName()))
2439
    else:
2440
      vf_nvinfo = all_nvinfo
2441
      vf_node_info = self.my_node_info.values()
2442

    
2443
    self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2444

    
2445
    feedback_fn("* Verifying node status")
2446

    
2447
    refos_img = None
2448

    
2449
    for node_i in node_data_list:
2450
      node = node_i.name
2451
      nimg = node_image[node]
2452

    
2453
      if node_i.offline:
2454
        if verbose:
2455
          feedback_fn("* Skipping offline node %s" % (node,))
2456
        n_offline += 1
2457
        continue
2458

    
2459
      if node == master_node:
2460
        ntype = "master"
2461
      elif node_i.master_candidate:
2462
        ntype = "master candidate"
2463
      elif node_i.drained:
2464
        ntype = "drained"
2465
        n_drained += 1
2466
      else:
2467
        ntype = "regular"
2468
      if verbose:
2469
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2470

    
2471
      msg = all_nvinfo[node].fail_msg
2472
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2473
      if msg:
2474
        nimg.rpc_fail = True
2475
        continue
2476

    
2477
      nresult = all_nvinfo[node].payload
2478

    
2479
      nimg.call_ok = self._VerifyNode(node_i, nresult)
2480
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2481
      self._VerifyNodeNetwork(node_i, nresult)
2482
      self._VerifyOob(node_i, nresult)
2483

    
2484
      if nimg.vm_capable:
2485
        self._VerifyNodeLVM(node_i, nresult, vg_name)
2486
        self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2487
                             all_drbd_map)
2488

    
2489
        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2490
        self._UpdateNodeInstances(node_i, nresult, nimg)
2491
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2492
        self._UpdateNodeOS(node_i, nresult, nimg)
2493
        if not nimg.os_fail:
2494
          if refos_img is None:
2495
            refos_img = nimg
2496
          self._VerifyNodeOS(node_i, nimg, refos_img)
2497
        self._VerifyNodeBridges(node_i, nresult, bridges)
2498

    
2499
    feedback_fn("* Verifying instance status")
2500
    for instance in self.my_inst_names:
2501
      if verbose:
2502
        feedback_fn("* Verifying instance %s" % instance)
2503
      inst_config = self.my_inst_info[instance]
2504
      self._VerifyInstance(instance, inst_config, node_image,
2505
                           instdisk[instance])
2506
      inst_nodes_offline = []
2507

    
2508
      pnode = inst_config.primary_node
2509
      pnode_img = node_image[pnode]
2510
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2511
               self.ENODERPC, pnode, "instance %s, connection to"
2512
               " primary node failed", instance)
2513

    
2514
      _ErrorIf(inst_config.admin_up and pnode_img.offline,
2515
               self.EINSTANCEBADNODE, instance,
2516
               "instance is marked as running and lives on offline node %s",
2517
               inst_config.primary_node)
2518

    
2519
      # If the instance is non-redundant we cannot survive losing its primary
2520
      # node, so we are not N+1 compliant. On the other hand we have no disk
2521
      # templates with more than one secondary so that situation is not well
2522
      # supported either.
2523
      # FIXME: does not support file-backed instances
2524
      if not inst_config.secondary_nodes:
2525
        i_non_redundant.append(instance)
2526

    
2527
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2528
               instance, "instance has multiple secondary nodes: %s",
2529
               utils.CommaJoin(inst_config.secondary_nodes),
2530
               code=self.ETYPE_WARNING)
2531

    
2532
      if inst_config.disk_template in constants.DTS_INT_MIRROR:
2533
        pnode = inst_config.primary_node
2534
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
2535
        instance_groups = {}
2536

    
2537
        for node in instance_nodes:
2538
          instance_groups.setdefault(self.all_node_info[node].group,
2539
                                     []).append(node)
2540

    
2541
        pretty_list = [
2542
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2543
          # Sort so that we always list the primary node first.
2544
          for group, nodes in sorted(instance_groups.items(),
2545
                                     key=lambda (_, nodes): pnode in nodes,
2546
                                     reverse=True)]
2547

    
2548
        self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2549
                      instance, "instance has primary and secondary nodes in"
2550
                      " different groups: %s", utils.CommaJoin(pretty_list),
2551
                      code=self.ETYPE_WARNING)
2552

    
2553
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2554
        i_non_a_balanced.append(instance)
2555

    
2556
      for snode in inst_config.secondary_nodes:
2557
        s_img = node_image[snode]
2558
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2559
                 "instance %s, connection to secondary node failed", instance)
2560

    
2561
        if s_img.offline:
2562
          inst_nodes_offline.append(snode)
2563

    
2564
      # warn that the instance lives on offline nodes
2565
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2566
               "instance has offline secondary node(s) %s",
2567
               utils.CommaJoin(inst_nodes_offline))
2568
      # ... or ghost/non-vm_capable nodes
2569
      for node in inst_config.all_nodes:
2570
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2571
                 "instance lives on ghost node %s", node)
2572
        _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2573
                 instance, "instance lives on non-vm_capable node %s", node)
2574

    
2575
    feedback_fn("* Verifying orphan volumes")
2576
    reserved = utils.FieldSet(*cluster.reserved_lvs)
2577
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2578

    
2579
    feedback_fn("* Verifying orphan instances")
2580
    self._VerifyOrphanInstances(set(self.all_inst_info.keys()), node_image)
2581

    
2582
    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2583
      feedback_fn("* Verifying N+1 Memory redundancy")
2584
      self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
2585

    
2586
    feedback_fn("* Other Notes")
2587
    if i_non_redundant:
2588
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2589
                  % len(i_non_redundant))
2590

    
2591
    if i_non_a_balanced:
2592
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2593
                  % len(i_non_a_balanced))
2594

    
2595
    if n_offline:
2596
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2597

    
2598
    if n_drained:
2599
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2600

    
2601
    return not self.bad
2602

    
2603
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2604
    """Analyze the post-hooks' result
2605

2606
    This method analyses the hook result, handles it, and sends some
2607
    nicely-formatted feedback back to the user.
2608

2609
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
2610
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2611
    @param hooks_results: the results of the multi-node hooks rpc call
2612
    @param feedback_fn: function used to send feedback back to the caller
2613
    @param lu_result: previous Exec result
2614
    @return: the new Exec result, based on the previous result
2615
        and hook results
2616

2617
    """
2618
    # We only really run POST phase hooks, and are only interested in
2619
    # their results
2620
    if phase == constants.HOOKS_PHASE_POST:
2621
      # Used to change hooks' output to proper indentation
2622
      feedback_fn("* Hooks Results")
2623
      assert hooks_results, "invalid result from hooks"
2624

    
2625
      for node_name in hooks_results:
2626
        res = hooks_results[node_name]
2627
        msg = res.fail_msg
2628
        test = msg and not res.offline
2629
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
2630
                      "Communication failure in hooks execution: %s", msg)
2631
        if res.offline or msg:
2632
          # No need to investigate payload if node is offline or gave an error.
2633
          # override manually lu_result here as _ErrorIf only
2634
          # overrides self.bad
2635
          lu_result = 1
2636
          continue
2637
        for script, hkr, output in res.payload:
2638
          test = hkr == constants.HKR_FAIL
2639
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
2640
                        "Script %s failed, output:", script)
2641
          if test:
2642
            output = self._HOOKS_INDENT_RE.sub('      ', output)
2643
            feedback_fn("%s" % output)
2644
            lu_result = 0
2645

    
2646
      return lu_result
2647

    
2648

    
2649
class LUClusterVerifyDisks(NoHooksLU):
2650
  """Verifies the cluster disks status.
2651

2652
  """
2653
  REQ_BGL = False
2654

    
2655
  def ExpandNames(self):
2656
    self.needed_locks = {
2657
      locking.LEVEL_NODE: locking.ALL_SET,
2658
      locking.LEVEL_INSTANCE: locking.ALL_SET,
2659
    }
2660
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2661

    
2662
  def Exec(self, feedback_fn):
2663
    """Verify integrity of cluster disks.
2664

2665
    @rtype: tuple of three items
2666
    @return: a tuple of (dict of node-to-node_error, list of instances
2667
        which need activate-disks, dict of instance: (node, volume) for
2668
        missing volumes)
2669

2670
    """
2671
    result = res_nodes, res_instances, res_missing = {}, [], {}
2672

    
2673
    nodes = utils.NiceSort(self.cfg.GetVmCapableNodeList())
2674
    instances = self.cfg.GetAllInstancesInfo().values()
2675

    
2676
    nv_dict = {}
2677
    for inst in instances:
2678
      inst_lvs = {}
2679
      if not inst.admin_up:
2680
        continue
2681
      inst.MapLVsByNode(inst_lvs)
2682
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2683
      for node, vol_list in inst_lvs.iteritems():
2684
        for vol in vol_list:
2685
          nv_dict[(node, vol)] = inst
2686

    
2687
    if not nv_dict:
2688
      return result
2689

    
2690
    node_lvs = self.rpc.call_lv_list(nodes, [])
2691
    for node, node_res in node_lvs.items():
2692
      if node_res.offline:
2693
        continue
2694
      msg = node_res.fail_msg
2695
      if msg:
2696
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2697
        res_nodes[node] = msg
2698
        continue
2699

    
2700
      lvs = node_res.payload
2701
      for lv_name, (_, _, lv_online) in lvs.items():
2702
        inst = nv_dict.pop((node, lv_name), None)
2703
        if (not lv_online and inst is not None
2704
            and inst.name not in res_instances):
2705
          res_instances.append(inst.name)
2706

    
2707
    # any leftover items in nv_dict are missing LVs, let's arrange the
2708
    # data better
2709
    for key, inst in nv_dict.iteritems():
2710
      if inst.name not in res_missing:
2711
        res_missing[inst.name] = []
2712
      res_missing[inst.name].append(key)
2713

    
2714
    return result
2715

    
2716

    
2717
class LUClusterRepairDiskSizes(NoHooksLU):
2718
  """Verifies the cluster disks sizes.
2719

2720
  """
2721
  REQ_BGL = False
2722

    
2723
  def ExpandNames(self):
2724
    if self.op.instances:
2725
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
2726
      self.needed_locks = {
2727
        locking.LEVEL_NODE: [],
2728
        locking.LEVEL_INSTANCE: self.wanted_names,
2729
        }
2730
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2731
    else:
2732
      self.wanted_names = None
2733
      self.needed_locks = {
2734
        locking.LEVEL_NODE: locking.ALL_SET,
2735
        locking.LEVEL_INSTANCE: locking.ALL_SET,
2736
        }
2737
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2738

    
2739
  def DeclareLocks(self, level):
2740
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
2741
      self._LockInstancesNodes(primary_only=True)
2742

    
2743
  def CheckPrereq(self):
2744
    """Check prerequisites.
2745

2746
    This only checks the optional instance list against the existing names.
2747

2748
    """
2749
    if self.wanted_names is None:
2750
      self.wanted_names = self.glm.list_owned(locking.LEVEL_INSTANCE)
2751

    
2752
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2753
                             in self.wanted_names]
2754

    
2755
  def _EnsureChildSizes(self, disk):
2756
    """Ensure children of the disk have the needed disk size.
2757

2758
    This is valid mainly for DRBD8 and fixes an issue where the
2759
    children have a smaller disk size.
2760

2761
    @param disk: an L{ganeti.objects.Disk} object
2762

2763
    """
2764
    if disk.dev_type == constants.LD_DRBD8:
2765
      assert disk.children, "Empty children for DRBD8?"
2766
      fchild = disk.children[0]
2767
      mismatch = fchild.size < disk.size
2768
      if mismatch:
2769
        self.LogInfo("Child disk has size %d, parent %d, fixing",
2770
                     fchild.size, disk.size)
2771
        fchild.size = disk.size
2772

    
2773
      # and we recurse on this child only, not on the metadev
2774
      return self._EnsureChildSizes(fchild) or mismatch
2775
    else:
2776
      return False
2777

    
2778
  def Exec(self, feedback_fn):
2779
    """Verify the size of cluster disks.
2780

2781
    """
2782
    # TODO: check child disks too
2783
    # TODO: check differences in size between primary/secondary nodes
2784
    per_node_disks = {}
2785
    for instance in self.wanted_instances:
2786
      pnode = instance.primary_node
2787
      if pnode not in per_node_disks:
2788
        per_node_disks[pnode] = []
2789
      for idx, disk in enumerate(instance.disks):
2790
        per_node_disks[pnode].append((instance, idx, disk))
2791

    
2792
    changed = []
2793
    for node, dskl in per_node_disks.items():
2794
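      # work on copies, as SetDiskID below modifies the disk objects in place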
      newl = [v[2].Copy() for v in dskl]
2795
      for dsk in newl:
2796
        self.cfg.SetDiskID(dsk, node)
2797
      result = self.rpc.call_blockdev_getsize(node, newl)
2798
      if result.fail_msg:
2799
        self.LogWarning("Failure in blockdev_getsize call to node"
2800
                        " %s, ignoring", node)
2801
        continue
2802
      if len(result.payload) != len(dskl):
2803
        logging.warning("Invalid result from node %s: len(dksl)=%d,"
2804
                        " result.payload=%s", node, len(dskl), result.payload)
2805
        self.LogWarning("Invalid result from node %s, ignoring node results",
2806
                        node)
2807
        continue
2808
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
2809
        if size is None:
2810
          self.LogWarning("Disk %d of instance %s did not return size"
2811
                          " information, ignoring", idx, instance.name)
2812
          continue
2813
        if not isinstance(size, (int, long)):
2814
          self.LogWarning("Disk %d of instance %s did not return valid"
2815
                          " size information, ignoring", idx, instance.name)
2816
          continue
2817
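        # the node reports the size in bytes; shift right by 20 bits to get
        # MiB, the unit used for disk sizes in the configuration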
        size = size >> 20
2818
        if size != disk.size:
2819
          self.LogInfo("Disk %d of instance %s has mismatched size,"
2820
                       " correcting: recorded %d, actual %d", idx,
2821
                       instance.name, disk.size, size)
2822
          disk.size = size
2823
          self.cfg.Update(instance, feedback_fn)
2824
          changed.append((instance.name, idx, size))
2825
        if self._EnsureChildSizes(disk):
2826
          self.cfg.Update(instance, feedback_fn)
2827
          changed.append((instance.name, idx, disk.size))
2828
    return changed
2829

    
2830

    
2831
class LUClusterRename(LogicalUnit):
2832
  """Rename the cluster.
2833

2834
  """
2835
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    hostname = netutils.GetHostname(name=self.op.name,
                                    family=self.cfg.GetPrimaryIPFamily())

    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)
    if new_ip != old_ip:
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network" %
                                   new_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    clustername = self.op.name
    ip = self.ip

    # shutdown the master IP
    master = self.cfg.GetMasterNode()
    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetOnlineNodeList()
      try:
        node_list.remove(master)
      except ValueError:
        pass
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
    finally:
      result = self.rpc.call_node_start_master(master, False, False)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)

    return clustername


class LUClusterSetParams(LogicalUnit):
  """Change the parameters of the cluster.

  """
  HPATH = "cluster-modify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  def CheckArguments(self):
    """Check parameters

    """
    if self.op.uid_pool:
      uidpool.CheckUidPool(self.op.uid_pool)

    if self.op.add_uids:
      uidpool.CheckUidPool(self.op.add_uids)

    if self.op.remove_uids:
      uidpool.CheckUidPool(self.op.remove_uids)

  def ExpandNames(self):
    # FIXME: in the future maybe other cluster params won't require checking on
    # all nodes to be modified.
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1
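    # The node locks are taken in shared mode: the checks below only read
    # node state, while the actual modifications touch the cluster
    # configuration rather than the nodes themselves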

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given parameters don't conflict and
    that the given volume group is valid.

    """
    if self.op.vg_name is not None and not self.op.vg_name:
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
                                   " instances exist", errors.ECODE_INVAL)

    if self.op.drbd_helper is not None and not self.op.drbd_helper:
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
        raise errors.OpPrereqError("Cannot disable drbd helper while"
                                   " drbd-based instances exist",
                                   errors.ECODE_INVAL)

    node_list = self.glm.list_owned(locking.LEVEL_NODE)

    # if vg_name not None, checks given volume group on all nodes
    if self.op.vg_name:
      vglist = self.rpc.call_vg_list(node_list)
      for node in node_list:
        msg = vglist[node].fail_msg
        if msg:
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
          continue
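        # CheckVolumeGroupSize returns an error description if the volume
        # group is missing or smaller than the given minimum, and None if it
        # is acceptable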
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              self.op.vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus), errors.ECODE_ENVIRON)

    if self.op.drbd_helper:
      # checks given drbd helper on all nodes
      helpers = self.rpc.call_drbd_helper(node_list)
      for node in node_list:
        ninfo = self.cfg.GetNodeInfo(node)
        if ninfo.offline:
          self.LogInfo("Not checking drbd helper on offline node %s", node)
          continue
        msg = helpers[node].fail_msg
        if msg:
          raise errors.OpPrereqError("Error checking drbd helper on node"
                                     " '%s': %s" % (node, msg),
                                     errors.ECODE_ENVIRON)
        node_helper = helpers[node].payload
        if node_helper != self.op.drbd_helper:
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
                                     (node, node_helper), errors.ECODE_ENVIRON)

    self.cluster = cluster = self.cfg.GetClusterInfo()
    # validate params changes
    if self.op.beparams:
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)

      # TODO: we need a more general way to handle resetting
      # cluster-level parameters to default values
      if self.new_ndparams["oob_program"] == "":
        self.new_ndparams["oob_program"] = \
            constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]

    if self.op.nicparams:
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
      nic_errors = []

      # check all instances for consistency
      for instance in self.cfg.GetAllInstancesInfo().values():
        for nic_idx, nic in enumerate(instance.nics):
          params_copy = copy.deepcopy(nic.nicparams)
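          # Overlay the NIC's own parameters on top of the proposed cluster
          # defaults, so that the values each instance would effectively use
          # are what gets validated below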
          params_filled = objects.FillDict(self.new_nicparams, params_copy)

          # check parameter syntax
          try:
            objects.NIC.CheckParameterSyntax(params_filled)
          except errors.ConfigurationError, err:
            nic_errors.append("Instance %s, nic/%d: %s" %
                              (instance.name, nic_idx, err))

          # if we're moving instances to routed, check that they have an ip
          target_mode = params_filled[constants.NIC_MODE]
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
                              " address" % (instance.name, nic_idx))
      if nic_errors:
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
                                   "\n".join(nic_errors))

    # hypervisor list/parameters
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
    if self.op.hvparams:
      for hv_name, hv_dict in self.op.hvparams.items():
        if hv_name not in self.new_hvparams:
          self.new_hvparams[hv_name] = hv_dict
        else:
          self.new_hvparams[hv_name].update(hv_dict)

    # os hypervisor parameters
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
    if self.op.os_hvp:
      for os_name, hvs in self.op.os_hvp.items():
        if os_name not in self.new_os_hvp:
          self.new_os_hvp[os_name] = hvs
        else:
          for hv_name, hv_dict in hvs.items():
            if hv_name not in self.new_os_hvp[os_name]:
              self.new_os_hvp[os_name][hv_name] = hv_dict
            else:
              self.new_os_hvp[os_name][hv_name].update(hv_dict)

    # os parameters
    self.new_osp = objects.FillDict(cluster.osparams, {})
    if self.op.osparams:
      for os_name, osp in self.op.osparams.items():
        if os_name not in self.new_osp:
          self.new_osp[os_name] = {}

        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
                                                  use_none=True)

        if not self.new_osp[os_name]:
          # we removed all parameters
          del self.new_osp[os_name]
        else:
          # check the parameter validity (remote check)
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
                         os_name, self.new_osp[os_name])

    # changes to the hypervisor list
    if self.op.enabled_hypervisors is not None:
      self.hv_list = self.op.enabled_hypervisors
      for hv in self.hv_list:
        # if the hypervisor doesn't already exist in the cluster
        # hvparams, we initialize it to empty, and then (in both
        # cases) we make sure to fill the defaults, as we might not
        # have a complete defaults list if the hypervisor wasn't
        # enabled before
        if hv not in new_hvp:
          new_hvp[hv] = {}
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
    else:
      self.hv_list = cluster.enabled_hypervisors

    if self.op.hvparams or self.op.enabled_hypervisors is not None:
      # either the enabled list has changed, or the parameters have, validate
      for hv_name, hv_params in self.new_hvparams.items():
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
            (self.op.enabled_hypervisors and
             hv_name in self.op.enabled_hypervisors)):
          # either this is a new hypervisor, or its parameters have changed
          hv_class = hypervisor.GetHypervisor(hv_name)
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          hv_class.CheckParameterSyntax(hv_params)
          _CheckHVParams(self, node_list, hv_name, hv_params)

    if self.op.os_hvp:
      # no need to check any newly-enabled hypervisors, since the
      # defaults have already been checked in the above code-block
      for os_name, os_hvp in self.new_os_hvp.items():
        for hv_name, hv_params in os_hvp.items():
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          # we need to fill in the new os_hvp on top of the actual hv_p
          cluster_defaults = self.new_hvparams.get(hv_name, {})
          new_osp = objects.FillDict(cluster_defaults, hv_params)
          hv_class = hypervisor.GetHypervisor(hv_name)
          hv_class.CheckParameterSyntax(new_osp)
          _CheckHVParams(self, node_list, hv_name, new_osp)

    if self.op.default_iallocator:
      alloc_script = utils.FindFile(self.op.default_iallocator,
                                    constants.IALLOCATOR_SEARCH_PATH,
                                    os.path.isfile)
      if alloc_script is None:
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
                                   " specified" % self.op.default_iallocator,
                                   errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    """
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
    if self.op.drbd_helper is not None:
      new_helper = self.op.drbd_helper
      if not new_helper:
        new_helper = None
      if new_helper != self.cfg.GetDRBDHelper():
        self.cfg.SetDRBDHelper(new_helper)
      else:
        feedback_fn("Cluster DRBD helper already in desired state,"
                    " not changing")
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
    if self.op.enabled_hypervisors is not None:
      self.cluster.hvparams = self.new_hvparams
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
    if self.op.beparams:
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
    if self.op.nicparams:
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
    if self.op.osparams:
      self.cluster.osparams = self.new_osp
    if self.op.ndparams:
      self.cluster.ndparams = self.new_ndparams

    if self.op.candidate_pool_size is not None:
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
      # we need to update the pool size here, otherwise the save will fail
      _AdjustCandidatePool(self, [])

    if self.op.maintain_node_health is not None:
      self.cluster.maintain_node_health = self.op.maintain_node_health

    if self.op.prealloc_wipe_disks is not None:
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks

    if self.op.add_uids is not None:
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)

    if self.op.remove_uids is not None:
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)

    if self.op.uid_pool is not None:
      self.cluster.uid_pool = self.op.uid_pool

    if self.op.default_iallocator is not None:
      self.cluster.default_iallocator = self.op.default_iallocator

    if self.op.reserved_lvs is not None:
      self.cluster.reserved_lvs = self.op.reserved_lvs

    def helper_os(aname, mods, desc):
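      # 'mods' is a list of (operation, OS name) pairs, for example
      # [(constants.DDM_ADD, "debian-image")]; the OS name here is purely
      # illustrative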
      desc += " OS list"
      lst = getattr(self.cluster, aname)
      for key, val in mods:
        if key == constants.DDM_ADD:
          if val in lst:
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
          else:
            lst.append(val)
        elif key == constants.DDM_REMOVE:
          if val in lst:
            lst.remove(val)
          else:
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
        else:
          raise errors.ProgrammerError("Invalid modification '%s'" % key)

    if self.op.hidden_os:
      helper_os("hidden_os", self.op.hidden_os, "hidden")

    if self.op.blacklisted_os:
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")

    if self.op.master_netdev:
      master = self.cfg.GetMasterNode()
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
                  self.cluster.master_netdev)
      result = self.rpc.call_node_stop_master(master, False)
      result.Raise("Could not disable the master ip")
      feedback_fn("Changing master_netdev from %s to %s" %
                  (self.cluster.master_netdev, self.op.master_netdev))
      self.cluster.master_netdev = self.op.master_netdev

    self.cfg.Update(self.cluster, feedback_fn)

    if self.op.master_netdev:
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
                  self.op.master_netdev)
      result = self.rpc.call_node_start_master(master, False, False)
      if result.fail_msg:
        self.LogWarning("Could not re-enable the master ip on"
                        " the master, please restart manually: %s",
                        result.fail_msg)


def _UploadHelper(lu, nodes, fname):
  """Helper for uploading a file and showing warnings.

  """
  if os.path.exists(fname):
    result = lu.rpc.call_upload_file(nodes, fname)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        msg = ("Copy of file %s to node %s failed: %s" %
               (fname, to_node, msg))
        lu.proc.LogWarning(msg)


def _ComputeAncillaryFiles(cluster, redist):
  """Compute files external to Ganeti which need to be consistent.

  @type redist: boolean
  @param redist: Whether to include files which need to be redistributed

  """
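  # The return value is the tuple (files_all, files_all_opt, files_mc,
  # files_vm); as the assertion below enforces, a file name may appear in at
  # most one of these sets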
  # Compute files for all nodes
  files_all = set([
    constants.SSH_KNOWN_HOSTS_FILE,
    constants.CONFD_HMAC_KEY,
    constants.CLUSTER_DOMAIN_SECRET_FILE,
    ])

  if not redist:
    files_all.update(constants.ALL_CERT_FILES)
    files_all.update(ssconf.SimpleStore().GetFileList())

  if cluster.modify_etc_hosts:
    files_all.add(constants.ETC_HOSTS)

  # Files which must either exist on all nodes or on none
  files_all_opt = set([
    constants.RAPI_USERS_FILE,
    ])

  # Files which should only be on master candidates
  files_mc = set()
  if not redist:
    files_mc.add(constants.CLUSTER_CONF_FILE)

  # Files which should only be on VM-capable nodes
  files_vm = set(filename
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())

  # Filenames must be unique
  assert (len(files_all | files_all_opt | files_mc | files_vm) ==
          sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
         "Found file listed in more than one file list"

  return (files_all, files_all_opt, files_mc, files_vm)


def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  # Gather target nodes
  cluster = lu.cfg.GetClusterInfo()
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())

  online_nodes = lu.cfg.GetOnlineNodeList()
  vm_nodes = lu.cfg.GetVmCapableNodeList()

  if additional_nodes is not None:
    online_nodes.extend(additional_nodes)
    if additional_vm:
      vm_nodes.extend(additional_nodes)

  # Never distribute to master node
  for nodelist in [online_nodes, vm_nodes]:
    if master_info.name in nodelist:
      nodelist.remove(master_info.name)

  # Gather file lists
  (files_all, files_all_opt, files_mc, files_vm) = \
    _ComputeAncillaryFiles(cluster, True)

  # Never re-distribute configuration file from here
  assert not (constants.CLUSTER_CONF_FILE in files_all or
              constants.CLUSTER_CONF_FILE in files_vm)
  assert not files_mc, "Master candidates not handled in this function"

  filemap = [
    (online_nodes, files_all),
    (online_nodes, files_all_opt),
    (vm_nodes, files_vm),
    ]

  # Upload the files
  for (node_list, files) in filemap:
    for fname in files:
      _UploadHelper(lu, node_list, fname)


class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)

def _WaitForSync(lu, instance, disks=None, oneshot=False):
3387
  """Sleep and poll for an instance's disk to sync.
3388

3389
  """
3390
  if not instance.disks or disks is not None and not disks:
3391
    return True
3392

    
3393
  disks = _ExpandCheckDisks(instance, disks)
3394

    
3395
  if not oneshot:
3396
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3397

    
3398
  node = instance.primary_node
3399

    
3400
  for dev in disks:
3401
    lu.cfg.SetDiskID(dev, node)
3402

    
3403
  # TODO: Convert to utils.Retry
3404

    
3405
  retries = 0
3406
  degr_retries = 10 # in seconds, as we sleep 1 second each time
3407
  while True:
3408
    max_time = 0
3409
    done = True
3410
    cumul_degraded = False
3411
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3412
    msg = rstats.fail_msg
3413
    if msg:
3414
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3415
      retries += 1
3416
      if retries >= 10:
3417
        raise errors.RemoteError("Can't contact node %s for mirror data,"
3418
                                 " aborting." % node)
3419
      time.sleep(6)
3420
      continue
3421
    rstats = rstats.payload
3422
    retries = 0
3423
    for i, mstat in enumerate(rstats):
3424
      if mstat is None:
3425
        lu.LogWarning("Can't compute data for node %s/%s",
3426
                           node, disks[i].iv_name)
3427
        continue
3428

    
3429
      cumul_degraded = (cumul_degraded or
3430
                        (mstat.is_degraded and mstat.sync_percent is None))
3431
      if mstat.sync_percent is not None:
3432
        done = False
3433
        if mstat.estimated_time is not None:
3434
          rem_time = ("%s remaining (estimated)" %
3435
                      utils.FormatSeconds(mstat.estimated_time))
3436
          max_time = mstat.estimated_time
3437
        else:
3438
          rem_time = "no time estimate"
3439
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3440
                        (disks[i].iv_name, mstat.sync_percent, rem_time))
3441

    
3442
    # if we're done but degraded, let's do a few small retries, to
3443
    # make sure we see a stable and not transient situation; therefore
3444
    # we force restart of the loop
3445
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
3446
      logging.info("Degraded disks found, %d retries left", degr_retries)
3447
      degr_retries -= 1
3448
      time.sleep(1)
3449
      continue
3450

    
3451
    if done or oneshot:
3452
      break
3453

    
3454
    time.sleep(min(60, max_time))
3455

    
3456
  if done:
3457
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3458
  return not cumul_degraded
3459

    
3460

    
3461
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3462
  """Check that mirrors are not degraded.
3463

3464
  The ldisk parameter, if True, will change the test from the
3465
  is_degraded attribute (which represents overall non-ok status for
3466
  the device(s)) to the ldisk (representing the local storage status).
3467

3468
  """
3469
  lu.cfg.SetDiskID(dev, node)
3470

    
3471
  result = True
3472

    
3473
  if on_primary or dev.AssembleOnSecondary():
3474
    rstats = lu.rpc.call_blockdev_find(node, dev)
3475
    msg = rstats.fail_msg
3476
    if msg:
3477
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3478
      result = False
3479
    elif not rstats.payload:
3480
      lu.LogWarning("Can't find disk on node %s", node)
3481
      result = False
3482
    else:
3483
      if ldisk:
3484
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3485
      else:
3486
        result = result and not rstats.payload.is_degraded
3487

    
3488
  if dev.children:
3489
    for child in dev.children:
3490
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3491

    
3492
  return result
3493

    
3494

    
3495
class LUOobCommand(NoHooksLU):
3496
  """Logical unit for OOB handling.
3497

3498
  """
3499
  REQ_BGL = False
3500
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
3501

    
3502
  def ExpandNames(self):
3503
    """Gather locks we need.
3504

3505
    """
3506
    if self.op.node_names:
3507
      self.op.node_names = _GetWantedNodes(self, self.op.node_names)
3508
      lock_names = self.op.node_names
3509
    else:
3510
      lock_names = locking.ALL_SET
3511

    
3512
    self.needed_locks = {
3513
      locking.LEVEL_NODE: lock_names,
3514
      }
3515

    
3516
  def CheckPrereq(self):
3517
    """Check prerequisites.
3518

3519
    This checks:
3520
     - the node exists in the configuration
3521
     - OOB is supported
3522

3523
    Any errors are signaled by raising errors.OpPrereqError.
3524

3525
    """
3526
    self.nodes = []
3527
    self.master_node = self.cfg.GetMasterNode()
3528

    
3529
    assert self.op.power_delay >= 0.0
3530

    
3531
    if self.op.node_names:
3532
      if (self.op.command in self._SKIP_MASTER and
3533
          self.master_node in self.op.node_names):
3534
        master_node_obj = self.cfg.GetNodeInfo(self.master_node)
3535
        master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
3536

    
3537
        if master_oob_handler:
3538
          additional_text = ("run '%s %s %s' if you want to operate on the"
3539
                             " master regardless") % (master_oob_handler,
3540
                                                      self.op.command,
3541
                                                      self.master_node)
3542
        else:
3543
          additional_text = "it does not support out-of-band operations"
3544

    
3545
        raise errors.OpPrereqError(("Operating on the master node %s is not"
3546
                                    " allowed for %s; %s") %
3547
                                   (self.master_node, self.op.command,
3548
                                    additional_text), errors.ECODE_INVAL)
3549
    else:
3550
      self.op.node_names = self.cfg.GetNodeList()
3551
      if self.op.command in self._SKIP_MASTER:
3552
        self.op.node_names.remove(self.master_node)
3553

    
3554
    if self.op.command in self._SKIP_MASTER:
3555
      assert self.master_node not in self.op.node_names
3556

    
3557
    for node_name in self.op.node_names:
3558
      node = self.cfg.GetNodeInfo(node_name)
3559

    
3560
      if node is None:
3561
        raise errors.OpPrereqError("Node %s not found" % node_name,
3562
                                   errors.ECODE_NOENT)
3563
      else:
3564
        self.nodes.append(node)
3565

    
3566
      if (not self.op.ignore_status and
3567
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
3568
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
3569
                                    " not marked offline") % node_name,
3570
                                   errors.ECODE_STATE)
3571

    
3572
  def Exec(self, feedback_fn):
3573
    """Execute OOB and return result if we expect any.
3574

3575
    """
3576
    master_node = self.master_node
3577
    ret = []
3578

    
3579
    for idx, node in enumerate(utils.NiceSort(self.nodes,
3580
                                              key=lambda node: node.name)):
3581
      node_entry = [(constants.RS_NORMAL, node.name)]
3582
      ret.append(node_entry)
3583

    
3584
      oob_program = _SupportsOob(self.cfg, node)
3585

    
3586
      if not oob_program:
3587
        node_entry.append((constants.RS_UNAVAIL, None))
3588
        continue
3589

    
3590
      logging.info("Executing out-of-band command '%s' using '%s' on %s",
3591
                   self.op.command, oob_program, node.name)
3592
      result = self.rpc.call_run_oob(master_node, oob_program,
3593
                                     self.op.command, node.name,
3594
                                     self.op.timeout)
3595

    
3596
      if result.fail_msg:
3597
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
3598
                        node.name, result.fail_msg)
3599
        node_entry.append((constants.RS_NODATA, None))
3600
      else:
3601
        try:
3602
          self._CheckPayload(result)
3603
        except errors.OpExecError, err:
3604
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
3605
                          node.name, err)
3606
          node_entry.append((constants.RS_NODATA, None))
3607
        else:
3608
          if self.op.command == constants.OOB_HEALTH:
3609
            # For health we should log important events
3610
            for item, status in result.payload:
3611
              if status in [constants.OOB_STATUS_WARNING,
3612
                            constants.OOB_STATUS_CRITICAL]:
3613
                self.LogWarning("Item '%s' on node '%s' has status '%s'",
3614
                                item, node.name, status)
3615

    
3616
          if self.op.command == constants.OOB_POWER_ON:
3617
            node.powered = True
3618
          elif self.op.command == constants.OOB_POWER_OFF:
3619
            node.powered = False
3620
          elif self.op.command == constants.OOB_POWER_STATUS:
3621
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
3622
            if powered != node.powered:
3623
              logging.warning(("Recorded power state (%s) of node '%s' does not"
3624
                               " match actual power state (%s)"), node.powered,
3625
                              node.name, powered)
3626

    
3627
          # For configuration changing commands we should update the node
3628
          if self.op.command in (constants.OOB_POWER_ON,
3629
                                 constants.OOB_POWER_OFF):
3630
            self.cfg.Update(node, feedback_fn)
3631

    
3632
          node_entry.append((constants.RS_NORMAL, result.payload))
3633

    
3634
          if (self.op.command == constants.OOB_POWER_ON and
3635
              idx < len(self.nodes) - 1):
3636
            time.sleep(self.op.power_delay)
3637

    
3638
    return ret
3639

    
3640
  def _CheckPayload(self, result):
3641
    """Checks if the payload is valid.
3642

3643
    @param result: RPC result
3644
    @raises errors.OpExecError: If payload is not valid
3645

3646
    """
3647
    errs = []
3648
    if self.op.command == constants.OOB_HEALTH:
3649
      if not isinstance(result.payload, list):
3650
        errs.append("command 'health' is expected to return a list but got %s" %
3651
                    type(result.payload))
3652
      else:
3653
        for item, status in result.payload:
3654
          if status not in constants.OOB_STATUSES:
3655
            errs.append("health item '%s' has invalid status '%s'" %
3656
                        (item, status))
3657

    
3658
    if self.op.command == constants.OOB_POWER_STATUS:
3659
      if not isinstance(result.payload, dict):
3660
        errs.append("power-status is expected to return a dict but got %s" %
3661
                    type(result.payload))
3662

    
3663
    if self.op.command in [
3664
        constants.OOB_POWER_ON,
3665
        constants.OOB_POWER_OFF,
3666
        constants.OOB_POWER_CYCLE,
3667
        ]:
3668
      if result.payload is not None:
3669
        errs.append("%s is expected to not return payload but got '%s'" %
3670
                    (self.op.command, result.payload))
3671

    
3672
    if errs:
3673
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
3674
                               utils.CommaJoin(errs))
3675

    
3676
class _OsQuery(_QueryBase):
3677
  FIELDS = query.OS_FIELDS
3678

    
3679
  def ExpandNames(self, lu):
3680
    # Lock all nodes in shared mode
3681
    # Temporary removal of locks, should be reverted later
3682
    # TODO: reintroduce locks when they are lighter-weight
3683
    lu.needed_locks = {}
3684
    #self.share_locks[locking.LEVEL_NODE] = 1
3685
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3686

    
3687
    # The following variables interact with _QueryBase._GetNames
3688
    if self.names:
3689
      self.wanted = self.names
3690
    else:
3691
      self.wanted = locking.ALL_SET
3692

    
3693
    self.do_locking = self.use_locking
3694

    
3695
  def DeclareLocks(self, lu, level):
3696
    pass
3697

    
3698
  @staticmethod
3699
  def _DiagnoseByOS(rlist):
3700
    """Remaps a per-node return list into an a per-os per-node dictionary
3701

3702
    @param rlist: a map with node names as keys and OS objects as values
3703

3704
    @rtype: dict
3705
    @return: a dictionary with osnames as keys and as value another
3706
        map, with nodes as keys and tuples of (path, status, diagnose,
3707
        variants, parameters, api_versions) as values, eg::
3708

3709
          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3710
                                     (/srv/..., False, "invalid api")],
3711
                           "node2": [(/srv/..., True, "", [], [])]}
3712
          }
3713

3714
    """
3715
    all_os = {}
3716
    # we build here the list of nodes that didn't fail the RPC (at RPC
3717
    # level), so that nodes with a non-responding node daemon don't
3718
    # make all OSes invalid
3719
    good_nodes = [node_name for node_name in rlist
3720
                  if not rlist[node_name].fail_msg]
3721
    for node_name, nr in rlist.items():
3722
      if nr.fail_msg or not nr.payload:
3723
        continue
3724
      for (name, path, status, diagnose, variants,
3725
           params, api_versions) in nr.payload:
3726
        if name not in all_os:
3727
          # build a list of nodes for this os containing empty lists
3728
          # for each node in node_list
3729
          all_os[name] = {}
3730
          for nname in good_nodes:
3731
            all_os[name][nname] = []
3732
        # convert params from [name, help] to (name, help)
3733
        params = [tuple(v) for v in params]
3734
        all_os[name][node_name].append((path, status, diagnose,
3735
                                        variants, params, api_versions))
3736
    return all_os
3737

    
3738
  def _GetQueryData(self, lu):
3739
    """Computes the list of nodes and their attributes.
3740

3741
    """
3742
    # Locking is not used
3743
    assert not (compat.any(lu.glm.is_owned(level)
3744
                           for level in locking.LEVELS
3745
                           if level != locking.LEVEL_CLUSTER) or
3746
                self.do_locking or self.use_locking)
3747

    
3748
    valid_nodes = [node.name
3749
                   for node in lu.cfg.GetAllNodesInfo().values()
3750
                   if not node.offline and node.vm_capable]
3751
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
3752
    cluster = lu.cfg.GetClusterInfo()
3753

    
3754
    data = {}
3755

    
3756
    for (os_name, os_data) in pol.items():
3757
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
3758
                          hidden=(os_name in cluster.hidden_os),
3759
                          blacklisted=(os_name in cluster.blacklisted_os))
3760

    
3761
      variants = set()
3762
      parameters = set()
3763
      api_versions = set()
3764

    
3765
      for idx, osl in enumerate(os_data.values()):
3766
        info.valid = bool(info.valid and osl and osl[0][1])
3767
        if not info.valid:
3768
          break
3769

    
3770
        (node_variants, node_params, node_api) = osl[0][3:6]
3771
        if idx == 0:
3772
          # First entry
3773
          variants.update(node_variants)
3774
          parameters.update(node_params)
3775
          api_versions.update(node_api)
3776
        else:
3777
          # Filter out inconsistent values
3778
          variants.intersection_update(node_variants)
3779
          parameters.intersection_update(node_params)
3780
          api_versions.intersection_update(node_api)
3781

    
3782
      info.variants = list(variants)
3783
      info.parameters = list(parameters)
3784
      info.api_versions = list(api_versions)
3785

    
3786
      data[os_name] = info
3787

    
3788
    # Prepare data in requested order
3789
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
3790
            if name in data]
3791

    
3792

    
3793
class LUOsDiagnose(NoHooksLU):
3794
  """Logical unit for OS diagnose/query.
3795

3796
  """
3797
  REQ_BGL = False
3798

    
3799
  @staticmethod
3800
  def _BuildFilter(fields, names):
3801
    """Builds a filter for querying OSes.
3802

3803
    """
3804
    name_filter = qlang.MakeSimpleFilter("name", names)
3805

    
3806
    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
3807
    # respective field is not requested
3808
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
3809
                     for fname in ["hidden", "blacklisted"]
3810
                     if fname not in fields]
3811
    if "valid" not in fields:
3812
      status_filter.append([qlang.OP_TRUE, "valid"])
3813

    
3814
    if status_filter:
3815
      status_filter.insert(0, qlang.OP_AND)
3816
    else:
3817
      status_filter = None
3818

    
3819
    if name_filter and status_filter:
3820
      return [qlang.OP_AND, name_filter, status_filter]
3821
    elif name_filter:
3822
      return name_filter
3823
    else:
3824
      return status_filter
3825

    
3826
  def CheckArguments(self):
3827
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
3828
                       self.op.output_fields, False)
3829

    
3830
  def ExpandNames(self):
3831
    self.oq.ExpandNames(self)
3832

    
3833
  def Exec(self, feedback_fn):
3834
    return self.oq.OldStyleQuery(self)
3835

    
3836

    
3837
class LUNodeRemove(LogicalUnit):
3838
  """Logical unit for removing a node.
3839

3840
  """
3841
  HPATH = "node-remove"
3842
  HTYPE = constants.HTYPE_NODE
3843

    
3844
  def BuildHooksEnv(self):
3845
    """Build hooks env.
3846

3847
    This doesn't run on the target node in the pre phase as a failed
3848
    node would then be impossible to remove.
3849

3850
    """
3851
    return {
3852
      "OP_TARGET": self.op.node_name,
3853
      "NODE_NAME": self.op.node_name,
3854
      }
3855

    
3856
  def BuildHooksNodes(self):
3857
    """Build hooks nodes.
3858

3859
    """
3860
    all_nodes = self.cfg.GetNodeList()
3861
    try:
3862
      all_nodes.remove(self.op.node_name)
3863
    except ValueError:
3864
      logging.warning("Node '%s', which is about to be removed, was not found"
3865
                      " in the list of all nodes", self.op.node_name)
3866
    return (all_nodes, all_nodes)
3867

    
3868
  def CheckPrereq(self):
3869
    """Check prerequisites.
3870

3871
    This checks:
3872
     - the node exists in the configuration
3873
     - it does not have primary or secondary instances
3874
     - it's not the master
3875

3876
    Any errors are signaled by raising errors.OpPrereqError.
3877

3878
    """
3879
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3880
    node = self.cfg.GetNodeInfo(self.op.node_name)
3881
    assert node is not None
3882

    
3883
    instance_list = self.cfg.GetInstanceList()
3884

    
3885
    masternode = self.cfg.GetMasterNode()
3886
    if node.name == masternode:
3887
      raise errors.OpPrereqError("Node is the master node, failover to another"
3888
                                 " node is required", errors.ECODE_INVAL)
3889

    
3890
    for instance_name in instance_list:
3891
      instance = self.cfg.GetInstanceInfo(instance_name)
3892
      if node.name in instance.all_nodes:
3893
        raise errors.OpPrereqError("Instance %s is still running on the node,"
3894
                                   " please remove first" % instance_name,
3895
                                   errors.ECODE_INVAL)
3896
    self.op.node_name = node.name
3897
    self.node = node
3898

    
3899
  def Exec(self, feedback_fn):
3900
    """Removes the node from the cluster.
3901

3902
    """
3903
    node = self.node
3904
    logging.info("Stopping the node daemon and removing configs from node %s",
3905
                 node.name)
3906

    
3907
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3908

    
3909
    # Promote nodes to master candidate as needed
3910
    _AdjustCandidatePool(self, exceptions=[node.name])
3911
    self.context.RemoveNode(node.name)
3912

    
3913
    # Run post hooks on the node before it's removed
3914
    _RunPostHook(self, node.name)
3915

    
3916
    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3917
    msg = result.fail_msg
3918
    if msg:
3919
      self.LogWarning("Errors encountered on the remote node while leaving"
3920
                      " the cluster: %s", msg)
3921

    
3922
    # Remove node from our /etc/hosts
3923
    if self.cfg.GetClusterInfo().modify_etc_hosts:
3924
      master_node = self.cfg.GetMasterNode()
3925
      result = self.rpc.call_etc_hosts_modify(master_node,
3926
                                              constants.ETC_HOSTS_REMOVE,
3927
                                              node.name, None)
3928
      result.Raise("Can't update hosts file with new host data")
3929
      _RedistributeAncillaryFiles(self)
3930

    
3931

    
3932
class _NodeQuery(_QueryBase):
3933
  FIELDS = query.NODE_FIELDS
3934

    
3935
  def ExpandNames(self, lu):
3936
    lu.needed_locks = {}
3937
    lu.share_locks[locking.LEVEL_NODE] = 1
3938

    
3939
    if self.names:
3940
      self.wanted = _GetWantedNodes(lu, self.names)
3941
    else:
3942
      self.wanted = locking.ALL_SET
3943

    
3944
    self.do_locking = (self.use_locking and
3945
                       query.NQ_LIVE in self.requested_data)
3946

    
3947
    if self.do_locking:
3948
      # if we don't request only static fields, we need to lock the nodes
3949
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted
3950

    
3951
  def DeclareLocks(self, lu, level):
3952
    pass
3953

    
3954
  def _GetQueryData(self, lu):
3955
    """Computes the list of nodes and their attributes.
3956

3957
    """
3958
    all_info = lu.cfg.GetAllNodesInfo()
3959

    
3960
    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
3961

    
3962
    # Gather data as requested
3963
    if query.NQ_LIVE in self.requested_data:
3964
      # filter out non-vm_capable nodes
3965
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
3966

    
3967
      node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
3968
                                        lu.cfg.GetHypervisorType())
3969
      live_data = dict((name, nresult.payload)
3970
                       for (name, nresult) in node_data.items()
3971
                       if not nresult.fail_msg and nresult.payload)
3972
    else:
3973
      live_data = None
3974

    
3975
    if query.NQ_INST in self.requested_data:
3976
      node_to_primary = dict([(name, set()) for name in nodenames])
3977
      node_to_secondary = dict([(name, set()) for name in nodenames])
3978

    
3979
      inst_data = lu.cfg.GetAllInstancesInfo()
3980

    
3981
      for inst in inst_data.values():
3982
        if inst.primary_node in node_to_primary:
3983
          node_to_primary[inst.primary_node].add(inst.name)
3984
        for secnode in inst.secondary_nodes:
3985
          if secnode in node_to_secondary:
3986
            node_to_secondary[secnode].add(inst.name)
3987
    else:
3988
      node_to_primary = None
3989
      node_to_secondary = None
3990

    
3991
    if query.NQ_OOB in self.requested_data:
3992
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
3993
                         for name, node in all_info.iteritems())
3994
    else:
3995
      oob_support = None
3996

    
3997
    if query.NQ_GROUP in self.requested_data:
3998
      groups = lu.cfg.GetAllNodeGroupsInfo()
3999
    else:
4000
      groups = {}
4001

    
4002
    return query.NodeQueryData([all_info[name] for name in nodenames],
4003
                               live_data, lu.cfg.GetMasterNode(),
4004
                               node_to_primary, node_to_secondary, groups,
4005
                               oob_support, lu.cfg.GetClusterInfo())
4006

    
4007

    
4008
class LUNodeQuery(NoHooksLU):
4009
  """Logical unit for querying nodes.
4010

4011
  """
4012
  # pylint: disable-msg=W0142
4013
  REQ_BGL = False
4014

    
4015
  def CheckArguments(self):
4016
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4017
                         self.op.output_fields, self.op.use_locking)
4018

    
4019
  def ExpandNames(self):
4020
    self.nq.ExpandNames(self)
4021

    
4022
  def Exec(self, feedback_fn):
4023
    return self.nq.OldStyleQuery(self)
4024

    
4025

    
4026
class LUNodeQueryvols(NoHooksLU):
4027
  """Logical unit for getting volumes on node(s).
4028

4029
  """
4030
  REQ_BGL = False
4031
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4032
  _FIELDS_STATIC = utils.FieldSet("node")
4033

    
4034
  def CheckArguments(self):
4035
    _CheckOutputFields(static=self._FIELDS_STATIC,
4036
                       dynamic=self._FIELDS_DYNAMIC,
4037
                       selected=self.op.output_fields)
4038

    
4039
  def ExpandNames(self):
4040
    self.needed_locks = {}
4041
    self.share_locks[locking.LEVEL_NODE] = 1
4042
    if not self.op.nodes:
4043
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4044
    else:
4045
      self.needed_locks[locking.LEVEL_NODE] = \
4046
        _GetWantedNodes(self, self.op.nodes)
4047

    
4048
  def Exec(self, feedback_fn):
4049
    """Computes the list of nodes and their attributes.
4050

4051
    """
4052
    nodenames = self.glm.list_owned(locking.LEVEL_NODE)
4053
    volumes = self.rpc.call_node_volumes(nodenames)
4054

    
4055
    ilist = [self.cfg.GetInstanceInfo(iname) for iname
4056
             in self.cfg.GetInstanceList()]
4057

    
4058
    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
4059

    
4060
    output = []
4061
    for node in nodenames:
4062
      nresult = volumes[node]
4063
      if nresult.offline:
4064
        continue
4065
      msg = nresult.fail_msg
4066
      if msg:
4067
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4068
        continue
4069

    
4070
      node_vols = nresult.payload[:]
4071
      node_vols.sort(key=lambda vol: vol['dev'])
4072

    
4073
      for vol in node_vols:
4074
        node_output = []
4075
        for field in self.op.output_fields:
4076
          if field == "node":
4077
            val = node
4078
          elif field == "phys":
4079
            val = vol['dev']
4080
          elif field == "vg":
4081
            val = vol['vg']
4082
          elif field == "name":
4083
            val = vol['name']
4084
          elif field == "size":
4085
            val = int(float(vol['size']))
4086
          elif field == "instance":
4087
            for inst in ilist:
4088
              if node not in lv_by_node[inst]:
4089
                continue
4090
              if vol['name'] in lv_by_node[inst][node]:
4091
                val = inst.name
4092
                break
4093
            else:
4094
              val = '-'
4095
          else:
4096
            raise errors.ParameterError(field)
4097
          node_output.append(str(val))
4098

    
4099
        output.append(node_output)
4100

    
4101
    return output
4102

    
4103

    
4104
class LUNodeQueryStorage(NoHooksLU):
4105
  """Logical unit for getting information on storage units on node(s).
4106

4107
  """
4108
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4109
  REQ_BGL = False
4110

    
4111
  def CheckArguments(self):
4112
    _CheckOutputFields(static=self._FIELDS_STATIC,
4113
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4114
                       selected=self.op.output_fields)
4115

    
4116
  def ExpandNames(self):
4117
    self.needed_locks = {}
4118
    self.share_locks[locking.LEVEL_NODE] = 1
4119

    
4120
    if self.op.nodes:
4121
      self.needed_locks[locking.LEVEL_NODE] = \
4122
        _GetWantedNodes(self, self.op.nodes)
4123
    else:
4124
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4125

    
4126
  def Exec(self, feedback_fn):
4127
    """Computes the list of nodes and their attributes.
4128

4129
    """
4130
    self.nodes = self.glm.list_owned(locking.LEVEL_NODE)
4131

    
4132
    # Always get name to sort by
4133
    if constants.SF_NAME in self.op.output_fields:
4134
      fields = self.op.output_fields[:]
4135
    else:
4136
      fields = [constants.SF_NAME] + self.op.output_fields
4137

    
4138
    # Never ask for node or type as it's only known to the LU
4139
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
4140
      while extra in fields:
4141
        fields.remove(extra)
4142

    
4143
    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4144
    name_idx = field_idx[constants.SF_NAME]
4145

    
4146
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4147
    data = self.rpc.call_storage_list(self.nodes,
4148
                                      self.op.storage_type, st_args,
4149
                                      self.op.name, fields)
4150

    
4151
    result = []
4152

    
4153
    for node in utils.NiceSort(self.nodes):
4154
      nresult = data[node]
4155
      if nresult.offline:
4156
        continue
4157

    
4158
      msg = nresult.fail_msg
4159
      if msg:
4160
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4161
        continue
4162

    
4163
      rows = dict([(row[name_idx], row) for row in nresult.payload])
4164

    
4165
      for name in utils.NiceSort(rows.keys()):
4166
        row = rows[name]
4167

    
4168
        out = []
4169

    
4170
        for field in self.op.output_fields:
4171
          if field == constants.SF_NODE:
4172
            val = node
4173
          elif field == constants.SF_TYPE:
4174
            val = self.op.storage_type
4175
          elif field in field_idx:
4176
            val = row[field_idx[field]]
4177
          else:
4178
            raise errors.ParameterError(field)
4179

    
4180
          out.append(val)
4181

    
4182
        result.append(out)
4183

    
4184
    return result
4185

    
4186

    
4187
class _InstanceQuery(_QueryBase):
4188
  FIELDS = query.INSTANCE_FIELDS
4189

    
4190
  def ExpandNames(self, lu):
4191
    lu.needed_locks = {}
4192
    lu.share_locks[locking.LEVEL_INSTANCE] = 1
4193
    lu.share_locks[locking.LEVEL_NODE] = 1
4194

    
4195
    if self.names:
4196
      self.wanted = _GetWantedInstances(lu, self.names)
4197
    else:
4198
      self.wanted = locking.ALL_SET
4199

    
4200
    self.do_locking = (self.use_locking and
4201
                       query.IQ_LIVE in self.requested_data)
4202
    if self.do_locking:
4203
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4204
      lu.needed_locks[locking.LEVEL_NODE] = []
4205
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4206

    
4207
  def DeclareLocks(self, lu, level):
4208
    if level == locking.LEVEL_NODE and self.do_locking:
4209
      lu._LockInstancesNodes() # pylint: disable-msg=W0212
4210

    
4211
  def _GetQueryData(self, lu):
4212
    """Computes the list of instances and their attributes.
4213

4214
    """
4215
    cluster = lu.cfg.GetClusterInfo()
4216
    all_info = lu.cfg.GetAllInstancesInfo()
4217

    
4218
    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4219

    
4220
    instance_list = [all_info[name] for name in instance_names]
4221
    nodes = frozenset(itertools.chain(*(inst.all_nodes
4222
                                        for inst in instance_list)))
4223
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
4224
    bad_nodes = []
4225
    offline_nodes = []
4226
    wrongnode_inst = set()
4227

    
4228
    # Gather data as requested
4229
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4230
      live_data = {}
4231
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4232
      for name in nodes:
4233
        result = node_data[name]
4234
        if result.offline:
4235
          # offline nodes will be in both lists
4236
          assert result.fail_msg
4237
          offline_nodes.append(name)
4238
        if result.fail_msg:
4239
          bad_nodes.append(name)
4240
        elif result.payload:
4241
          for inst in result.payload:
4242
            if inst in all_info:
4243
              if all_info[inst].primary_node == name:
4244
                live_data.update(result.payload)
4245
              else:
4246
                wrongnode_inst.add(inst)
4247
            else:
4248
              # orphan instance; we don't list it here as we don't
4249
              # handle this case yet in the output of instance listing
4250
              logging.warning("Orphan instance '%s' found on node %s",
4251
                              inst, name)
4252
        # else no instance is alive
4253
    else:
4254
      live_data = {}
4255

    
4256
    if query.IQ_DISKUSAGE in self.requested_data:
4257
      disk_usage = dict((inst.name,
4258
                         _ComputeDiskSize(inst.disk_template,
4259
                                          [{constants.IDISK_SIZE: disk.size}
4260
                                           for disk in inst.disks]))
4261
                        for inst in instance_list)
4262
    else:
4263
      disk_usage = None
4264

    
4265
    if query.IQ_CONSOLE in self.requested_data:
4266
      consinfo = {}
4267
      for inst in instance_list:
4268
        if inst.name in live_data:
4269
          # Instance is running
4270
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4271
        else:
4272
          consinfo[inst.name] = None
4273
      assert set(consinfo.keys()) == set(instance_names)
4274
    else:
4275
      consinfo = None
4276

    
4277
    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4278
                                   disk_usage, offline_nodes, bad_nodes,
4279
                                   live_data, wrongnode_inst, consinfo)
4280

    
4281

    
4282
class LUQuery(NoHooksLU):
4283
  """Query for resources/items of a certain kind.
4284

4285
  """
4286
  # pylint: disable-msg=W0142
4287
  REQ_BGL = False
4288

    
4289
  def CheckArguments(self):
4290
    qcls = _GetQueryImplementation(self.op.what)
4291

    
4292
    self.impl = qcls(self.op.filter, self.op.fields, False)
4293

    
4294
  def ExpandNames(self):
4295
    self.impl.ExpandNames(self)
4296

    
4297
  def DeclareLocks(self, level):
4298
    self.impl.DeclareLocks(self, level)
4299

    
4300
  def Exec(self, feedback_fn):
4301
    return self.impl.NewStyleQuery(self)
4302

    
4303

    
4304
class LUQueryFields(NoHooksLU):
4305
  """Query for resources/items of a certain kind.
4306

4307
  """
4308
  # pylint: disable-msg=W0142
4309
  REQ_BGL = False
4310

    
4311
  def CheckArguments(self):
4312
    self.qcls = _GetQueryImplementation(self.op.what)
4313

    
4314
  def ExpandNames(self):
4315
    self.needed_locks = {}
4316

    
4317
  def Exec(self, feedback_fn):
4318
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4319

    
4320

    
4321
class LUNodeModifyStorage(NoHooksLU):
4322
  """Logical unit for modifying a storage volume on a node.
4323

4324
  """
4325
  REQ_BGL = False
4326

    
4327
  def CheckArguments(self):
4328
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4329

    
4330
    storage_type = self.op.storage_type
4331

    
4332
    try:
4333
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4334
    except KeyError:
4335
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
4336
                                 " modified" % storage_type,
4337
                                 errors.ECODE_INVAL)
4338

    
4339
    diff = set(self.op.changes.keys()) - modifiable
4340
    if diff:
4341
      raise errors.OpPrereqError("The following fields can not be modified for"
4342
                                 " storage units of type '%s': %r" %
4343
                                 (storage_type, list(diff)),
4344
                                 errors.ECODE_INVAL)
4345

    
4346
  def ExpandNames(self):
4347
    self.needed_locks = {
4348
      locking.LEVEL_NODE: self.op.node_name,
4349
      }
4350

    
4351
  def Exec(self, feedback_fn):
4352
    """Computes the list of nodes and their attributes.
4353

4354
    """
4355
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4356
    result = self.rpc.call_storage_modify(self.op.node_name,
4357
                                          self.op.storage_type, st_args,
4358
                                          self.op.name, self.op.changes)
4359
    result.Raise("Failed to modify storage unit '%s' on %s" %
4360
                 (self.op.name, self.op.node_name))
4361

    
4362

    
4363
class LUNodeAdd(LogicalUnit):
4364
  """Logical unit for adding node to the cluster.
4365

4366
  """
4367
  HPATH = "node-add"
4368
  HTYPE = constants.HTYPE_NODE
4369
  _NFLAGS = ["master_capable", "vm_capable"]
4370

    
4371
  def CheckArguments(self):
4372
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4373
    # validate/normalize the node name
4374
    self.hostname = netutils.GetHostname(name=self.op.node_name,
4375
                                         family=self.primary_ip_family)
4376
    self.op.node_name = self.hostname.name
4377

    
4378
    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
4379
      raise errors.OpPrereqError("Cannot readd the master node",
4380
                                 errors.ECODE_STATE)
4381

    
4382
    if self.op.readd and self.op.group:
4383
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
4384
                                 " being readded", errors.ECODE_INVAL)
4385

    
4386
  def BuildHooksEnv(self):
4387
    """Build hooks env.
4388

4389
    This will run on all nodes before, and on all nodes + the new node after.
4390

4391
    """
4392
    return {
4393
      "OP_TARGET": self.op.node_name,
4394
      "NODE_NAME": self.op.node_name,
4395
      "NODE_PIP": self.op.primary_ip,
4396
      "NODE_SIP": self.op.secondary_ip,
4397
      "MASTER_CAPABLE": str(self.op.master_capable),
4398
      "VM_CAPABLE": str(self.op.vm_capable),
4399
      }
4400

    
4401
  def BuildHooksNodes(self):
4402
    """Build hooks nodes.
4403

4404
    """
4405
    # Exclude added node
4406
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
4407
    post_nodes = pre_nodes + [self.op.node_name, ]
4408

    
4409
    return (pre_nodes, post_nodes)
4410

    
4411
  def CheckPrereq(self):
4412
    """Check prerequisites.
4413

4414
    This checks:
4415
     - the new node is not already in the config
4416
     - it is resolvable
4417
     - its parameters (single/dual homed) matches the cluster
4418

4419
    Any errors are signaled by raising errors.OpPrereqError.
4420

4421
    """
4422
    cfg = self.cfg
4423
    hostname = self.hostname
4424
    node = hostname.name
4425
    primary_ip = self.op.primary_ip = hostname.ip
4426
    if self.op.secondary_ip is None:
4427
      if self.primary_ip_family == netutils.IP6Address.family:
4428
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
4429
                                   " IPv4 address must be given as secondary",
4430
                                   errors.ECODE_INVAL)
4431
      self.op.secondary_ip = primary_ip
4432

    
4433
    secondary_ip = self.op.secondary_ip
4434
    if not netutils.IP4Address.IsValid(secondary_ip):
4435
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4436
                                 " address" % secondary_ip, errors.ECODE_INVAL)
4437

    
4438
    node_list = cfg.GetNodeList()
4439
    if not self.op.readd and node in node_list:
4440
      raise errors.OpPrereqError("Node %s is already in the configuration" %
4441
                                 node, errors.ECODE_EXISTS)
4442
    elif self.op.readd and node not in node_list:
4443
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4444
                                 errors.ECODE_NOENT)
4445

    
4446
    self.changed_primary_ip = False
4447

    
4448
    for existing_node_name in node_list:
4449
      existing_node = cfg.GetNodeInfo(existing_node_name)
4450

    
4451
      if self.op.readd and node == existing_node_name:
4452
        if existing_node.secondary_ip != secondary_ip:
4453
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
4454
                                     " address configuration as before",
4455
                                     errors.ECODE_INVAL)
4456
        if existing_node.primary_ip != primary_ip:
4457
          self.changed_primary_ip = True
4458

    
4459
        continue
4460

    
4461
      if (existing_node.primary_ip == primary_ip or
4462
          existing_node.secondary_ip == primary_ip or
4463
          existing_node.primary_ip == secondary_ip or
4464
          existing_node.secondary_ip == secondary_ip):
4465
        raise errors.OpPrereqError("New node ip address(es) conflict with"
4466
                                   " existing node %s" % existing_node.name,
4467
                                   errors.ECODE_NOTUNIQUE)
4468

    
4469
    # After this 'if' block, None is no longer a valid value for the
4470
    # _capable op attributes
4471
    if self.op.readd:
4472
      old_node = self.cfg.GetNodeInfo(node)
4473
      assert old_node is not None, "Can't retrieve locked node %s" % node
4474
      for attr in self._NFLAGS:
4475
        if getattr(self.op, attr) is None:
4476
          setattr(self.op, attr, getattr(old_node, attr))
4477
    else:
4478
      for attr in self._NFLAGS:
4479
        if getattr(self.op, attr) is None:
4480
          setattr(self.op, attr, True)
4481

    
4482
    if self.op.readd and not self.op.vm_capable:
4483
      pri, sec = cfg.GetNodeInstances(node)
4484
      if pri or sec:
4485
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4486
                                   " flag set to false, but it already holds"
4487
                                   " instances" % node,
4488
                                   errors.ECODE_STATE)
4489

    
4490
    # check that the type of the node (single versus dual homed) is the
4491
    # same as for the master
4492
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4493
    master_singlehomed = myself.secondary_ip == myself.primary_ip
4494
    newbie_singlehomed = secondary_ip == primary_ip
4495
    if master_singlehomed != newbie_singlehomed:
4496
      if master_singlehomed:
4497
        raise errors.OpPrereqError("The master has no secondary ip but the"
4498
                                   " new node has one",
4499
                                   errors.ECODE_INVAL)
4500
      else:
4501
        raise errors.OpPrereqError("The master has a secondary ip but the"
4502
                                   " new node doesn't have one",
4503
                                   errors.ECODE_INVAL)
4504

    
4505
    # checks reachability
4506
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4507
      raise errors.OpPrereqError("Node not reachable by ping",
4508
                                 errors.ECODE_ENVIRON)
4509

    
4510
    if not newbie_singlehomed:
4511
      # check reachability from my secondary ip to newbie's secondary ip
4512
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4513
                           source=myself.secondary_ip):
4514
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4515
                                   " based ping to node daemon port",
4516
                                   errors.ECODE_ENVIRON)
4517

    
4518
    if self.op.readd:
4519
      exceptions = [node]
4520
    else:
4521
      exceptions = []
4522

    
4523
    if self.op.master_capable:
4524
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
4525
    else:
4526
      self.master_candidate = False
4527

    
4528
    if self.op.readd:
4529
      self.new_node = old_node
4530
    else:
4531
      node_group = cfg.LookupNodeGroup(self.op.group)
4532
      self.new_node = objects.Node(name=node,
4533
                                   primary_ip=primary_ip,
4534
                                   secondary_ip=secondary_ip,
4535
                                   master_candidate=self.master_candidate,
4536
                                   offline=False, drained=False,
4537
                                   group=node_group)
4538

    
4539
    if self.op.ndparams:
4540
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4541

    
4542
  def Exec(self, feedback_fn):
4543
    """Adds the new node to the cluster.
4544

4545
    """
4546
    new_node = self.new_node
4547
    node = new_node.name
4548

    
4549
    # We adding a new node so we assume it's powered
4550
    new_node.powered = True
4551

    
4552
    # for re-adds, reset the offline/drained/master-candidate flags;
4553
    # we need to reset here, otherwise offline would prevent RPC calls
4554
    # later in the procedure; this also means that if the re-add
4555
    # fails, we are left with a non-offlined, broken node
4556
    if self.op.readd:
4557
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
4558
      self.LogInfo("Readding a node, the offline/drained flags were reset")
4559
      # if we demote the node, we do cleanup later in the procedure
4560
      new_node.master_candidate = self.master_candidate
4561
      if self.changed_primary_ip:
4562
        new_node.primary_ip = self.op.primary_ip
4563

    
4564
    # copy the master/vm_capable flags
4565
    for attr in self._NFLAGS:
4566
      setattr(new_node, attr, getattr(self.op, attr))
4567

    
4568
    # notify the user about any possible mc promotion
4569
    if new_node.master_candidate:
4570
      self.LogInfo("Node will be a master candidate")
4571

    
4572
    if self.op.ndparams:
4573
      new_node.ndparams = self.op.ndparams
4574
    else:
4575
      new_node.ndparams = {}
4576

    
4577
    # check connectivity
4578
    result = self.rpc.call_version([node])[node]
4579
    result.Raise("Can't get version information from node %s" % node)
4580
    if constants.PROTOCOL_VERSION == result.payload:
4581
      logging.info("Communication to node %s fine, sw version %s match",
4582
                   node, result.payload)
4583
    else:
4584
      raise errors.OpExecError("Version mismatch master version %s,"
4585
                               " node version %s" %
4586
                               (constants.PROTOCOL_VERSION, result.payload))
4587

    
4588
    # Add node to our /etc/hosts, and add key to known_hosts
4589
    if self.cfg.GetClusterInfo().modify_etc_hosts:
4590
      master_node = self.cfg.GetMasterNode()
4591
      result = self.rpc.call_etc_hosts_modify(master_node,
4592
                                              constants.ETC_HOSTS_ADD,
4593
                                              self.hostname.name,
4594
                                              self.hostname.ip)
4595
      result.Raise("Can't update hosts file with new host data")
4596

    
4597
    if new_node.secondary_ip != new_node.primary_ip:
4598
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
4599
                               False)
4600

    
4601
    node_verify_list = [self.cfg.GetMasterNode()]
4602
    node_verify_param = {
4603
      constants.NV_NODELIST: [node],
4604
      # TODO: do a node-net-test as well?
4605
    }
4606

    
4607
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
4608
                                       self.cfg.GetClusterName())
4609
    for verifier in node_verify_list:
4610
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
4611
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
4612
      if nl_payload:
4613
        for failed in nl_payload:
4614
          feedback_fn("ssh/hostname verification failed"
4615
                      " (checking from %s): %s" %
4616
                      (verifier, nl_payload[failed]))
4617
        raise errors.OpExecError("ssh/hostname verification failed")
4618

    
4619
    if self.op.readd:
4620
      _RedistributeAncillaryFiles(self)
4621
      self.context.ReaddNode(new_node)
4622
      # make sure we redistribute the config
4623
      self.cfg.Update(new_node, feedback_fn)
4624
      # and make sure the new node will not have old files around
4625
      if not new_node.master_candidate:
4626
        result = self.rpc.call_node_demote_from_mc(new_node.name)
4627
        msg = result.fail_msg
4628
        if msg:
4629
          self.LogWarning("Node failed to demote itself from master"
4630
                          " candidate status: %s" % msg)
4631
    else:
4632
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
4633
                                  additional_vm=self.op.vm_capable)
4634
      self.context.AddNode(new_node, self.proc.GetECId())
4635

    
4636

    
4637
class LUNodeSetParams(LogicalUnit):
4638
  """Modifies the parameters of a node.
4639

4640
  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
4641
      to the node role (as _ROLE_*)
4642
  @cvar _R2F: a dictionary from node role to tuples of flags
4643
  @cvar _FLAGS: a list of attribute names corresponding to the flags
4644

4645
  """
4646
  HPATH = "node-modify"
4647
  HTYPE = constants.HTYPE_NODE
4648
  REQ_BGL = False
4649
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
4650
  _F2R = {
4651
    (True, False, False): _ROLE_CANDIDATE,
4652
    (False, True, False): _ROLE_DRAINED,
4653
    (False, False, True): _ROLE_OFFLINE,
4654
    (False, False, False): _ROLE_REGULAR,
4655
    }
4656
  _R2F = dict((v, k) for k, v in _F2R.items())
4657
  _FLAGS = ["master_candidate", "drained", "offline"]
4658

    
4659
  def CheckArguments(self):
4660
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4661
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
4662
                self.op.master_capable, self.op.vm_capable,
4663
                self.op.secondary_ip, self.op.ndparams]
4664
    if all_mods.count(None) == len(all_mods):
4665
      raise errors.OpPrereqError("Please pass at least one modification",
4666
                                 errors.ECODE_INVAL)
4667
    if all_mods.count(True) > 1:
4668
      raise errors.OpPrereqError("Can't set the node into more than one"
4669
                                 " state at the same time",
4670
                                 errors.ECODE_INVAL)
4671

    
4672
    # Boolean value that tells us whether we might be demoting from MC
4673
    self.might_demote = (self.op.master_candidate == False or
4674
                         self.op.offline == True or
4675
                         self.op.drained == True or
4676
                         self.op.master_capable == False)
4677

    
4678
    if self.op.secondary_ip:
4679
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
4680
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4681
                                   " address" % self.op.secondary_ip,
4682
                                   errors.ECODE_INVAL)
4683

    
4684
    self.lock_all = self.op.auto_promote and self.might_demote
4685
    self.lock_instances = self.op.secondary_ip is not None
4686

    
4687
  def ExpandNames(self):
4688
    if self.lock_all:
4689
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
4690
    else:
4691
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
4692

    
4693
    if self.lock_instances:
4694
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
4695

    
4696
  def DeclareLocks(self, level):
4697
    # If we have locked all instances, before waiting to lock nodes, release
4698
    # all the ones living on nodes unrelated to the current operation.
4699
    if level == locking.LEVEL_NODE and self.lock_instances:
4700
      self.affected_instances = []
4701
      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
4702
        instances_keep = []
4703

    
4704
        # Build list of instances to release
4705
        for instance_name in self.glm.list_owned(locking.LEVEL_INSTANCE):
4706
          instance = self.context.cfg.GetInstanceInfo(instance_name)
4707
          if (instance.disk_template in constants.DTS_INT_MIRROR and
4708
              self.op.node_name in instance.all_nodes):
4709
            instances_keep.append(instance_name)
4710
            self.affected_instances.append(instance)
4711

    
4712
        _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)
4713

    
4714
        assert (set(self.glm.list_owned(locking.LEVEL_INSTANCE)) ==
4715
                set(instances_keep))
4716

    
4717
  def BuildHooksEnv(self):
4718
    """Build hooks env.
4719

4720
    This runs on the master node.
4721

4722
    """
4723
    return {
4724
      "OP_TARGET": self.op.node_name,
4725
      "MASTER_CANDIDATE": str(self.op.master_candidate),
4726
      "OFFLINE": str(self.op.offline),
4727
      "DRAINED": str(self.op.drained),
4728
      "MASTER_CAPABLE": str(self.op.master_capable),
4729
      "VM_CAPABLE": str(self.op.vm_capable),
4730
      }
4731

    
4732
  def BuildHooksNodes(self):
4733
    """Build hooks nodes.
4734

4735
    """
4736
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
4737
    return (nl, nl)
4738

    
4739
  def CheckPrereq(self):
4740
    """Check prerequisites.
4741

4742
    This only checks the instance list against the existing names.
4743

4744
    """
4745
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
4746

    
4747
    if (self.op.master_candidate is not None or
4748
        self.op.drained is not None or
4749
        self.op.offline is not None):
4750
      # we can't change the master's node flags
4751
      if self.op.node_name == self.cfg.GetMasterNode():
4752
        raise errors.OpPrereqError("The master role can be changed"
4753
                                   " only via master-failover",
4754
                                   errors.ECODE_INVAL)
4755

    
4756
    if self.op.master_candidate and not node.master_capable:
4757
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
4758
                                 " it a master candidate" % node.name,
4759
                                 errors.ECODE_STATE)
4760

    
4761
    if self.op.vm_capable == False:
4762
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
4763
      if ipri or isec:
4764
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
4765
                                   " the vm_capable flag" % node.name,
4766
                                   errors.ECODE_STATE)
4767

    
4768
    if node.master_candidate and self.might_demote and not self.lock_all:
4769
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
4770
      # check if after removing the current node, we're missing master
4771
      # candidates
4772
      (mc_remaining, mc_should, _) = \
4773
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
4774
      if mc_remaining < mc_should:
4775
        raise errors.OpPrereqError("Not enough master candidates, please"
4776
                                   " pass auto promote option to allow"
4777
                                   " promotion", errors.ECODE_STATE)
4778

    
4779
    self.old_flags = old_flags = (node.master_candidate,
4780
                                  node.drained, node.offline)
4781
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
4782
    self.old_role = old_role = self._F2R[old_flags]
4783

    
4784
    # Check for ineffective changes
4785
    for attr in self._FLAGS:
4786
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
4787
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
4788
        setattr(self.op, attr, None)
4789

    
4790
    # Past this point, any flag change to False means a transition
4791
    # away from the respective state, as only real changes are kept
4792

    
4793
    # TODO: We might query the real power state if it supports OOB
4794
    if _SupportsOob(self.cfg, node):
4795
      if self.op.offline is False and not (node.powered or
4796
                                           self.op.powered == True):
4797
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
4798
                                    " offline status can be reset") %
4799
                                   self.op.node_name)
4800
    elif self.op.powered is not None:
4801
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
4802
                                  " as it does not support out-of-band"
4803
                                  " handling") % self.op.node_name)
4804

    
4805
    # If we're being deofflined/drained, we'll MC ourself if needed
4806
    if (self.op.drained == False or self.op.offline == False or
4807
        (self.op.master_capable and not node.master_capable)):
4808
      if _DecideSelfPromotion(self):
4809
        self.op.master_candidate = True
4810
        self.LogInfo("Auto-promoting node to master candidate")
4811

    
4812
    # If we're no longer master capable, we'll demote ourselves from MC
4813
    if self.op.master_capable == False and node.master_candidate:
4814
      self.LogInfo("Demoting from master candidate")
4815
      self.op.master_candidate = False
4816

    
4817
    # Compute new role
4818
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
4819
    if self.op.master_candidate:
4820
      new_role = self._ROLE_CANDIDATE
4821
    elif self.op.drained:
4822
      new_role = self._ROLE_DRAINED
4823
    elif self.op.offline:
4824
      new_role = self._ROLE_OFFLINE
4825
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
4826
      # False is still in new flags, which means we're un-setting (the
4827
      # only) True flag
4828
      new_role = self._ROLE_REGULAR
4829
    else: # no new flags, nothing, keep old role
4830
      new_role = old_role
4831

    
4832
    self.new_role = new_role
4833

    
4834
    if old_role == self._ROLE_OFFLINE and new_role != old_role:
4835
      # Trying to transition out of offline status
4836
      result = self.rpc.call_version([node.name])[node.name]
4837
      if result.fail_msg:
4838
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
4839
                                   " to report its version: %s" %
4840
                                   (node.name, result.fail_msg),
4841
                                   errors.ECODE_STATE)
4842
      else:
4843
        self.LogWarning("Transitioning node from offline to online state"
4844
                        " without using re-add. Please make sure the node"
4845
                        " is healthy!")
4846

    
4847
    if self.op.secondary_ip:
4848
      # Ok even without locking, because this can't be changed by any LU
4849
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
4850
      master_singlehomed = master.secondary_ip == master.primary_ip
4851
      if master_singlehomed and self.op.secondary_ip:
4852
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
4853
                                   " homed cluster", errors.ECODE_INVAL)
4854

    
4855
      if node.offline:
4856
        if self.affected_instances:
4857
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
4858
                                     " node has instances (%s) configured"
4859
                                     " to use it" % self.affected_instances)
4860
      else:
4861
        # On online nodes, check that no instances are running, and that
4862
        # the node has the new ip and we can reach it.
4863
        for instance in self.affected_instances:
4864
          _CheckInstanceDown(self, instance, "cannot change secondary ip")
4865

    
4866
        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
4867
        if master.name != node.name:
4868
          # check reachability from master secondary ip to new secondary ip
4869
          if not netutils.TcpPing(self.op.secondary_ip,
4870
                                  constants.DEFAULT_NODED_PORT,
4871
                                  source=master.secondary_ip):
4872
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4873
                                       " based ping to node daemon port",
4874
                                       errors.ECODE_ENVIRON)
4875

    
4876
    if self.op.ndparams:
4877
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
4878
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
4879
      self.new_ndparams = new_ndparams
4880

    
4881
  def Exec(self, feedback_fn):
4882
    """Modifies a node.
4883

4884
    """
4885
    node = self.node
4886
    old_role = self.old_role
4887
    new_role = self.new_role
4888

    
4889
    result = []
4890

    
4891
    if self.op.ndparams:
4892
      node.ndparams = self.new_ndparams
4893

    
4894
    if self.op.powered is not None:
4895
      node.powered = self.op.powered
4896

    
4897
    for attr in ["master_capable", "vm_capable"]:
4898
      val = getattr(self.op, attr)
4899
      if val is not None:
4900
        setattr(node, attr, val)
4901
        result.append((attr, str(val)))
4902

    
4903
    if new_role != old_role:
4904
      # Tell the node to demote itself, if no longer MC and not offline
4905
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
4906
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
4907
        if msg:
4908
          self.LogWarning("Node failed to demote itself: %s", msg)
4909

    
4910
      new_flags = self._R2F[new_role]
4911
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
4912
        if of != nf:
4913
          result.append((desc, str(nf)))
4914
      (node.master_candidate, node.drained, node.offline) = new_flags
4915

    
4916
      # we locked all nodes, we adjust the CP before updating this node
4917
      if self.lock_all:
4918
        _AdjustCandidatePool(self, [node.name])
4919

    
4920
    if self.op.secondary_ip:
4921
      node.secondary_ip = self.op.secondary_ip
4922
      result.append(("secondary_ip", self.op.secondary_ip))
4923

    
4924
    # this will trigger configuration file update, if needed
4925
    self.cfg.Update(node, feedback_fn)
4926

    
4927
    # this will trigger job queue propagation or cleanup if the mc
4928
    # flag changed
4929
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
4930
      self.context.ReaddNode(node)
4931

    
4932
    return result
4933

    
4934

    
4935
class LUNodePowercycle(NoHooksLU):
4936
  """Powercycles a node.
4937

4938
  """
4939
  REQ_BGL = False
4940

    
4941
  def CheckArguments(self):
4942
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4943
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
4944
      raise errors.OpPrereqError("The node is the master and the force"
4945
                                 " parameter was not set",
4946
                                 errors.ECODE_INVAL)
4947

    
4948
  def ExpandNames(self):
4949
    """Locking for PowercycleNode.
4950

4951
    This is a last-resort option and shouldn't block on other
4952
    jobs. Therefore, we grab no locks.
4953

4954
    """
4955
    self.needed_locks = {}
4956

    
4957
  def Exec(self, feedback_fn):
4958
    """Reboots a node.
4959

4960
    """
4961
    result = self.rpc.call_node_powercycle(self.op.node_name,
4962
                                           self.cfg.GetHypervisorType())
4963
    result.Raise("Failed to schedule the reboot")
4964
    return result.payload
4965

    
4966

    
4967
class LUClusterQuery(NoHooksLU):
4968
  """Query cluster configuration.
4969

4970
  """
4971
  REQ_BGL = False
4972

    
4973
  def ExpandNames(self):
4974
    self.needed_locks = {}
4975

    
4976
  def Exec(self, feedback_fn):
4977
    """Return cluster config.
4978

4979
    """
4980
    cluster = self.cfg.GetClusterInfo()
4981
    os_hvp = {}
4982

    
4983
    # Filter just for enabled hypervisors
4984
    for os_name, hv_dict in cluster.os_hvp.items():
4985
      os_hvp[os_name] = {}
4986
      for hv_name, hv_params in hv_dict.items():
4987
        if hv_name in cluster.enabled_hypervisors:
4988
          os_hvp[os_name][hv_name] = hv_params
4989

    
4990
    # Convert ip_family to ip_version
4991
    primary_ip_version = constants.IP4_VERSION
4992
    if cluster.primary_ip_family == netutils.IP6Address.family:
4993
      primary_ip_version = constants.IP6_VERSION
4994

    
4995
    result = {
4996
      "software_version": constants.RELEASE_VERSION,
4997
      "protocol_version": constants.PROTOCOL_VERSION,
4998
      "config_version": constants.CONFIG_VERSION,
4999
      "os_api_version": max(constants.OS_API_VERSIONS),
5000
      "export_version": constants.EXPORT_VERSION,
5001
      "architecture": (platform.architecture()[0], platform.machine()),
5002
      "name": cluster.cluster_name,
5003
      "master": cluster.master_node,
5004
      "default_hypervisor": cluster.enabled_hypervisors[0],
5005
      "enabled_hypervisors": cluster.enabled_hypervisors,
5006
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5007
                        for hypervisor_name in cluster.enabled_hypervisors]),
5008
      "os_hvp": os_hvp,
5009
      "beparams": cluster.beparams,
5010
      "osparams": cluster.osparams,
5011
      "nicparams": cluster.nicparams,
5012
      "ndparams": cluster.ndparams,
5013
      "candidate_pool_size": cluster.candidate_pool_size,
5014
      "master_netdev": cluster.master_netdev,
5015
      "volume_group_name": cluster.volume_group_name,
5016
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
5017
      "file_storage_dir": cluster.file_storage_dir,
5018
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
5019
      "maintain_node_health": cluster.maintain_node_health,
5020
      "ctime": cluster.ctime,
5021
      "mtime": cluster.mtime,
5022
      "uuid": cluster.uuid,
5023
      "tags": list(cluster.GetTags()),
5024
      "uid_pool": cluster.uid_pool,
5025
      "default_iallocator": cluster.default_iallocator,
5026
      "reserved_lvs": cluster.reserved_lvs,
5027
      "primary_ip_version": primary_ip_version,
5028
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5029
      "hidden_os": cluster.hidden_os,
5030
      "blacklisted_os": cluster.blacklisted_os,
5031
      }
5032

    
5033
    return result
5034

    
5035

    
5036
class LUClusterConfigQuery(NoHooksLU):
5037
  """Return configuration values.
5038

5039
  """
5040
  REQ_BGL = False
5041
  _FIELDS_DYNAMIC = utils.FieldSet()
5042
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5043
                                  "watcher_pause", "volume_group_name")
5044

    
5045
  def CheckArguments(self):
5046
    _CheckOutputFields(static=self._FIELDS_STATIC,
5047
                       dynamic=self._FIELDS_DYNAMIC,
5048
                       selected=self.op.output_fields)
5049

    
5050
  def ExpandNames(self):
5051
    self.needed_locks = {}
5052

    
5053
  def Exec(self, feedback_fn):
5054
    """Dump a representation of the cluster config to the standard output.
5055

5056
    """
5057
    values = []
5058
    for field in self.op.output_fields:
5059
      if field == "cluster_name":
5060
        entry = self.cfg.GetClusterName()
5061
      elif field == "master_node":
5062
        entry = self.cfg.GetMasterNode()
5063
      elif field == "drain_flag":
5064
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5065
      elif field == "watcher_pause":
5066
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5067
      elif field == "volume_group_name":
5068
        entry = self.cfg.GetVGName()
5069
      else:
5070
        raise errors.ParameterError(field)
5071
      values.append(entry)
5072
    return values
5073

    
5074

    
5075
class LUInstanceActivateDisks(NoHooksLU):
5076
  """Bring up an instance's disks.
5077

5078
  """
5079
  REQ_BGL = False
5080

    
5081
  def ExpandNames(self):
5082
    self._ExpandAndLockInstance()
5083
    self.needed_locks[locking.LEVEL_NODE] = []
5084
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5085

    
5086
  def DeclareLocks(self, level):
5087
    if level == locking.LEVEL_NODE:
5088
      self._LockInstancesNodes()
5089

    
5090
  def CheckPrereq(self):
5091
    """Check prerequisites.
5092

5093
    This checks that the instance is in the cluster.
5094

5095
    """
5096
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5097
    assert self.instance is not None, \
5098
      "Cannot retrieve locked instance %s" % self.op.instance_name
5099
    _CheckNodeOnline(self, self.instance.primary_node)
5100

    
5101
  def Exec(self, feedback_fn):
5102
    """Activate the disks.
5103

5104
    """
5105
    disks_ok, disks_info = \
5106
              _AssembleInstanceDisks(self, self.instance,
5107
                                     ignore_size=self.op.ignore_size)
5108
    if not disks_ok:
5109
      raise errors.OpExecError("Cannot activate block devices")
5110

    
5111
    return disks_info
5112

    
5113

    
5114
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5115
                           ignore_size=False):
5116
  """Prepare the block devices for an instance.
5117

5118
  This sets up the block devices on all nodes.
5119

5120
  @type lu: L{LogicalUnit}
5121
  @param lu: the logical unit on whose behalf we execute
5122
  @type instance: L{objects.Instance}
5123
  @param instance: the instance for whose disks we assemble
5124
  @type disks: list of L{objects.Disk} or None
5125
  @param disks: which disks to assemble (or all, if None)
5126
  @type ignore_secondaries: boolean
5127
  @param ignore_secondaries: if true, errors on secondary nodes
5128
      won't result in an error return from the function
5129
  @type ignore_size: boolean
5130
  @param ignore_size: if true, the current known size of the disk
5131
      will not be used during the disk activation, useful for cases
5132
      when the size is wrong
5133
  @return: False if the operation failed, otherwise a list of
5134
      (host, instance_visible_name, node_visible_name)
5135
      with the mapping from node devices to instance devices
5136

5137
  """
5138
  device_info = []
5139
  disks_ok = True
5140
  iname = instance.name
5141
  disks = _ExpandCheckDisks(instance, disks)
5142

    
5143
  # With the two passes mechanism we try to reduce the window of
5144
  # opportunity for the race condition of switching DRBD to primary
5145
  # before handshaking occured, but we do not eliminate it
5146

    
5147
  # The proper fix would be to wait (with some limits) until the
5148
  # connection has been made and drbd transitions from WFConnection
5149
  # into any other network-connected state (Connected, SyncTarget,
5150
  # SyncSource, etc.)
5151

    
5152
  # 1st pass, assemble on all nodes in secondary mode
5153
  for idx, inst_disk in enumerate(disks):
5154
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5155
      if ignore_size:
5156
        node_disk = node_disk.Copy()
5157
        node_disk.UnsetSize()
5158
      lu.cfg.SetDiskID(node_disk, node)
5159
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5160
      msg = result.fail_msg
5161
      if msg:
5162
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
5163
                           " (is_primary=False, pass=1): %s",
5164
                           inst_disk.iv_name, node, msg)
5165
        if not ignore_secondaries:
5166
          disks_ok = False
5167

    
5168
  # FIXME: race condition on drbd migration to primary
5169

    
5170
  # 2nd pass, do only the primary node
5171
  for idx, inst_disk in enumerate(disks):
5172
    dev_path = None
5173

    
5174
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5175
      if node != instance.primary_node:
5176
        continue
5177
      if ignore_size:
5178
        node_disk = node_disk.Copy()
5179
        node_disk.UnsetSize()
5180
      lu.cfg.SetDiskID(node_disk, node)
5181
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5182
      msg = result.fail_msg
5183
      if msg:
5184
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
5185
                           " (is_primary=True, pass=2): %s",
5186
                           inst_disk.iv_name, node, msg)
5187
        disks_ok = False
5188
      else:
5189
        dev_path = result.payload
5190

    
5191
    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5192

    
5193
  # leave the disks configured for the primary node
5194
  # this is a workaround that would be fixed better by
5195
  # improving the logical/physical id handling
5196
  for disk in disks:
5197
    lu.cfg.SetDiskID(disk, instance.primary_node)
5198

    
5199
  return disks_ok, device_info
5200

    
5201

    
5202
def _StartInstanceDisks(lu, instance, force):
5203
  """Start the disks of an instance.
5204

5205
  """
5206
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5207
                                           ignore_secondaries=force)
5208
  if not disks_ok:
5209
    _ShutdownInstanceDisks(lu, instance)
5210
    if force is not None and not force:
5211
      lu.proc.LogWarning("", hint="If the message above refers to a"
5212
                         " secondary node,"
5213
                         " you can retry the operation using '--force'.")
5214
    raise errors.OpExecError("Disk consistency error")
5215

    
5216

    
5217
class LUInstanceDeactivateDisks(NoHooksLU):
5218
  """Shutdown an instance's disks.
5219

5220
  """
5221
  REQ_BGL = False
5222

    
5223
  def ExpandNames(self):
5224
    self._ExpandAndLockInstance()
5225
    self.needed_locks[locking.LEVEL_NODE] = []
5226
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5227

    
5228
  def DeclareLocks(self, level):
5229
    if level == locking.LEVEL_NODE:
5230
      self._LockInstancesNodes()
5231

    
5232
  def CheckPrereq(self):
5233
    """Check prerequisites.
5234

5235
    This checks that the instance is in the cluster.
5236

5237
    """
5238
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5239
    assert self.instance is not None, \
5240
      "Cannot retrieve locked instance %s" % self.op.instance_name
5241

    
5242
  def Exec(self, feedback_fn):
5243
    """Deactivate the disks
5244

5245
    """
5246
    instance = self.instance
5247
    if self.op.force:
5248
      _ShutdownInstanceDisks(self, instance)
5249
    else:
5250
      _SafeShutdownInstanceDisks(self, instance)
5251

    
5252

    
5253
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5254
  """Shutdown block devices of an instance.
5255

5256
  This function checks if an instance is running, before calling
5257
  _ShutdownInstanceDisks.
5258

5259
  """
5260
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
5261
  _ShutdownInstanceDisks(lu, instance, disks=disks)
5262

    
5263

    
5264
def _ExpandCheckDisks(instance, disks):
5265
  """Return the instance disks selected by the disks list
5266

5267
  @type disks: list of L{objects.Disk} or None
5268
  @param disks: selected disks
5269
  @rtype: list of L{objects.Disk}
5270
  @return: selected instance disks to act on
5271

5272
  """
5273
  if disks is None:
5274
    return instance.disks
5275
  else:
5276
    if not set(disks).issubset(instance.disks):
5277
      raise errors.ProgrammerError("Can only act on disks belonging to the"
5278
                                   " target instance")
5279
    return disks
5280

    
5281

    
5282
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5283
  """Shutdown block devices of an instance.
5284

5285
  This does the shutdown on all nodes of the instance.
5286

5287
  If the ignore_primary is false, errors on the primary node are
5288
  ignored.
5289

5290
  """
5291
  all_result = True
5292
  disks = _ExpandCheckDisks(instance, disks)
5293

    
5294
  for disk in disks:
5295
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
5296
      lu.cfg.SetDiskID(top_disk, node)
5297
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5298
      msg = result.fail_msg
5299
      if msg:
5300
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
5301
                      disk.iv_name, node, msg)
5302
        if ((node == instance.primary_node and not ignore_primary) or
5303
            (node != instance.primary_node and not result.offline)):
5304
          all_result = False
5305
  return all_result
5306

    
5307

    
5308
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5309
  """Checks if a node has enough free memory.
5310

5311
  This function check if a given node has the needed amount of free
5312
  memory. In case the node has less memory or we cannot get the
5313
  information from the node, this function raise an OpPrereqError
5314
  exception.
5315

5316
  @type lu: C{LogicalUnit}
5317
  @param lu: a logical unit from which we get configuration data
5318
  @type node: C{str}
5319
  @param node: the node to check
5320
  @type reason: C{str}
5321
  @param reason: string to use in the error message
5322
  @type requested: C{int}
5323
  @param requested: the amount of memory in MiB to check for
5324
  @type hypervisor_name: C{str}
5325
  @param hypervisor_name: the hypervisor to ask for memory stats
5326
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5327
      we cannot check the node
5328

5329
  """
5330
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5331
  nodeinfo[node].Raise("Can't get data from node %s" % node,
5332
                       prereq=True, ecode=errors.ECODE_ENVIRON)
5333
  free_mem = nodeinfo[node].payload.get('memory_free', None)
5334
  if not isinstance(free_mem, int):
5335
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5336
                               " was '%s'" % (node, free_mem),
5337
                               errors.ECODE_ENVIRON)
5338
  if requested > free_mem:
5339
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5340
                               " needed %s MiB, available %s MiB" %
5341
                               (node, reason, requested, free_mem),
5342
                               errors.ECODE_NORES)
5343

    
5344

    
5345
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5346
  """Checks if nodes have enough free disk space in the all VGs.
5347

5348
  This function check if all given nodes have the needed amount of
5349
  free disk. In case any node has less disk or we cannot get the
5350
  information from the node, this function raise an OpPrereqError
5351
  exception.
5352

5353
  @type lu: C{LogicalUnit}
5354
  @param lu: a logical unit from which we get configuration data
5355
  @type nodenames: C{list}
5356
  @param nodenames: the list of node names to check
5357
  @type req_sizes: C{dict}
5358
  @param req_sizes: the hash of vg and corresponding amount of disk in
5359
      MiB to check for
5360
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
5361
      or we cannot check the node
5362

5363
  """
5364
  for vg, req_size in req_sizes.items():
5365
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
5366

    
5367

    
5368
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5369
  """Checks if nodes have enough free disk space in the specified VG.
5370

5371
  This function check if all given nodes have the needed amount of
5372
  free disk. In case any node has less disk or we cannot get the
5373
  information from the node, this function raise an OpPrereqError
5374
  exception.
5375

5376
  @type lu: C{LogicalUnit}
5377
  @param lu: a logical unit from which we get configuration data
5378
  @type nodenames: C{list}
5379
  @param nodenames: the list of node names to check
5380
  @type vg: C{str}
5381
  @param vg: the volume group to check
5382
  @type requested: C{int}
5383
  @param requested: the amount of disk in MiB to check for
5384
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
5385
      or we cannot check the node
5386

5387
  """
5388
  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5389
  for node in nodenames:
5390
    info = nodeinfo[node]
5391
    info.Raise("Cannot get current information from node %s" % node,
5392
               prereq=True, ecode=errors.ECODE_ENVIRON)
5393
    vg_free = info.payload.get("vg_free", None)
5394
    if not isinstance(vg_free, int):
5395
      raise errors.OpPrereqError("Can't compute free disk space on node"
5396
                                 " %s for vg %s, result was '%s'" %
5397
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
5398
    if requested > vg_free:
5399
      raise errors.OpPrereqError("Not enough disk space on target node %s"
5400
                                 " vg %s: required %d MiB, available %d MiB" %
5401
                                 (node, vg, requested, vg_free),
5402
                                 errors.ECODE_NORES)
5403

    
5404

    
5405
class LUInstanceStartup(LogicalUnit):
5406
  """Starts an instance.
5407

5408
  """
5409
  HPATH = "instance-start"
5410
  HTYPE = constants.HTYPE_INSTANCE
5411
  REQ_BGL = False
5412

    
5413
  def CheckArguments(self):
5414
    # extra beparams
5415
    if self.op.beparams:
5416
      # fill the beparams dict
5417
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5418

    
5419
  def ExpandNames(self):
5420
    self._ExpandAndLockInstance()
5421

    
5422
  def BuildHooksEnv(self):
5423
    """Build hooks env.
5424

5425
    This runs on master, primary and secondary nodes of the instance.
5426

5427
    """
5428
    env = {
5429
      "FORCE": self.op.force,
5430
      }
5431

    
5432
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5433

    
5434
    return env
5435

    
5436
  def BuildHooksNodes(self):
5437
    """Build hooks nodes.
5438

5439
    """
5440
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5441
    return (nl, nl)
5442

    
5443
  def CheckPrereq(self):
5444
    """Check prerequisites.
5445

5446
    This checks that the instance is in the cluster.
5447

5448
    """
5449
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5450
    assert self.instance is not None, \
5451
      "Cannot retrieve locked instance %s" % self.op.instance_name
5452

    
5453
    # extra hvparams
5454
    if self.op.hvparams:
5455
      # check hypervisor parameter syntax (locally)
5456
      cluster = self.cfg.GetClusterInfo()
5457
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5458
      filled_hvp = cluster.FillHV(instance)
5459
      filled_hvp.update(self.op.hvparams)
5460
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5461
      hv_type.CheckParameterSyntax(filled_hvp)
5462
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
5463

    
5464
    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5465

    
5466
    if self.primary_offline and self.op.ignore_offline_nodes:
5467
      self.proc.LogWarning("Ignoring offline primary node")
5468

    
5469
      if self.op.hvparams or self.op.beparams:
5470
        self.proc.LogWarning("Overridden parameters are ignored")
5471
    else:
5472
      _CheckNodeOnline(self, instance.primary_node)
5473

    
5474
      bep = self.cfg.GetClusterInfo().FillBE(instance)
5475

    
5476
      # check bridges existence
5477
      _CheckInstanceBridgesExist(self, instance)
5478

    
5479
      remote_info = self.rpc.call_instance_info(instance.primary_node,
5480
                                                instance.name,
5481
                                                instance.hypervisor)
5482
      remote_info.Raise("Error checking node %s" % instance.primary_node,
5483
                        prereq=True, ecode=errors.ECODE_ENVIRON)
5484
      if not remote_info.payload: # not running already
5485
        _CheckNodeFreeMemory(self, instance.primary_node,
5486
                             "starting instance %s" % instance.name,
5487
                             bep[constants.BE_MEMORY], instance.hypervisor)
5488

    
5489
  def Exec(self, feedback_fn):
5490
    """Start the instance.
5491

5492
    """
5493
    instance = self.instance
5494
    force = self.op.force
5495

    
5496
    if not self.op.no_remember:
5497
      self.cfg.MarkInstanceUp(instance.name)
5498

    
5499
    if self.primary_offline:
5500
      assert self.op.ignore_offline_nodes
5501
      self.proc.LogInfo("Primary node offline, marked instance as started")
5502
    else:
5503
      node_current = instance.primary_node
5504

    
5505
      _StartInstanceDisks(self, instance, force)
5506

    
5507
      result = self.rpc.call_instance_start(node_current, instance,
5508
                                            self.op.hvparams, self.op.beparams)
5509
      msg = result.fail_msg
5510
      if msg:
5511
        _ShutdownInstanceDisks(self, instance)
5512
        raise errors.OpExecError("Could not start instance: %s" % msg)
5513

    
5514

    
5515
class LUInstanceReboot(LogicalUnit):
5516
  """Reboot an instance.
5517

5518
  """
5519
  HPATH = "instance-reboot"
5520
  HTYPE = constants.HTYPE_INSTANCE
5521
  REQ_BGL = False
5522

    
5523
  def ExpandNames(self):
5524
    self._ExpandAndLockInstance()
5525

    
5526
  def BuildHooksEnv(self):
5527
    """Build hooks env.
5528

5529
    This runs on master, primary and secondary nodes of the instance.
5530

5531
    """
5532
    env = {
5533
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
5534
      "REBOOT_TYPE": self.op.reboot_type,
5535
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5536
      }
5537

    
5538
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5539

    
5540
    return env
5541

    
5542
  def BuildHooksNodes(self):
5543
    """Build hooks nodes.
5544

5545
    """
5546
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5547
    return (nl, nl)
5548

    
5549
  def CheckPrereq(self):
5550
    """Check prerequisites.
5551

5552
    This checks that the instance is in the cluster.
5553

5554
    """
5555
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5556
    assert self.instance is not None, \
5557
      "Cannot retrieve locked instance %s" % self.op.instance_name
5558

    
5559
    _CheckNodeOnline(self, instance.primary_node)
5560

    
5561
    # check bridges existence
5562
    _CheckInstanceBridgesExist(self, instance)
5563

    
5564
  def Exec(self, feedback_fn):
5565
    """Reboot the instance.
5566

5567
    """
5568
    instance = self.instance
5569
    ignore_secondaries = self.op.ignore_secondaries
5570
    reboot_type = self.op.reboot_type
5571

    
5572
    remote_info = self.rpc.call_instance_info(instance.primary_node,
5573
                                              instance.name,
5574
                                              instance.hypervisor)
5575
    remote_info.Raise("Error checking node %s" % instance.primary_node)
5576
    instance_running = bool(remote_info.payload)
5577

    
5578
    node_current = instance.primary_node
5579

    
5580
    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
5581
                                            constants.INSTANCE_REBOOT_HARD]:
5582
      for disk in instance.disks:
5583
        self.cfg.SetDiskID(disk, node_current)
5584
      result = self.rpc.call_instance_reboot(node_current, instance,
5585
                                             reboot_type,
5586
                                             self.op.shutdown_timeout)
5587
      result.Raise("Could not reboot instance")
5588
    else:
5589
      if instance_running:
5590
        result = self.rpc.call_instance_shutdown(node_current, instance,
5591
                                                 self.op.shutdown_timeout)
5592
        result.Raise("Could not shutdown instance for full reboot")
5593
        _ShutdownInstanceDisks(self, instance)
5594
      else:
5595
        self.LogInfo("Instance %s was already stopped, starting now",
5596
                     instance.name)
5597
      _StartInstanceDisks(self, instance, ignore_secondaries)
5598
      result = self.rpc.call_instance_start(node_current, instance, None, None)
5599
      msg = result.fail_msg
5600
      if msg:
5601
        _ShutdownInstanceDisks(self, instance)
5602
        raise errors.OpExecError("Could not start instance for"
5603
                                 " full reboot: %s" % msg)
5604

    
5605
    self.cfg.MarkInstanceUp(instance.name)
5606

    
5607

    
5608
class LUInstanceShutdown(LogicalUnit):
5609
  """Shutdown an instance.
5610

5611
  """
5612
  HPATH = "instance-stop"
5613
  HTYPE = constants.HTYPE_INSTANCE
5614
  REQ_BGL = False
5615

    
5616
  def ExpandNames(self):
5617
    self._ExpandAndLockInstance()
5618

    
5619
  def BuildHooksEnv(self):
5620
    """Build hooks env.
5621

5622
    This runs on master, primary and secondary nodes of the instance.
5623

5624
    """
5625
    env = _BuildInstanceHookEnvByObject(self, self.instance)
5626
    env["TIMEOUT"] = self.op.timeout
5627
    return env
5628

    
5629
  def BuildHooksNodes(self):
5630
    """Build hooks nodes.
5631

5632
    """
5633
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5634
    return (nl, nl)
5635

    
5636
  def CheckPrereq(self):
5637
    """Check prerequisites.
5638

5639
    This checks that the instance is in the cluster.
5640

5641
    """
5642
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5643
    assert self.instance is not None, \
5644
      "Cannot retrieve locked instance %s" % self.op.instance_name
5645

    
5646
    self.primary_offline = \
5647
      self.cfg.GetNodeInfo(self.instance.primary_node).offline
5648

    
5649
    if self.primary_offline and self.op.ignore_offline_nodes:
5650
      self.proc.LogWarning("Ignoring offline primary node")
5651
    else:
5652
      _CheckNodeOnline(self, self.instance.primary_node)
5653

    
5654
  def Exec(self, feedback_fn):
5655
    """Shutdown the instance.
5656

5657
    """
5658
    instance = self.instance
5659
    node_current = instance.primary_node
5660
    timeout = self.op.timeout
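    # Unless the caller asked us not to remember the change (no_remember),
    # persist the new admin-down state in the configuration before issuing
    # the actual shutdown RPC below.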
5661

    
5662
    if not self.op.no_remember:
5663
      self.cfg.MarkInstanceDown(instance.name)
5664

    
5665
    if self.primary_offline:
5666
      assert self.op.ignore_offline_nodes
5667
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
5668
    else:
5669
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
5670
      msg = result.fail_msg
5671
      if msg:
5672
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)
5673

    
5674
      _ShutdownInstanceDisks(self, instance)
5675

    
5676

    
5677
class LUInstanceReinstall(LogicalUnit):
5678
  """Reinstall an instance.
5679

5680
  """
5681
  HPATH = "instance-reinstall"
5682
  HTYPE = constants.HTYPE_INSTANCE
5683
  REQ_BGL = False
5684

    
5685
  def ExpandNames(self):
5686
    self._ExpandAndLockInstance()
5687

    
5688
  def BuildHooksEnv(self):
5689
    """Build hooks env.
5690

5691
    This runs on master, primary and secondary nodes of the instance.
5692

5693
    """
5694
    return _BuildInstanceHookEnvByObject(self, self.instance)
5695

    
5696
  def BuildHooksNodes(self):
5697
    """Build hooks nodes.
5698

5699
    """
5700
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5701
    return (nl, nl)
5702

    
5703
  def CheckPrereq(self):
5704
    """Check prerequisites.
5705

5706
    This checks that the instance is in the cluster and is not running.
5707

5708
    """
5709
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5710
    assert instance is not None, \
5711
      "Cannot retrieve locked instance %s" % self.op.instance_name
5712
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
5713
                     " offline, cannot reinstall")
5714
    for node in instance.secondary_nodes:
5715
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
5716
                       " cannot reinstall")
5717

    
5718
    if instance.disk_template == constants.DT_DISKLESS:
5719
      raise errors.OpPrereqError("Instance '%s' has no disks" %
5720
                                 self.op.instance_name,
5721
                                 errors.ECODE_INVAL)
5722
    _CheckInstanceDown(self, instance, "cannot reinstall")
5723

    
5724
    if self.op.os_type is not None:
5725
      # OS verification
5726
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
5727
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
5728
      instance_os = self.op.os_type
5729
    else:
5730
      instance_os = instance.os
5731

    
5732
    nodelist = list(instance.all_nodes)
5733

    
5734
    if self.op.osparams:
5735
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
5736
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
5737
      self.os_inst = i_osdict # the new dict (without defaults)
5738
    else:
5739
      self.os_inst = None
5740

    
5741
    self.instance = instance
5742

    
5743
  def Exec(self, feedback_fn):
5744
    """Reinstall the instance.
5745

5746
    """
5747
    inst = self.instance
5748

    
5749
    if self.op.os_type is not None:
5750
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
5751
      inst.os = self.op.os_type
5752
      # Write to configuration
5753
      self.cfg.Update(inst, feedback_fn)
5754

    
5755
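    # The OS create scripts need the instance's disks to be active: bring
    # them up, run the scripts and always shut the disks down again in the
    # finally clause below.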
    _StartInstanceDisks(self, inst, None)
5756
    try:
5757
      feedback_fn("Running the instance OS create scripts...")
5758
      # FIXME: pass debug option from opcode to backend
5759
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
5760
                                             self.op.debug_level,
5761
                                             osparams=self.os_inst)
5762
      result.Raise("Could not install OS for instance %s on node %s" %
5763
                   (inst.name, inst.primary_node))
5764
    finally:
5765
      _ShutdownInstanceDisks(self, inst)
5766

    
5767

    
5768
class LUInstanceRecreateDisks(LogicalUnit):
5769
  """Recreate an instance's missing disks.
5770

5771
  """
5772
  HPATH = "instance-recreate-disks"
5773
  HTYPE = constants.HTYPE_INSTANCE
5774
  REQ_BGL = False
5775

    
5776
  def CheckArguments(self):
5777
    # normalise the disk list
5778
    self.op.disks = sorted(frozenset(self.op.disks))
5779

    
5780
  def ExpandNames(self):
5781
    self._ExpandAndLockInstance()
5782
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5783
    if self.op.nodes:
5784
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
5785
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
5786
    else:
5787
      self.needed_locks[locking.LEVEL_NODE] = []
5788

    
5789
  def DeclareLocks(self, level):
5790
    if level == locking.LEVEL_NODE:
5791
      # if we replace the nodes, we only need to lock the old primary,
5792
      # otherwise we need to lock all nodes for disk re-creation
5793
      primary_only = bool(self.op.nodes)
5794
      self._LockInstancesNodes(primary_only=primary_only)
5795

    
5796
  def BuildHooksEnv(self):
5797
    """Build hooks env.
5798

5799
    This runs on master, primary and secondary nodes of the instance.
5800

5801
    """
5802
    return _BuildInstanceHookEnvByObject(self, self.instance)
5803

    
5804
  def BuildHooksNodes(self):
5805
    """Build hooks nodes.
5806

5807
    """
5808
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5809
    return (nl, nl)
5810

    
5811
  def CheckPrereq(self):
5812
    """Check prerequisites.
5813

5814
    This checks that the instance is in the cluster and is not running.
5815

5816
    """
5817
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5818
    assert instance is not None, \
5819
      "Cannot retrieve locked instance %s" % self.op.instance_name
5820
    if self.op.nodes:
5821
      if len(self.op.nodes) != len(instance.all_nodes):
5822
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
5823
                                   " %d replacement nodes were specified" %
5824
                                   (instance.name, len(instance.all_nodes),
5825
                                    len(self.op.nodes)),
5826
                                   errors.ECODE_INVAL)
5827
      assert instance.disk_template != constants.DT_DRBD8 or \
5828
          len(self.op.nodes) == 2
5829
      assert instance.disk_template != constants.DT_PLAIN or \
5830
          len(self.op.nodes) == 1
5831
      primary_node = self.op.nodes[0]
5832
    else:
5833
      primary_node = instance.primary_node
5834
    _CheckNodeOnline(self, primary_node)
5835

    
5836
    if instance.disk_template == constants.DT_DISKLESS:
5837
      raise errors.OpPrereqError("Instance '%s' has no disks" %
5838
                                 self.op.instance_name, errors.ECODE_INVAL)
5839
    # if we replace nodes *and* the old primary is offline, we don't
5840
    # check
5841
    assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
5842
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
5843
    if not (self.op.nodes and old_pnode.offline):
5844
      _CheckInstanceDown(self, instance, "cannot recreate disks")
5845

    
5846
    if not self.op.disks:
5847
      self.op.disks = range(len(instance.disks))
5848
    else:
5849
      for idx in self.op.disks:
5850
        if idx >= len(instance.disks):
5851
          raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
5852
                                     errors.ECODE_INVAL)
5853
    if self.op.disks != range(len(instance.disks)) and self.op.nodes:
5854
      raise errors.OpPrereqError("Can't recreate disks partially and"
5855
                                 " change the nodes at the same time",
5856
                                 errors.ECODE_INVAL)
5857
    self.instance = instance
5858

    
5859
  def Exec(self, feedback_fn):
5860
    """Recreate the disks.
5861

5862
    """
5863
    # change primary node, if needed
5864
    if self.op.nodes:
5865
      self.instance.primary_node = self.op.nodes[0]
5866
      self.LogWarning("Changing the instance's nodes, you will have to"
5867
                      " remove any disks left on the older nodes manually")
5868

    
5869
    to_skip = []
5870
    for idx, disk in enumerate(self.instance.disks):
5871
      if idx not in self.op.disks: # disk idx has not been passed in
5872
        to_skip.append(idx)
5873
        continue
5874
      # update secondaries for disks, if needed
5875
      if self.op.nodes:
5876
        if disk.dev_type == constants.LD_DRBD8:
5877
          # need to update the nodes
5878
          assert len(self.op.nodes) == 2
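          # The DRBD logical_id tuple starts with the two node names
          # (followed by port, minors and the shared secret), so only the
          # first two slots need to be rewritten for the new node pair.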
5879
          logical_id = list(disk.logical_id)
5880
          logical_id[0] = self.op.nodes[0]
5881
          logical_id[1] = self.op.nodes[1]
5882
          disk.logical_id = tuple(logical_id)
5883

    
5884
    if self.op.nodes:
5885
      self.cfg.Update(self.instance, feedback_fn)
5886

    
5887
    _CreateDisks(self, self.instance, to_skip=to_skip)
5888

    
5889

    
5890
class LUInstanceRename(LogicalUnit):
5891
  """Rename an instance.
5892

5893
  """
5894
  HPATH = "instance-rename"
5895
  HTYPE = constants.HTYPE_INSTANCE
5896

    
5897
  def CheckArguments(self):
5898
    """Check arguments.
5899

5900
    """
5901
    if self.op.ip_check and not self.op.name_check:
5902
      # TODO: make the ip check more flexible and not depend on the name check
5903
      raise errors.OpPrereqError("IP address check requires a name check",
5904
                                 errors.ECODE_INVAL)
5905

    
5906
  def BuildHooksEnv(self):
5907
    """Build hooks env.
5908

5909
    This runs on master, primary and secondary nodes of the instance.
5910

5911
    """
5912
    env = _BuildInstanceHookEnvByObject(self, self.instance)
5913
    env["INSTANCE_NEW_NAME"] = self.op.new_name
5914
    return env
5915

    
5916
  def BuildHooksNodes(self):
5917
    """Build hooks nodes.
5918

5919
    """
5920
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5921
    return (nl, nl)
5922

    
5923
  def CheckPrereq(self):
5924
    """Check prerequisites.
5925

5926
    This checks that the instance is in the cluster and is not running.
5927

5928
    """
5929
    self.op.instance_name = _ExpandInstanceName(self.cfg,
5930
                                                self.op.instance_name)
5931
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5932
    assert instance is not None
5933
    _CheckNodeOnline(self, instance.primary_node)
5934
    _CheckInstanceDown(self, instance, "cannot rename")
5935
    self.instance = instance
5936

    
5937
    new_name = self.op.new_name
5938
    if self.op.name_check:
5939
      hostname = netutils.GetHostname(name=new_name)
5940
      if hostname != new_name:
5941
        self.LogInfo("Resolved given name '%s' to '%s'", new_name,
5942
                     hostname.name)
5943
      if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
5944
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
5945
                                    " same as given hostname '%s'") %
5946
                                    (hostname.name, self.op.new_name),
5947
                                    errors.ECODE_INVAL)
5948
      new_name = self.op.new_name = hostname.name
5949
      if (self.op.ip_check and
5950
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
5951
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
5952
                                   (hostname.ip, new_name),
5953
                                   errors.ECODE_NOTUNIQUE)
5954

    
5955
    instance_list = self.cfg.GetInstanceList()
5956
    if new_name in instance_list and new_name != instance.name:
5957
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
5958
                                 new_name, errors.ECODE_EXISTS)
5959

    
5960
  def Exec(self, feedback_fn):
5961
    """Rename the instance.
5962

5963
    """
5964
    inst = self.instance
5965
    old_name = inst.name
5966

    
5967
    rename_file_storage = False
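    # File- and shared-file-based instances keep the disk path in
    # logical_id[1]; if the name changes, the storage directory on the
    # primary node has to be renamed as well (done further below).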
5968
    if (inst.disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE) and
5969
        self.op.new_name != inst.name):
5970
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5971
      rename_file_storage = True
5972

    
5973
    self.cfg.RenameInstance(inst.name, self.op.new_name)
5974
    # Change the instance lock. This is definitely safe while we hold the BGL.
5975
    # Otherwise the new lock would have to be added in acquired mode.
5976
    assert self.REQ_BGL
5977
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
5978
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
5979

    
5980
    # re-read the instance from the configuration after rename
5981
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
5982

    
5983
    if rename_file_storage:
5984
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5985
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
5986
                                                     old_file_storage_dir,
5987
                                                     new_file_storage_dir)
5988
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
5989
                   " (but the instance has been renamed in Ganeti)" %
5990
                   (inst.primary_node, old_file_storage_dir,
5991
                    new_file_storage_dir))
5992

    
5993
    _StartInstanceDisks(self, inst, None)
5994
    try:
5995
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
5996
                                                 old_name, self.op.debug_level)
5997
      msg = result.fail_msg
5998
      if msg:
5999
        msg = ("Could not run OS rename script for instance %s on node %s"
6000
               " (but the instance has been renamed in Ganeti): %s" %
6001
               (inst.name, inst.primary_node, msg))
6002
        self.proc.LogWarning(msg)
6003
    finally:
6004
      _ShutdownInstanceDisks(self, inst)
6005

    
6006
    return inst.name
6007

    
6008

    
6009
class LUInstanceRemove(LogicalUnit):
6010
  """Remove an instance.
6011

6012
  """
6013
  HPATH = "instance-remove"
6014
  HTYPE = constants.HTYPE_INSTANCE
6015
  REQ_BGL = False
6016

    
6017
  def ExpandNames(self):
6018
    self._ExpandAndLockInstance()
6019
    self.needed_locks[locking.LEVEL_NODE] = []
6020
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6021

    
6022
  def DeclareLocks(self, level):
6023
    if level == locking.LEVEL_NODE:
6024
      self._LockInstancesNodes()
6025

    
6026
  def BuildHooksEnv(self):
6027
    """Build hooks env.
6028

6029
    This runs on master, primary and secondary nodes of the instance.
6030

6031
    """
6032
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6033
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6034
    return env
6035

    
6036
  def BuildHooksNodes(self):
6037
    """Build hooks nodes.
6038

6039
    """
6040
    nl = [self.cfg.GetMasterNode()]
6041
    nl_post = list(self.instance.all_nodes) + nl
6042
    return (nl, nl_post)
6043

    
6044
  def CheckPrereq(self):
6045
    """Check prerequisites.
6046

6047
    This checks that the instance is in the cluster.
6048

6049
    """
6050
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6051
    assert self.instance is not None, \
6052
      "Cannot retrieve locked instance %s" % self.op.instance_name
6053

    
6054
  def Exec(self, feedback_fn):
6055
    """Remove the instance.
6056

6057
    """
6058
    instance = self.instance
6059
    logging.info("Shutting down instance %s on node %s",
6060
                 instance.name, instance.primary_node)
6061

    
6062
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6063
                                             self.op.shutdown_timeout)
6064
    msg = result.fail_msg
6065
    if msg:
6066
      if self.op.ignore_failures:
6067
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
6068
      else:
6069
        raise errors.OpExecError("Could not shutdown instance %s on"
6070
                                 " node %s: %s" %
6071
                                 (instance.name, instance.primary_node, msg))
6072

    
6073
    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6074

    
6075

    
6076
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
  """Utility function to remove an instance.

  """
  logging.info("Removing block devices for instance %s", instance.name)

  if not _RemoveDisks(lu, instance):
    if not ignore_failures:
      raise errors.OpExecError("Can't remove instance's disks")
    feedback_fn("Warning: can't remove instance's disks")

  logging.info("Removing instance %s out of cluster config", instance.name)

  lu.cfg.RemoveInstance(instance.name)

  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
    "Instance lock removal conflict"

  # Remove lock for the instance
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6096

    
6097

    
6098
class LUInstanceQuery(NoHooksLU):
  """Logical unit for querying instances.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
                             self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.iq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.iq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.iq.OldStyleQuery(self)
6117

    
6118

    
6119
class LUInstanceFailover(LogicalUnit):
6120
  """Failover an instance.
6121

6122
  """
6123
  HPATH = "instance-failover"
6124
  HTYPE = constants.HTYPE_INSTANCE
6125
  REQ_BGL = False
6126

    
6127
  def CheckArguments(self):
6128
    """Check the arguments.
6129

6130
    """
6131
    self.iallocator = getattr(self.op, "iallocator", None)
6132
    self.target_node = getattr(self.op, "target_node", None)
6133

    
6134
  def ExpandNames(self):
6135
    self._ExpandAndLockInstance()
6136

    
6137
    if self.op.target_node is not None:
6138
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6139

    
6140
    self.needed_locks[locking.LEVEL_NODE] = []
6141
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6142

    
6143
    ignore_consistency = self.op.ignore_consistency
6144
    shutdown_timeout = self.op.shutdown_timeout
6145
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6146
                                       cleanup=False,
6147
                                       failover=True,
6148
                                       ignore_consistency=ignore_consistency,
6149
                                       shutdown_timeout=shutdown_timeout)
6150
    self.tasklets = [self._migrater]
6151

    
6152
  def DeclareLocks(self, level):
6153
    if level == locking.LEVEL_NODE:
6154
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
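      # Externally mirrored disk templates can fail over to an arbitrary
      # node: either lock all nodes (the iallocator picks one later) or just
      # the primary and the explicit target node. Internally mirrored
      # templates (e.g. DRBD) can only go to the existing secondary, so the
      # instance's own nodes are locked instead.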
6155
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6156
        if self.op.target_node is None:
6157
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6158
        else:
6159
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6160
                                                   self.op.target_node]
6161
        del self.recalculate_locks[locking.LEVEL_NODE]
6162
      else:
6163
        self._LockInstancesNodes()
6164

    
6165
  def BuildHooksEnv(self):
6166
    """Build hooks env.
6167

6168
    This runs on master, primary and secondary nodes of the instance.
6169

6170
    """
6171
    instance = self._migrater.instance
6172
    source_node = instance.primary_node
6173
    target_node = self.op.target_node
6174
    env = {
6175
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6176
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6177
      "OLD_PRIMARY": source_node,
6178
      "NEW_PRIMARY": target_node,
6179
      }
6180

    
6181
    if instance.disk_template in constants.DTS_INT_MIRROR:
6182
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6183
      env["NEW_SECONDARY"] = source_node
6184
    else:
6185
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6186

    
6187
    env.update(_BuildInstanceHookEnvByObject(self, instance))
6188

    
6189
    return env
6190

    
6191
  def BuildHooksNodes(self):
6192
    """Build hooks nodes.
6193

6194
    """
6195
    instance = self._migrater.instance
6196
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6197
    return (nl, nl + [instance.primary_node])
6198

    
6199

    
6200
class LUInstanceMigrate(LogicalUnit):
6201
  """Migrate an instance.
6202

6203
  This is migration without shutting down, compared to the failover,
6204
  which is done with shutdown.
6205

6206
  """
6207
  HPATH = "instance-migrate"
6208
  HTYPE = constants.HTYPE_INSTANCE
6209
  REQ_BGL = False
6210

    
6211
  def ExpandNames(self):
6212
    self._ExpandAndLockInstance()
6213

    
6214
    if self.op.target_node is not None:
6215
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6216

    
6217
    self.needed_locks[locking.LEVEL_NODE] = []
6218
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6219

    
6220
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6221
                                       cleanup=self.op.cleanup,
6222
                                       failover=False,
6223
                                       fallback=self.op.allow_failover)
6224
    self.tasklets = [self._migrater]
6225

    
6226
  def DeclareLocks(self, level):
6227
    if level == locking.LEVEL_NODE:
6228
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6229
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6230
        if self.op.target_node is None:
6231
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6232
        else:
6233
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6234
                                                   self.op.target_node]
6235
        del self.recalculate_locks[locking.LEVEL_NODE]
6236
      else:
6237
        self._LockInstancesNodes()
6238

    
6239
  def BuildHooksEnv(self):
6240
    """Build hooks env.
6241

6242
    This runs on master, primary and secondary nodes of the instance.
6243

6244
    """
6245
    instance = self._migrater.instance
6246
    source_node = instance.primary_node
6247
    target_node = self.op.target_node
6248
    env = _BuildInstanceHookEnvByObject(self, instance)
6249
    env.update({
6250
      "MIGRATE_LIVE": self._migrater.live,
6251
      "MIGRATE_CLEANUP": self.op.cleanup,
6252
      "OLD_PRIMARY": source_node,
6253
      "NEW_PRIMARY": target_node,
6254
      })
6255

    
6256
    if instance.disk_template in constants.DTS_INT_MIRROR:
6257
      env["OLD_SECONDARY"] = target_node
6258
      env["NEW_SECONDARY"] = source_node
6259
    else:
6260
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6261

    
6262
    return env
6263

    
6264
  def BuildHooksNodes(self):
6265
    """Build hooks nodes.
6266

6267
    """
6268
    instance = self._migrater.instance
6269
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6270
    return (nl, nl + [instance.primary_node])
6271

    
6272

    
6273
class LUInstanceMove(LogicalUnit):
6274
  """Move an instance by data-copying.
6275

6276
  """
6277
  HPATH = "instance-move"
6278
  HTYPE = constants.HTYPE_INSTANCE
6279
  REQ_BGL = False
6280

    
6281
  def ExpandNames(self):
6282
    self._ExpandAndLockInstance()
6283
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6284
    self.op.target_node = target_node
6285
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
6286
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6287

    
6288
  def DeclareLocks(self, level):
6289
    if level == locking.LEVEL_NODE:
6290
      self._LockInstancesNodes(primary_only=True)
6291

    
6292
  def BuildHooksEnv(self):
6293
    """Build hooks env.
6294

6295
    This runs on master, primary and secondary nodes of the instance.
6296

6297
    """
6298
    env = {
6299
      "TARGET_NODE": self.op.target_node,
6300
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6301
      }
6302
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6303
    return env
6304

    
6305
  def BuildHooksNodes(self):
6306
    """Build hooks nodes.
6307

6308
    """
6309
    nl = [
6310
      self.cfg.GetMasterNode(),
6311
      self.instance.primary_node,
6312
      self.op.target_node,
6313
      ]
6314
    return (nl, nl)
6315

    
6316
  def CheckPrereq(self):
6317
    """Check prerequisites.
6318

6319
    This checks that the instance is in the cluster.
6320

6321
    """
6322
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6323
    assert self.instance is not None, \
6324
      "Cannot retrieve locked instance %s" % self.op.instance_name
6325

    
6326
    node = self.cfg.GetNodeInfo(self.op.target_node)
6327
    assert node is not None, \
6328
      "Cannot retrieve locked node %s" % self.op.target_node
6329

    
6330
    self.target_node = target_node = node.name
6331

    
6332
    if target_node == instance.primary_node:
6333
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
6334
                                 (instance.name, target_node),
6335
                                 errors.ECODE_STATE)
6336

    
6337
    bep = self.cfg.GetClusterInfo().FillBE(instance)
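    # Only simple disk layouts (plain logical volumes and file-based disks)
    # can be moved by copying the data; more complex layouts such as DRBD
    # are rejected here.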
6338

    
6339
    for idx, dsk in enumerate(instance.disks):
6340
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6341
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6342
                                   " cannot copy" % idx, errors.ECODE_STATE)
6343

    
6344
    _CheckNodeOnline(self, target_node)
6345
    _CheckNodeNotDrained(self, target_node)
6346
    _CheckNodeVmCapable(self, target_node)
6347

    
6348
    if instance.admin_up:
6349
      # check memory requirements on the target node
6350
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6351
                           instance.name, bep[constants.BE_MEMORY],
6352
                           instance.hypervisor)
6353
    else:
6354
      self.LogInfo("Not checking memory on the secondary node as"
6355
                   " instance will not be started")
6356

    
6357
    # check bridge existence
6358
    _CheckInstanceBridgesExist(self, instance, node=target_node)
6359

    
6360
  def Exec(self, feedback_fn):
6361
    """Move an instance.
6362

6363
    The move is done by shutting it down on its present node, copying
6364
    the data over (slow) and starting it on the new node.
6365

6366
    """
6367
    instance = self.instance
6368

    
6369
    source_node = instance.primary_node
6370
    target_node = self.target_node
6371

    
6372
    self.LogInfo("Shutting down instance %s on source node %s",
6373
                 instance.name, source_node)
6374

    
6375
    result = self.rpc.call_instance_shutdown(source_node, instance,
6376
                                             self.op.shutdown_timeout)
6377
    msg = result.fail_msg
6378
    if msg:
6379
      if self.op.ignore_consistency:
6380
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
6381
                             " Proceeding anyway. Please make sure node"
6382
                             " %s is down. Error details: %s",
6383
                             instance.name, source_node, source_node, msg)
6384
      else:
6385
        raise errors.OpExecError("Could not shutdown instance %s on"
6386
                                 " node %s: %s" %
6387
                                 (instance.name, source_node, msg))
6388

    
6389
    # create the target disks
6390
    try:
6391
      _CreateDisks(self, instance, target_node=target_node)
6392
    except errors.OpExecError:
6393
      self.LogWarning("Device creation failed, reverting...")
6394
      try:
6395
        _RemoveDisks(self, instance, target_node=target_node)
6396
      finally:
6397
        self.cfg.ReleaseDRBDMinors(instance.name)
6398
        raise
6399

    
6400
    cluster_name = self.cfg.GetClusterInfo().cluster_name
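    # Copy the data disk by disk: assemble each newly created disk on the
    # target node to obtain its device path, then have the source node
    # stream the old disk's contents to that path via blockdev_export.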
6401

    
6402
    errs = []
6403
    # activate, get path, copy the data over
6404
    for idx, disk in enumerate(instance.disks):
6405
      self.LogInfo("Copying data for disk %d", idx)
6406
      result = self.rpc.call_blockdev_assemble(target_node, disk,
6407
                                               instance.name, True, idx)
6408
      if result.fail_msg:
6409
        self.LogWarning("Can't assemble newly created disk %d: %s",
6410
                        idx, result.fail_msg)
6411
        errs.append(result.fail_msg)
6412
        break
6413
      dev_path = result.payload
6414
      result = self.rpc.call_blockdev_export(source_node, disk,
6415
                                             target_node, dev_path,
6416
                                             cluster_name)
6417
      if result.fail_msg:
6418
        self.LogWarning("Can't copy data over for disk %d: %s",
6419
                        idx, result.fail_msg)
6420
        errs.append(result.fail_msg)
6421
        break
6422

    
6423
    if errs:
6424
      self.LogWarning("Some disks failed to copy, aborting")
6425
      try:
6426
        _RemoveDisks(self, instance, target_node=target_node)
6427
      finally:
6428
        self.cfg.ReleaseDRBDMinors(instance.name)
6429
        raise errors.OpExecError("Errors during disk copy: %s" %
6430
                                 (",".join(errs),))
6431

    
6432
    instance.primary_node = target_node
6433
    self.cfg.Update(instance, feedback_fn)
6434

    
6435
    self.LogInfo("Removing the disks on the original node")
6436
    _RemoveDisks(self, instance, target_node=source_node)
6437

    
6438
    # Only start the instance if it's marked as up
6439
    if instance.admin_up:
6440
      self.LogInfo("Starting instance %s on node %s",
6441
                   instance.name, target_node)
6442

    
6443
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
6444
                                           ignore_secondaries=True)
6445
      if not disks_ok:
6446
        _ShutdownInstanceDisks(self, instance)
6447
        raise errors.OpExecError("Can't activate the instance's disks")
6448

    
6449
      result = self.rpc.call_instance_start(target_node, instance, None, None)
6450
      msg = result.fail_msg
6451
      if msg:
6452
        _ShutdownInstanceDisks(self, instance)
6453
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6454
                                 (instance.name, target_node, msg))
6455

    
6456

    
6457
class LUNodeMigrate(LogicalUnit):
6458
  """Migrate all instances from a node.
6459

6460
  """
6461
  HPATH = "node-migrate"
6462
  HTYPE = constants.HTYPE_NODE
6463
  REQ_BGL = False
6464

    
6465
  def CheckArguments(self):
6466
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
6467

    
6468
  def ExpandNames(self):
6469
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6470

    
6471
    self.needed_locks = {}
6472

    
6473
    # Create tasklets for migrating instances for all instances on this node
6474
    names = []
6475
    tasklets = []
6476

    
6477
    self.lock_all_nodes = False
6478

    
6479
    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
6480
      logging.debug("Migrating instance %s", inst.name)
6481
      names.append(inst.name)
6482

    
6483
      tasklets.append(TLMigrateInstance(self, inst.name, cleanup=False))
6484

    
6485
      if inst.disk_template in constants.DTS_EXT_MIRROR:
6486
        # We need to lock all nodes, as the iallocator will choose the
6487
        # destination nodes afterwards
6488
        self.lock_all_nodes = True
6489

    
6490
    self.tasklets = tasklets
6491

    
6492
    # Declare node locks
6493
    if self.lock_all_nodes:
6494
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6495
    else:
6496
      self.needed_locks[locking.LEVEL_NODE] = [self.op.node_name]
6497
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6498

    
6499
    # Declare instance locks
6500
    self.needed_locks[locking.LEVEL_INSTANCE] = names
6501

    
6502
  def DeclareLocks(self, level):
6503
    if level == locking.LEVEL_NODE and not self.lock_all_nodes:
6504
      self._LockInstancesNodes()
6505

    
6506
  def BuildHooksEnv(self):
6507
    """Build hooks env.
6508

6509
    This runs on the master, the primary and all the secondaries.
6510

6511
    """
6512
    return {
6513
      "NODE_NAME": self.op.node_name,
6514
      }
6515

    
6516
  def BuildHooksNodes(self):
6517
    """Build hooks nodes.
6518

6519
    """
6520
    nl = [self.cfg.GetMasterNode()]
6521
    return (nl, nl)
6522

    
6523

    
6524
class TLMigrateInstance(Tasklet):
6525
  """Tasklet class for instance migration.
6526

6527
  @type live: boolean
  @ivar live: whether the migration will be done live or non-live;
      this variable is initialized only after CheckPrereq has run
  @type cleanup: boolean
  @ivar cleanup: Whether we are cleaning up after a failed migration
  @type iallocator: string
  @ivar iallocator: The iallocator used to determine target_node
  @type target_node: string
  @ivar target_node: If given, the target_node to reallocate the instance to
  @type failover: boolean
  @ivar failover: Whether operation results in failover or migration
  @type fallback: boolean
  @ivar fallback: Whether fallback to failover is allowed if migration not
                  possible
  @type ignore_consistency: boolean
  @ivar ignore_consistency: Whether we should ignore consistency between source
                            and target node
  @type shutdown_timeout: int
  @ivar shutdown_timeout: In case of failover, the timeout of the shutdown

  """
6548
  def __init__(self, lu, instance_name, cleanup=False,
6549
               failover=False, fallback=False,
6550
               ignore_consistency=False,
6551
               shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
6552
    """Initializes this class.
6553

6554
    """
6555
    Tasklet.__init__(self, lu)
6556

    
6557
    # Parameters
6558
    self.instance_name = instance_name
6559
    self.cleanup = cleanup
6560
    self.live = False # will be overridden later
6561
    self.failover = failover
6562
    self.fallback = fallback
6563
    self.ignore_consistency = ignore_consistency
6564
    self.shutdown_timeout = shutdown_timeout
6565

    
6566
  def CheckPrereq(self):
6567
    """Check prerequisites.
6568

6569
    This checks that the instance is in the cluster.
6570

6571
    """
6572
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6573
    instance = self.cfg.GetInstanceInfo(instance_name)
6574
    assert instance is not None
6575
    self.instance = instance
6576

    
6577
    if (not self.cleanup and not instance.admin_up and not self.failover and
6578
        self.fallback):
6579
      self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
6580
                      " to failover")
6581
      self.failover = True
6582

    
6583
    if instance.disk_template not in constants.DTS_MIRRORED:
6584
      if self.failover:
6585
        text = "failovers"
6586
      else:
6587
        text = "migrations"
6588
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
6589
                                 " %s" % (instance.disk_template, text),
6590
                                 errors.ECODE_STATE)
6591

    
6592
    if instance.disk_template in constants.DTS_EXT_MIRROR:
6593
      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
6594

    
6595
      if self.lu.op.iallocator:
6596
        self._RunAllocator()
6597
      else:
6598
        # We set self.target_node as it is required by
6599
        # BuildHooksEnv
6600
        self.target_node = self.lu.op.target_node
6601

    
6602
      # self.target_node is already populated, either directly or by the
6603
      # iallocator run
6604
      target_node = self.target_node
6605
      if self.target_node == instance.primary_node:
6606
        raise errors.OpPrereqError("Cannot migrate instance %s"
6607
                                   " to its primary (%s)" %
6608
                                   (instance.name, instance.primary_node))
6609

    
6610
      if len(self.lu.tasklets) == 1:
6611
        # It is safe to release locks only when we're the only tasklet
6612
        # in the LU
6613
        _ReleaseLocks(self.lu, locking.LEVEL_NODE,
6614
                      keep=[instance.primary_node, self.target_node])
6615

    
6616
    else:
6617
      secondary_nodes = instance.secondary_nodes
6618
      if not secondary_nodes:
6619
        raise errors.ConfigurationError("No secondary node but using"
6620
                                        " %s disk template" %
6621
                                        instance.disk_template)
6622
      target_node = secondary_nodes[0]
6623
      if self.lu.op.iallocator or (self.lu.op.target_node and
6624
                                   self.lu.op.target_node != target_node):
6625
        if self.failover:
6626
          text = "failed over"
6627
        else:
6628
          text = "migrated"
6629
        raise errors.OpPrereqError("Instances with disk template %s cannot"
6630
                                   " be %s to arbitrary nodes"
6631
                                   " (neither an iallocator nor a target"
6632
                                   " node can be passed)" %
6633
                                   (instance.disk_template, text),
6634
                                   errors.ECODE_INVAL)
6635

    
6636
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
6637

    
6638
    # check memory requirements on the secondary node
6639
    if not self.failover or instance.admin_up:
6640
      _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
6641
                           instance.name, i_be[constants.BE_MEMORY],
6642
                           instance.hypervisor)
6643
    else:
6644
      self.lu.LogInfo("Not checking memory on the secondary node as"
6645
                      " instance will not be started")
6646

    
6647
    # check bridge existance
6648
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
6649

    
6650
    if not self.cleanup:
6651
      _CheckNodeNotDrained(self.lu, target_node)
6652
      if not self.failover:
6653
        result = self.rpc.call_instance_migratable(instance.primary_node,
6654
                                                   instance)
6655
        if result.fail_msg and self.fallback:
6656
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
6657
                          " failover")
6658
          self.failover = True
6659
        else:
6660
          result.Raise("Can't migrate, please use failover",
6661
                       prereq=True, ecode=errors.ECODE_STATE)
6662

    
6663
    assert not (self.failover and self.cleanup)
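    # Determine the migration mode: an explicit 'live' boolean takes
    # precedence, then an explicit 'mode'; if neither is given, fall back to
    # the hypervisor's configured default (HV_MIGRATION_MODE).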
6664

    
6665
    if not self.failover:
6666
      if self.lu.op.live is not None and self.lu.op.mode is not None:
6667
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
6668
                                   " parameters are accepted",
6669
                                   errors.ECODE_INVAL)
6670
      if self.lu.op.live is not None:
6671
        if self.lu.op.live:
6672
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
6673
        else:
6674
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
6675
        # reset the 'live' parameter to None so that repeated
6676
        # invocations of CheckPrereq do not raise an exception
6677
        self.lu.op.live = None
6678
      elif self.lu.op.mode is None:
6679
        # read the default value from the hypervisor
6680
        i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
6681
                                                skip_globals=False)
6682
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
6683

    
6684
      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
6685
    else:
6686
      # Failover is never live
6687
      self.live = False
6688

    
6689
  def _RunAllocator(self):
6690
    """Run the allocator based on input opcode.
6691

6692
    """
6693
    ial = IAllocator(self.cfg, self.rpc,
6694
                     mode=constants.IALLOCATOR_MODE_RELOC,
6695
                     name=self.instance_name,
6696
                     # TODO See why hail breaks with a single node below
6697
                     relocate_from=[self.instance.primary_node,
6698
                                    self.instance.primary_node],
6699
                     )
6700

    
6701
    ial.Run(self.lu.op.iallocator)
6702

    
6703
    if not ial.success:
6704
      raise errors.OpPrereqError("Can't compute nodes using"
6705
                                 " iallocator '%s': %s" %
6706
                                 (self.lu.op.iallocator, ial.info),
6707
                                 errors.ECODE_NORES)
6708
    if len(ial.result) != ial.required_nodes:
6709
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6710
                                 " of nodes (%s), required %s" %
6711
                                 (self.lu.op.iallocator, len(ial.result),
6712
                                  ial.required_nodes), errors.ECODE_FAULT)
6713
    self.target_node = ial.result[0]
6714
    self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6715
                 self.instance_name, self.lu.op.iallocator,
6716
                 utils.CommaJoin(ial.result))
6717

    
6718
  def _WaitUntilSync(self):
6719
    """Poll with custom rpc for disk sync.
6720

6721
    This uses our own step-based rpc call.
6722

6723
    """
6724
    self.feedback_fn("* wait until resync is done")
6725
    all_done = False
6726
    while not all_done:
6727
      all_done = True
6728
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
6729
                                            self.nodes_ip,
6730
                                            self.instance.disks)
6731
      min_percent = 100
6732
      for node, nres in result.items():
6733
        nres.Raise("Cannot resync disks on node %s" % node)
6734
        node_done, node_percent = nres.payload
6735
        all_done = all_done and node_done
6736
        if node_percent is not None:
6737
          min_percent = min(min_percent, node_percent)
6738
      if not all_done:
6739
        if min_percent < 100:
6740
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
6741
        time.sleep(2)
6742

    
6743
  def _EnsureSecondary(self, node):
6744
    """Demote a node to secondary.
6745

6746
    """
6747
    self.feedback_fn("* switching node %s to secondary mode" % node)
6748

    
6749
    for dev in self.instance.disks:
6750
      self.cfg.SetDiskID(dev, node)
6751

    
6752
    result = self.rpc.call_blockdev_close(node, self.instance.name,
6753
                                          self.instance.disks)
6754
    result.Raise("Cannot change disk to secondary on node %s" % node)
6755

    
6756
  def _GoStandalone(self):
    """Disconnect from the network.

    """
    self.feedback_fn("* changing into standalone mode")
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
                                               self.instance.disks)
    for node, nres in result.items():
      nres.Raise("Cannot disconnect disks node %s" % node)
6765

    
6766
  def _GoReconnect(self, multimaster):
6767
    """Reconnect to the network.
6768

6769
    """
6770
    if multimaster:
6771
      msg = "dual-master"
6772
    else:
6773
      msg = "single-master"
6774
    self.feedback_fn("* changing disks into %s mode" % msg)
6775
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
6776
                                           self.instance.disks,
6777
                                           self.instance.name, multimaster)
6778
    for node, nres in result.items():
6779
      nres.Raise("Cannot change disks config on node %s" % node)
6780

    
6781
  def _ExecCleanup(self):
6782
    """Try to cleanup after a failed migration.
6783

6784
    The cleanup is done by:
6785
      - check that the instance is running only on one node
6786
        (and update the config if needed)
6787
      - change disks on its secondary node to secondary
6788
      - wait until disks are fully synchronized
6789
      - disconnect from the network
6790
      - change disks into single-master mode
6791
      - wait again until disks are fully synchronized
6792

6793
    """
6794
    instance = self.instance
6795
    target_node = self.target_node
6796
    source_node = self.source_node
6797

    
6798
    # check running on only one node
6799
    self.feedback_fn("* checking where the instance actually runs"
6800
                     " (if this hangs, the hypervisor might be in"
6801
                     " a bad state)")
6802
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6803
    for node, result in ins_l.items():
6804
      result.Raise("Can't contact node %s" % node)
6805

    
6806
    runningon_source = instance.name in ins_l[source_node].payload
6807
    runningon_target = instance.name in ins_l[target_node].payload
6808

    
6809
    if runningon_source and runningon_target:
6810
      raise errors.OpExecError("Instance seems to be running on two nodes,"
6811
                               " or the hypervisor is confused; you will have"
6812
                               " to ensure manually that it runs only on one"
6813
                               " and restart this operation")
6814

    
6815
    if not (runningon_source or runningon_target):
6816
      raise errors.OpExecError("Instance does not seem to be running at all;"
6817
                               " in this case it's safer to repair by"
6818
                               " running 'gnt-instance stop' to ensure disk"
6819
                               " shutdown, and then restarting it")
6820

    
6821
    if runningon_target:
6822
      # the migration has actually succeeded, we need to update the config
6823
      self.feedback_fn("* instance running on secondary node (%s),"
6824
                       " updating config" % target_node)
6825
      instance.primary_node = target_node
6826
      self.cfg.Update(instance, self.feedback_fn)
6827
      demoted_node = source_node
6828
    else:
6829
      self.feedback_fn("* instance confirmed to be running on its"
6830
                       " primary node (%s)" % source_node)
6831
      demoted_node = target_node
6832

    
6833
    if instance.disk_template in constants.DTS_INT_MIRROR:
6834
      self._EnsureSecondary(demoted_node)
6835
      try:
6836
        self._WaitUntilSync()
6837
      except errors.OpExecError:
6838
        # we ignore errors here, since if the device is standalone, it
6839
        # won't be able to sync
6840
        pass
6841
      self._GoStandalone()
6842
      self._GoReconnect(False)
6843
      self._WaitUntilSync()
6844

    
6845
    self.feedback_fn("* done")
6846

    
6847
  def _RevertDiskStatus(self):
6848
    """Try to revert the disk status after a failed migration.
6849

6850
    """
6851
    target_node = self.target_node
6852
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
6853
      return
6854

    
6855
    try:
6856
      self._EnsureSecondary(target_node)
6857
      self._GoStandalone()
6858
      self._GoReconnect(False)
6859
      self._WaitUntilSync()
6860
    except errors.OpExecError, err:
6861
      self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
6862
                         " please try to recover the instance manually;"
6863
                         " error '%s'" % str(err))
6864

    
6865
  def _AbortMigration(self):
6866
    """Call the hypervisor code to abort a started migration.
6867

6868
    """
6869
    instance = self.instance
6870
    target_node = self.target_node
6871
    migration_info = self.migration_info
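    # Passing success=False to finalize_migration tells the target node to
    # clean up the partially started (incoming) instance.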
6872

    
6873
    abort_result = self.rpc.call_finalize_migration(target_node,
6874
                                                    instance,
6875
                                                    migration_info,
6876
                                                    False)
6877
    abort_msg = abort_result.fail_msg
6878
    if abort_msg:
6879
      logging.error("Aborting migration failed on target node %s: %s",
6880
                    target_node, abort_msg)
6881
      # Don't raise an exception here, as we still have to try to revert the
6882
      # disk status, even if this step failed.
6883

    
6884
  def _ExecMigration(self):
6885
    """Migrate an instance.
6886

6887
    The migrate is done by:
6888
      - change the disks into dual-master mode
6889
      - wait until disks are fully synchronized again
6890
      - migrate the instance
6891
      - change disks on the new secondary node (the old primary) to secondary
6892
      - wait until disks are fully synchronized
6893
      - change disks into single-master mode
6894

6895
    """
6896
    instance = self.instance
6897
    target_node = self.target_node
6898
    source_node = self.source_node
6899

    
6900
    self.feedback_fn("* checking disk consistency between source and target")
6901
    for dev in instance.disks:
6902
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6903
        raise errors.OpExecError("Disk %s is degraded or not fully"
6904
                                 " synchronized on target node,"
6905
                                 " aborting migration" % dev.iv_name)
6906

    
6907
    # First get the migration information from the remote node
6908
    result = self.rpc.call_migration_info(source_node, instance)
6909
    msg = result.fail_msg
6910
    if msg:
6911
      log_err = ("Failed fetching source migration information from %s: %s" %
6912
                 (source_node, msg))
6913
      logging.error(log_err)
6914
      raise errors.OpExecError(log_err)
6915

    
6916
    self.migration_info = migration_info = result.payload
6917

    
6918
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
6919
      # Then switch the disks to master/master mode
6920
      self._EnsureSecondary(target_node)
6921
      self._GoStandalone()
6922
      self._GoReconnect(True)
6923
      self._WaitUntilSync()
6924

    
6925
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
6926
    result = self.rpc.call_accept_instance(target_node,
6927
                                           instance,
6928
                                           migration_info,
6929
                                           self.nodes_ip[target_node])
6930

    
6931
    msg = result.fail_msg
6932
    if msg:
6933
      logging.error("Instance pre-migration failed, trying to revert"
6934
                    " disk status: %s", msg)
6935
      self.feedback_fn("Pre-migration failed, aborting")
6936
      self._AbortMigration()
6937
      self._RevertDiskStatus()
6938
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6939
                               (instance.name, msg))
6940

    
6941
    self.feedback_fn("* migrating instance to %s" % target_node)
6942
    result = self.rpc.call_instance_migrate(source_node, instance,
6943
                                            self.nodes_ip[target_node],
6944
                                            self.live)
6945
    msg = result.fail_msg
6946
    if msg:
6947
      logging.error("Instance migration failed, trying to revert"
6948
                    " disk status: %s", msg)
6949
      self.feedback_fn("Migration failed, aborting")
6950
      self._AbortMigration()
6951
      self._RevertDiskStatus()
6952
      raise errors.OpExecError("Could not migrate instance %s: %s" %
6953
                               (instance.name, msg))
6954

    
6955
    instance.primary_node = target_node
6956
    # distribute new instance config to the other nodes
6957
    self.cfg.Update(instance, self.feedback_fn)
6958

    
6959
    result = self.rpc.call_finalize_migration(target_node,
6960
                                              instance,
6961
                                              migration_info,
6962
                                              True)
6963
    msg = result.fail_msg
6964
    if msg:
6965
      logging.error("Instance migration succeeded, but finalization failed:"
6966
                    " %s", msg)
6967
      raise errors.OpExecError("Could not finalize instance migration: %s" %
6968
                               msg)
6969

    
6970
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
6971
      self._EnsureSecondary(source_node)
6972
      self._WaitUntilSync()
6973
      self._GoStandalone()
6974
      self._GoReconnect(False)
6975
      self._WaitUntilSync()
6976

    
6977
    self.feedback_fn("* done")
6978

    
6979
  def _ExecFailover(self):
6980
    """Failover an instance.
6981

6982
    The failover is done by shutting it down on its present node and
6983
    starting it on the secondary.
6984

6985
    """
6986
    instance = self.instance
6987
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)
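    # The full node object is needed further down to check whether the old
    # primary is offline, in which case a failed shutdown is tolerated.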
6988

    
6989
    source_node = instance.primary_node
6990
    target_node = self.target_node
6991

    
6992
    if instance.admin_up:
6993
      self.feedback_fn("* checking disk consistency between source and target")
6994
      for dev in instance.disks:
6995
        # for drbd, these are drbd over lvm
6996
        if not _CheckDiskConsistency(self, dev, target_node, False):
6997
          if not self.ignore_consistency:
6998
            raise errors.OpExecError("Disk %s is degraded on target node,"
6999
                                     " aborting failover" % dev.iv_name)
7000
    else:
7001
      self.feedback_fn("* not checking disk consistency as instance is not"
7002
                       " running")
7003

    
7004
    self.feedback_fn("* shutting down instance on source node")
7005
    logging.info("Shutting down instance %s on node %s",
7006
                 instance.name, source_node)
7007

    
7008
    result = self.rpc.call_instance_shutdown(source_node, instance,
7009
                                             self.shutdown_timeout)
7010
    msg = result.fail_msg
7011
    if msg:
7012
      if self.ignore_consistency or primary_node.offline:
7013
        self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7014
                           " proceeding anyway; please make sure node"
7015
                           " %s is down; error details: %s",
7016
                           instance.name, source_node, source_node, msg)
7017
      else:
7018
        raise errors.OpExecError("Could not shutdown instance %s on"
7019
                                 " node %s: %s" %
7020
                                 (instance.name, source_node, msg))
7021

    
7022
    self.feedback_fn("* deactivating the instance's disks on source node")
7023
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
7024
      raise errors.OpExecError("Can't shut down the instance's disks.")
7025

    
7026
    instance.primary_node = target_node
7027
    # distribute new instance config to the other nodes
7028
    self.cfg.Update(instance, self.feedback_fn)
7029

    
7030
    # Only start the instance if it's marked as up
7031
    if instance.admin_up:
7032
      self.feedback_fn("* activating the instance's disks on target node")
7033
      logging.info("Starting instance %s on node %s",
7034
                   instance.name, target_node)
7035

    
7036
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
7037
                                           ignore_secondaries=True)
7038
      if not disks_ok:
7039
        _ShutdownInstanceDisks(self, instance)
7040
        raise errors.OpExecError("Can't activate the instance's disks")
7041

    
7042
      self.feedback_fn("* starting the instance on the target node")
7043
      result = self.rpc.call_instance_start(target_node, instance, None, None)
7044
      msg = result.fail_msg
7045
      if msg:
7046
        _ShutdownInstanceDisks(self, instance)
7047
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7048
                                 (instance.name, target_node, msg))
7049

    
7050
  def Exec(self, feedback_fn):
7051
    """Perform the migration.
7052

7053
    """
7054
    self.feedback_fn = feedback_fn
7055
    self.source_node = self.instance.primary_node
7056

    
7057
    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7058
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
7059
      self.target_node = self.instance.secondary_nodes[0]
7060
      # Otherwise self.target_node has been populated either
7061
      # directly, or through an iallocator.
7062

    
7063
    self.all_nodes = [self.source_node, self.target_node]
7064
    self.nodes_ip = {
7065
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
7066
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
7067
      }
7068

    
7069
    if self.failover:
7070
      feedback_fn("Failover instance %s" % self.instance.name)
7071
      self._ExecFailover()
7072
    else:
7073
      feedback_fn("Migrating instance %s" % self.instance.name)
7074

    
7075
      if self.cleanup:
7076
        return self._ExecCleanup()
7077
      else:
7078
        return self._ExecMigration()
7079

    
7080

    
7081
def _CreateBlockDev(lu, node, instance, device, force_create,
7082
                    info, force_open):
7083
  """Create a tree of block devices on a given node.
7084

7085
  If this device type has to be created on secondaries, create it and
7086
  all its children.
7087

7088
  If not, just recurse to children keeping the same 'force' value.
7089

7090
  @param lu: the lu on whose behalf we execute
7091
  @param node: the node on which to create the device
7092
  @type instance: L{objects.Instance}
7093
  @param instance: the instance which owns the device
7094
  @type device: L{objects.Disk}
7095
  @param device: the device to create
7096
  @type force_create: boolean
7097
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has
      CreateOnSecondary() attribute
7100
  @param info: the extra 'metadata' we should attach to the device
7101
      (this will be represented as a LVM tag)
7102
  @type force_open: boolean
7103
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution
7107

7108
  """
7109
  if device.CreateOnSecondary():
7110
    force_create = True
7111

    
7112
  if device.children:
7113
    for child in device.children:
7114
      _CreateBlockDev(lu, node, instance, child, force_create,
7115
                      info, force_open)
7116

    
7117
  if not force_create:
7118
    return
7119

    
7120
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
7121

    
7122

    
7123
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7124
  """Create a single block device on a given node.
7125

7126
  This will not recurse over children of the device, so they must be
7127
  created in advance.
7128

7129
  @param lu: the lu on whose behalf we execute
7130
  @param node: the node on which to create the device
7131
  @type instance: L{objects.Instance}
7132
  @param instance: the instance which owns the device
7133
  @type device: L{objects.Disk}
7134
  @param device: the device to create
7135
  @param info: the extra 'metadata' we should attach to the device
7136
      (this will be represented as a LVM tag)
7137
  @type force_open: boolean
7138
  @param force_open: this parameter will be passes to the
7139
      L{backend.BlockdevCreate} function where it specifies
7140
      whether we run on primary or not, and it affects both
7141
      the child assembly and the device own Open() execution
7142

7143
  """
7144
  lu.cfg.SetDiskID(device, node)
7145
  result = lu.rpc.call_blockdev_create(node, device, device.size,
7146
                                       instance.name, force_open, info)
7147
  result.Raise("Can't create block device %s on"
7148
               " node %s for instance %s" % (device, node, instance.name))
7149
  if device.physical_id is None:
7150
    device.physical_id = result.payload
7151

    
7152

    
7153
def _GenerateUniqueNames(lu, exts):
  """Generate a suitable set of LV names.

  This will generate unique logical volume names, one for each of the
  given extensions, for the given instance.

  """
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
    results.append("%s%s" % (new_id, val))
  return results
7164
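
# Illustrative sketch only (not used anywhere in this module):
# _GenerateUniqueNames above simply appends each extension to a
# cluster-unique identifier, producing names such as "<uuid>.disk0".
# The stand-alone mimic below uses the uuid module as a stand-in for
# lu.cfg.GenerateUniqueID().
def _ExampleUniqueNames(exts):
  """Mimic the naming scheme of _GenerateUniqueNames without a config."""
  import uuid
  return ["%s%s" % (uuid.uuid4(), ext) for ext in exts]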

    
7165

    
7166
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
7167
                         iv_name, p_minor, s_minor):
7168
  """Generate a drbd8 device complete with its children.
7169

7170
  """
7171
  assert len(vgnames) == len(names) == 2
7172
  port = lu.cfg.AllocatePort()
7173
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
7174
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7175
                          logical_id=(vgnames[0], names[0]))
7176
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7177
                          logical_id=(vgnames[1], names[1]))
7178
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
7179
                          logical_id=(primary, secondary, port,
7180
                                      p_minor, s_minor,
7181
                                      shared_secret),
7182
                          children=[dev_data, dev_meta],
7183
                          iv_name=iv_name)
7184
  return drbd_dev
7185
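
# Illustrative sketch only: a device returned by _GenerateDRBD8Branch always
# carries exactly two LV children, the data volume (same size as the DRBD
# device itself) and a fixed 128 MB metadata volume. The accessor below is
# hypothetical and exists purely to show that structure.
def _ExampleDrbd8ChildSizes(drbd_dev):
  """Return (data_size, meta_size) for a disk built by _GenerateDRBD8Branch."""
  dev_data, dev_meta = drbd_dev.children
  return (dev_data.size, dev_meta.size)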

    
7186

    
7187
def _GenerateDiskTemplate(lu, template_name,
7188
                          instance_name, primary_node,
7189
                          secondary_nodes, disk_info,
7190
                          file_storage_dir, file_driver,
7191
                          base_index, feedback_fn):
7192
  """Generate the entire disk layout for a given template type.
7193

7194
  """
7195
  #TODO: compute space requirements
7196

    
7197
  vgname = lu.cfg.GetVGName()
7198
  disk_count = len(disk_info)
7199
  disks = []
7200
  if template_name == constants.DT_DISKLESS:
7201
    pass
7202
  elif template_name == constants.DT_PLAIN:
7203
    if len(secondary_nodes) != 0:
7204
      raise errors.ProgrammerError("Wrong template configuration")
7205

    
7206
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7207
                                      for i in range(disk_count)])
7208
    for idx, disk in enumerate(disk_info):
7209
      disk_index = idx + base_index
7210
      vg = disk.get(constants.IDISK_VG, vgname)
7211
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7212
      disk_dev = objects.Disk(dev_type=constants.LD_LV,
7213
                              size=disk[constants.IDISK_SIZE],
7214
                              logical_id=(vg, names[idx]),
7215
                              iv_name="disk/%d" % disk_index,
7216
                              mode=disk[constants.IDISK_MODE])
7217
      disks.append(disk_dev)
7218
  elif template_name == constants.DT_DRBD8:
7219
    if len(secondary_nodes) != 1:
7220
      raise errors.ProgrammerError("Wrong template configuration")
7221
    remote_node = secondary_nodes[0]
7222
    minors = lu.cfg.AllocateDRBDMinor(
7223
      [primary_node, remote_node] * len(disk_info), instance_name)
7224

    
7225
    names = []
7226
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7227
                                               for i in range(disk_count)]):
7228
      names.append(lv_prefix + "_data")
7229
      names.append(lv_prefix + "_meta")
7230
    for idx, disk in enumerate(disk_info):
7231
      disk_index = idx + base_index
7232
      data_vg = disk.get(constants.IDISK_VG, vgname)
7233
      meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7234
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7235
                                      disk[constants.IDISK_SIZE],
7236
                                      [data_vg, meta_vg],
7237
                                      names[idx * 2:idx * 2 + 2],
7238
                                      "disk/%d" % disk_index,
7239
                                      minors[idx * 2], minors[idx * 2 + 1])
7240
      disk_dev.mode = disk[constants.IDISK_MODE]
7241
      disks.append(disk_dev)
7242
  elif template_name == constants.DT_FILE:
7243
    if len(secondary_nodes) != 0:
7244
      raise errors.ProgrammerError("Wrong template configuration")
7245

    
7246
    opcodes.RequireFileStorage()
7247

    
7248
    for idx, disk in enumerate(disk_info):
7249
      disk_index = idx + base_index
7250
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7251
                              size=disk[constants.IDISK_SIZE],
7252
                              iv_name="disk/%d" % disk_index,
7253
                              logical_id=(file_driver,
7254
                                          "%s/disk%d" % (file_storage_dir,
7255
                                                         disk_index)),
7256
                              mode=disk[constants.IDISK_MODE])
7257
      disks.append(disk_dev)
7258
  elif template_name == constants.DT_SHARED_FILE:
7259
    if len(secondary_nodes) != 0:
7260
      raise errors.ProgrammerError("Wrong template configuration")
7261

    
7262
    opcodes.RequireSharedFileStorage()
7263

    
7264
    for idx, disk in enumerate(disk_info):
7265
      disk_index = idx + base_index
7266
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7267
                              size=disk[constants.IDISK_SIZE],
7268
                              iv_name="disk/%d" % disk_index,
7269
                              logical_id=(file_driver,
7270
                                          "%s/disk%d" % (file_storage_dir,
7271
                                                         disk_index)),
7272
                              mode=disk[constants.IDISK_MODE])
7273
      disks.append(disk_dev)
7274
  elif template_name == constants.DT_BLOCK:
7275
    if len(secondary_nodes) != 0:
7276
      raise errors.ProgrammerError("Wrong template configuration")
7277

    
7278
    for idx, disk in enumerate(disk_info):
7279
      disk_index = idx + base_index
7280
      disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7281
                              size=disk[constants.IDISK_SIZE],
7282
                              logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7283
                                          disk[constants.IDISK_ADOPT]),
7284
                              iv_name="disk/%d" % disk_index,
7285
                              mode=disk[constants.IDISK_MODE])
7286
      disks.append(disk_dev)
7287

    
7288
  else:
7289
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
7290
  return disks
7291
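
# Illustrative sketch only: counting the logical volumes implied by a disk
# list returned from _GenerateDiskTemplate above. Plain disks are single LVs,
# each DRBD8 device contributes its data and metadata LV children, and the
# file/shared-file/block templates do not use LVM at all. This helper is
# hypothetical and is not called anywhere in Ganeti.
def _ExampleCountLogicalVolumes(disks):
  """Return the number of LVs that will back the given generated disks."""
  lv_count = 0
  for disk in disks:
    if disk.dev_type == constants.LD_LV:
      lv_count += 1
    elif disk.dev_type == constants.LD_DRBD8:
      lv_count += len(disk.children)
  return lv_count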

    
7292

    
7293
def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name
7298

    
7299

    
7300
def _CalcEta(time_taken, written, total_size):
  """Calculates the ETA based on size written and total size.

  @param time_taken: The time taken so far
  @param written: amount written so far
  @param total_size: The total size of data to be written
  @return: The remaining time in seconds

  """
  avg_time = time_taken / float(written)
  return (total_size - written) * avg_time
7311
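
# Illustrative sketch only: the ETA above is a simple linear extrapolation.
# With 1024 units written out of 10240 in 60 seconds, the average rate is
# ~0.0586 s/unit, so the remaining 9216 units take roughly 540 more seconds.
# The function below is hypothetical and exists only as a worked example.
def _ExampleCalcEtaUsage():
  """Return the ETA (in seconds) for the concrete transfer described above."""
  return _CalcEta(60.0, 1024, 10240)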

    
7312

    
7313
def _WipeDisks(lu, instance):
7314
  """Wipes instance disks.
7315

7316
  @type lu: L{LogicalUnit}
7317
  @param lu: the logical unit on whose behalf we execute
7318
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should wipe
  @return: the success of the wipe
7321

7322
  """
7323
  node = instance.primary_node
7324

    
7325
  for device in instance.disks:
7326
    lu.cfg.SetDiskID(device, node)
7327

    
7328
  logging.info("Pause sync of instance %s disks", instance.name)
7329
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7330

    
7331
  for idx, success in enumerate(result.payload):
7332
    if not success:
7333
      logging.warn("pause-sync of instance %s for disks %d failed",
7334
                   instance.name, idx)
7335

    
7336
  try:
7337
    for idx, device in enumerate(instance.disks):
7338
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
7339
      # MAX_WIPE_CHUNK at max
7340
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
7341
                            constants.MIN_WIPE_CHUNK_PERCENT)
7342
      # we _must_ make this an int, otherwise rounding errors will
7343
      # occur
7344
      wipe_chunk_size = int(wipe_chunk_size)
7345

    
7346
      lu.LogInfo("* Wiping disk %d", idx)
7347
      logging.info("Wiping disk %d for instance %s, node %s using"
7348
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)
7349

    
7350
      offset = 0
7351
      size = device.size
7352
      last_output = 0
7353
      start_time = time.time()
7354

    
7355
      while offset < size:
7356
        wipe_size = min(wipe_chunk_size, size - offset)
7357
        logging.debug("Wiping disk %d, offset %s, chunk %s",
7358
                      idx, offset, wipe_size)
7359
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
7360
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
7361
                     (idx, offset, wipe_size))
7362
        now = time.time()
7363
        offset += wipe_size
7364
        if now - last_output >= 60:
7365
          eta = _CalcEta(now - start_time, offset, size)
7366
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
7367
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
7368
          last_output = now
7369
  finally:
7370
    logging.info("Resume sync of instance %s disks", instance.name)
7371

    
7372
    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
7373

    
7374
    for idx, success in enumerate(result.payload):
7375
      if not success:
7376
        lu.LogWarning("Resume sync of disk %d failed, please have a"
7377
                      " look at the status and troubleshoot the issue", idx)
7378
        logging.warn("resume-sync of instance %s for disks %d failed",
7379
                     instance.name, idx)
7380
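
# Illustrative sketch only: the chunk size used by _WipeDisks above is
# MIN_WIPE_CHUNK_PERCENT percent of the disk size, capped at MAX_WIPE_CHUNK
# and truncated to an integer. The helper below and its default disk size of
# 500000 are hypothetical, shown only to make the formula concrete.
def _ExampleWipeChunkSize(disk_size=500000):
  """Return the wipe chunk size for a disk of the given size."""
  return int(min(constants.MAX_WIPE_CHUNK,
                 disk_size / 100.0 * constants.MIN_WIPE_CHUNK_PERCENT))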

    
7381

    
7382
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
7383
  """Create all disks for an instance.
7384

7385
  This abstracts away some work from AddInstance.
7386

7387
  @type lu: L{LogicalUnit}
7388
  @param lu: the logical unit on whose behalf we execute
7389
  @type instance: L{objects.Instance}
7390
  @param instance: the instance whose disks we should create
7391
  @type to_skip: list
7392
  @param to_skip: list of indices to skip
7393
  @type target_node: string
7394
  @param target_node: if passed, overrides the target node for creation
7395
  @rtype: boolean
7396
  @return: the success of the creation
7397

7398
  """
7399
  info = _GetInstanceInfoText(instance)
7400
  if target_node is None:
7401
    pnode = instance.primary_node
7402
    all_nodes = instance.all_nodes
7403
  else:
7404
    pnode = target_node
7405
    all_nodes = [pnode]
7406

    
7407
  if instance.disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE):
7408
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7409
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
7410

    
7411
    result.Raise("Failed to create directory '%s' on"
7412
                 " node %s" % (file_storage_dir, pnode))
7413

    
7414
  # Note: this needs to be kept in sync with adding of disks in
7415
  # LUInstanceSetParams
7416
  for idx, device in enumerate(instance.disks):
7417
    if to_skip and idx in to_skip:
7418
      continue
7419
    logging.info("Creating volume %s for instance %s",
7420
                 device.iv_name, instance.name)
7421
    #HARDCODE
7422
    for node in all_nodes:
7423
      f_create = node == pnode
7424
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
7425

    
7426

    
7427
def _RemoveDisks(lu, instance, target_node=None):
7428
  """Remove all disks for an instance.
7429

7430
  This abstracts away some work from `AddInstance()` and
7431
  `RemoveInstance()`. Note that in case some of the devices couldn't
7432
  be removed, the removal will continue with the other ones (compare
7433
  with `_CreateDisks()`).
7434

7435
  @type lu: L{LogicalUnit}
7436
  @param lu: the logical unit on whose behalf we execute
7437
  @type instance: L{objects.Instance}
7438
  @param instance: the instance whose disks we should remove
7439
  @type target_node: string
7440
  @param target_node: used to override the node on which to remove the disks
7441
  @rtype: boolean
7442
  @return: the success of the removal
7443

7444
  """
7445
  logging.info("Removing block devices for instance %s", instance.name)
7446

    
7447
  all_result = True
7448
  for device in instance.disks:
7449
    if target_node:
7450
      edata = [(target_node, device)]
7451
    else:
7452
      edata = device.ComputeNodeTree(instance.primary_node)
7453
    for node, disk in edata:
7454
      lu.cfg.SetDiskID(disk, node)
7455
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
7456
      if msg:
7457
        lu.LogWarning("Could not remove block device %s on node %s,"
7458
                      " continuing anyway: %s", device.iv_name, node, msg)
7459
        all_result = False
7460

    
7461
  if instance.disk_template == constants.DT_FILE:
7462
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7463
    if target_node:
7464
      tgt = target_node
7465
    else:
7466
      tgt = instance.primary_node
7467
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
7468
    if result.fail_msg:
7469
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
7470
                    file_storage_dir, instance.primary_node, result.fail_msg)
7471
      all_result = False
7472

    
7473
  return all_result
7474

    
7475

    
7476
def _ComputeDiskSizePerVG(disk_template, disks):
  """Compute disk size requirements per volume group.

  """
  def _compute(disks, payload):
    """Universal algorithm.

    """
    vgs = {}
    for disk in disks:
      vgs[disk[constants.IDISK_VG]] = \
        vgs.get(disk[constants.IDISK_VG], 0) + \
        disk[constants.IDISK_SIZE] + payload

    return vgs

  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: {},
    constants.DT_PLAIN: _compute(disks, 0),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: _compute(disks, 128),
    constants.DT_FILE: {},
    constants.DT_SHARED_FILE: {},
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
7506
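
# Illustrative sketch only: per-VG requirements for two DRBD disks that share
# one volume group. Each DRBD disk needs its own size plus 128 MB for
# metadata, so the sizes accumulate to 1024 + 128 + 2048 + 128 = 3328 MB in
# "xenvg". The helper and the literal values are hypothetical.
def _ExampleDiskSizePerVG():
  """Return the per-VG size requirements for a sample two-disk DRBD layout."""
  disks = [
    {constants.IDISK_VG: "xenvg", constants.IDISK_SIZE: 1024},
    {constants.IDISK_VG: "xenvg", constants.IDISK_SIZE: 2048},
    ]
  return _ComputeDiskSizePerVG(constants.DT_DRBD8, disks)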

    
7507

    
7508
def _ComputeDiskSize(disk_template, disks):
7509
  """Compute disk size requirements in the volume group
7510

7511
  """
7512
  # Required free disk space as a function of disk and swap space
7513
  req_size_dict = {
7514
    constants.DT_DISKLESS: None,
7515
    constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
7516
    # 128 MB are added for drbd metadata for each disk
7517
    constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
7518
    constants.DT_FILE: None,
7519
    constants.DT_SHARED_FILE: 0,
7520
    constants.DT_BLOCK: 0,
7521
  }
7522

    
7523
  if disk_template not in req_size_dict:
7524
    raise errors.ProgrammerError("Disk template '%s' size requirement"
7525
                                 " is unknown" %  disk_template)
7526

    
7527
  return req_size_dict[disk_template]
7528

    
7529

    
7530
def _FilterVmNodes(lu, nodenames):
  """Filters out non-vm_capable nodes from a list.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @rtype: list
  @return: the list of vm-capable nodes

  """
  non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
  return [name for name in nodenames if name not in non_vm_nodes]
7543

    
7544

    
7545
def _CheckHVParams(lu, nodenames, hvname, hvparams):
7546
  """Hypervisor parameter validation.
7547

7548
  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.
7550

7551
  @type lu: L{LogicalUnit}
7552
  @param lu: the logical unit for which we check
7553
  @type nodenames: list
7554
  @param nodenames: the list of nodes on which we should check
7555
  @type hvname: string
7556
  @param hvname: the name of the hypervisor we should use
7557
  @type hvparams: dict
7558
  @param hvparams: the parameters which we need to check
7559
  @raise errors.OpPrereqError: if the parameters are not valid
7560

7561
  """
7562
  nodenames = _FilterVmNodes(lu, nodenames)
7563
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
7564
                                                  hvname,
7565
                                                  hvparams)
7566
  for node in nodenames:
7567
    info = hvinfo[node]
7568
    if info.offline:
7569
      continue
7570
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
7571

    
7572

    
7573
def _CheckOSParams(lu, required, nodenames, osname, osparams):
7574
  """OS parameters validation.
7575

7576
  @type lu: L{LogicalUnit}
7577
  @param lu: the logical unit for which we check
7578
  @type required: boolean
7579
  @param required: whether the validation should fail if the OS is not
7580
      found
7581
  @type nodenames: list
7582
  @param nodenames: the list of nodes on which we should check
7583
  @type osname: string
  @param osname: the name of the OS we should use
7585
  @type osparams: dict
7586
  @param osparams: the parameters which we need to check
7587
  @raise errors.OpPrereqError: if the parameters are not valid
7588

7589
  """
7590
  nodenames = _FilterVmNodes(lu, nodenames)
7591
  result = lu.rpc.call_os_validate(required, nodenames, osname,
7592
                                   [constants.OS_VALIDATE_PARAMETERS],
7593
                                   osparams)
7594
  for node, nres in result.items():
7595
    # we don't check for offline cases since this should be run only
7596
    # against the master node and/or an instance's nodes
7597
    nres.Raise("OS Parameters validation failed on node %s" % node)
7598
    if not nres.payload:
7599
      lu.LogInfo("OS %s not found on node %s, validation skipped",
7600
                 osname, node)
7601

    
7602

    
7603
class LUInstanceCreate(LogicalUnit):
7604
  """Create an instance.
7605

7606
  """
7607
  HPATH = "instance-add"
7608
  HTYPE = constants.HTYPE_INSTANCE
7609
  REQ_BGL = False
7610

    
7611
  def CheckArguments(self):
7612
    """Check arguments.
7613

7614
    """
7615
    # do not require name_check to ease forward/backward compatibility
7616
    # for tools
7617
    if self.op.no_install and self.op.start:
7618
      self.LogInfo("No-installation mode selected, disabling startup")
7619
      self.op.start = False
7620
    # validate/normalize the instance name
7621
    self.op.instance_name = \
7622
      netutils.Hostname.GetNormalizedName(self.op.instance_name)
7623

    
7624
    if self.op.ip_check and not self.op.name_check:
7625
      # TODO: make the ip check more flexible and not depend on the name check
7626
      raise errors.OpPrereqError("Cannot do IP address check without a name"
7627
                                 " check", errors.ECODE_INVAL)
7628

    
7629
    # check nics' parameter names
7630
    for nic in self.op.nics:
7631
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
7632

    
7633
    # check disks. parameter names and consistent adopt/no-adopt strategy
7634
    has_adopt = has_no_adopt = False
7635
    for disk in self.op.disks:
7636
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
7637
      if constants.IDISK_ADOPT in disk:
7638
        has_adopt = True
7639
      else:
7640
        has_no_adopt = True
7641
    if has_adopt and has_no_adopt:
7642
      raise errors.OpPrereqError("Either all disks are adopted or none is",
7643
                                 errors.ECODE_INVAL)
7644
    if has_adopt:
7645
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
7646
        raise errors.OpPrereqError("Disk adoption is not supported for the"
7647
                                   " '%s' disk template" %
7648
                                   self.op.disk_template,
7649
                                   errors.ECODE_INVAL)
7650
      if self.op.iallocator is not None:
7651
        raise errors.OpPrereqError("Disk adoption not allowed with an"
7652
                                   " iallocator script", errors.ECODE_INVAL)
7653
      if self.op.mode == constants.INSTANCE_IMPORT:
7654
        raise errors.OpPrereqError("Disk adoption not allowed for"
7655
                                   " instance import", errors.ECODE_INVAL)
7656
    else:
7657
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
7658
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
7659
                                   " but no 'adopt' parameter given" %
7660
                                   self.op.disk_template,
7661
                                   errors.ECODE_INVAL)
7662

    
7663
    self.adopt_disks = has_adopt
7664

    
7665
    # instance name verification
7666
    if self.op.name_check:
7667
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
7668
      self.op.instance_name = self.hostname1.name
7669
      # used in CheckPrereq for ip ping check
7670
      self.check_ip = self.hostname1.ip
7671
    else:
7672
      self.check_ip = None
7673

    
7674
    # file storage checks
7675
    if (self.op.file_driver and
7676
        not self.op.file_driver in constants.FILE_DRIVER):
7677
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
7678
                                 self.op.file_driver, errors.ECODE_INVAL)
7679

    
7680
    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
7681
      raise errors.OpPrereqError("File storage directory path not absolute",
7682
                                 errors.ECODE_INVAL)
7683

    
7684
    ### Node/iallocator related checks
7685
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")
7686

    
7687
    if self.op.pnode is not None:
7688
      if self.op.disk_template in constants.DTS_INT_MIRROR:
7689
        if self.op.snode is None:
7690
          raise errors.OpPrereqError("The networked disk templates need"
7691
                                     " a mirror node", errors.ECODE_INVAL)
7692
      elif self.op.snode:
7693
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
7694
                        " template")
7695
        self.op.snode = None
7696

    
7697
    self._cds = _GetClusterDomainSecret()
7698

    
7699
    if self.op.mode == constants.INSTANCE_IMPORT:
7700
      # On import force_variant must be True, because if we forced it at
7701
      # initial install, our only chance when importing it back is that it
7702
      # works again!
7703
      self.op.force_variant = True
7704

    
7705
      if self.op.no_install:
7706
        self.LogInfo("No-installation mode has no effect during import")
7707

    
7708
    elif self.op.mode == constants.INSTANCE_CREATE:
7709
      if self.op.os_type is None:
7710
        raise errors.OpPrereqError("No guest OS specified",
7711
                                   errors.ECODE_INVAL)
7712
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
7713
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
7714
                                   " installation" % self.op.os_type,
7715
                                   errors.ECODE_STATE)
7716
      if self.op.disk_template is None:
7717
        raise errors.OpPrereqError("No disk template specified",
7718
                                   errors.ECODE_INVAL)
7719

    
7720
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7721
      # Check handshake to ensure both clusters have the same domain secret
7722
      src_handshake = self.op.source_handshake
7723
      if not src_handshake:
7724
        raise errors.OpPrereqError("Missing source handshake",
7725
                                   errors.ECODE_INVAL)
7726

    
7727
      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
7728
                                                           src_handshake)
7729
      if errmsg:
7730
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
7731
                                   errors.ECODE_INVAL)
7732

    
7733
      # Load and check source CA
7734
      self.source_x509_ca_pem = self.op.source_x509_ca
7735
      if not self.source_x509_ca_pem:
7736
        raise errors.OpPrereqError("Missing source X509 CA",
7737
                                   errors.ECODE_INVAL)
7738

    
7739
      try:
7740
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
7741
                                                    self._cds)
7742
      except OpenSSL.crypto.Error, err:
7743
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
7744
                                   (err, ), errors.ECODE_INVAL)
7745

    
7746
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
7747
      if errcode is not None:
7748
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
7749
                                   errors.ECODE_INVAL)
7750

    
7751
      self.source_x509_ca = cert
7752

    
7753
      src_instance_name = self.op.source_instance_name
7754
      if not src_instance_name:
7755
        raise errors.OpPrereqError("Missing source instance name",
7756
                                   errors.ECODE_INVAL)
7757

    
7758
      self.source_instance_name = \
7759
          netutils.GetHostname(name=src_instance_name).name
7760

    
7761
    else:
7762
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
7763
                                 self.op.mode, errors.ECODE_INVAL)
7764
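
  # Illustrative sketch only: an opcode that the checks above accept for a
  # mirrored (DRBD) instance. The field names mirror the self.op attributes
  # used in this class; the opcode class name follows the usual
  # LUInstanceCreate/OpInstanceCreate pairing, and all node, OS and instance
  # names below are hypothetical.
  #
  #   op = opcodes.OpInstanceCreate(instance_name="inst1.example.com",
  #                                 mode=constants.INSTANCE_CREATE,
  #                                 disk_template=constants.DT_DRBD8,
  #                                 disks=[{constants.IDISK_SIZE: 10240}],
  #                                 nics=[{}],
  #                                 os_type="debootstrap+default",
  #                                 pnode="node1.example.com",
  #                                 snode="node2.example.com")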

    
7765
  def ExpandNames(self):
7766
    """ExpandNames for CreateInstance.
7767

7768
    Figure out the right locks for instance creation.
7769

7770
    """
7771
    self.needed_locks = {}
7772

    
7773
    instance_name = self.op.instance_name
7774
    # this is just a preventive check, but someone might still add this
7775
    # instance in the meantime, and creation will fail at lock-add time
7776
    if instance_name in self.cfg.GetInstanceList():
7777
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7778
                                 instance_name, errors.ECODE_EXISTS)
7779

    
7780
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
7781

    
7782
    if self.op.iallocator:
7783
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7784
    else:
7785
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
7786
      nodelist = [self.op.pnode]
7787
      if self.op.snode is not None:
7788
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
7789
        nodelist.append(self.op.snode)
7790
      self.needed_locks[locking.LEVEL_NODE] = nodelist
7791

    
7792
    # in case of import lock the source node too
7793
    if self.op.mode == constants.INSTANCE_IMPORT:
7794
      src_node = self.op.src_node
7795
      src_path = self.op.src_path
7796

    
7797
      if src_path is None:
7798
        self.op.src_path = src_path = self.op.instance_name
7799

    
7800
      if src_node is None:
7801
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7802
        self.op.src_node = None
7803
        if os.path.isabs(src_path):
7804
          raise errors.OpPrereqError("Importing an instance from an absolute"
7805
                                     " path requires a source node option",
7806
                                     errors.ECODE_INVAL)
7807
      else:
7808
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
7809
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
7810
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
7811
        if not os.path.isabs(src_path):
7812
          self.op.src_path = src_path = \
7813
            utils.PathJoin(constants.EXPORT_DIR, src_path)
7814

    
7815
  def _RunAllocator(self):
7816
    """Run the allocator based on input opcode.
7817

7818
    """
7819
    nics = [n.ToDict() for n in self.nics]
7820
    ial = IAllocator(self.cfg, self.rpc,
7821
                     mode=constants.IALLOCATOR_MODE_ALLOC,
7822
                     name=self.op.instance_name,
7823
                     disk_template=self.op.disk_template,
7824
                     tags=[],
7825
                     os=self.op.os_type,
7826
                     vcpus=self.be_full[constants.BE_VCPUS],
7827
                     mem_size=self.be_full[constants.BE_MEMORY],
7828
                     disks=self.disks,
7829
                     nics=nics,
7830
                     hypervisor=self.op.hypervisor,
7831
                     )
7832

    
7833
    ial.Run(self.op.iallocator)
7834

    
7835
    if not ial.success:
7836
      raise errors.OpPrereqError("Can't compute nodes using"
7837
                                 " iallocator '%s': %s" %
7838
                                 (self.op.iallocator, ial.info),
7839
                                 errors.ECODE_NORES)
7840
    if len(ial.result) != ial.required_nodes:
7841
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7842
                                 " of nodes (%s), required %s" %
7843
                                 (self.op.iallocator, len(ial.result),
7844
                                  ial.required_nodes), errors.ECODE_FAULT)
7845
    self.op.pnode = ial.result[0]
7846
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7847
                 self.op.instance_name, self.op.iallocator,
7848
                 utils.CommaJoin(ial.result))
7849
    if ial.required_nodes == 2:
7850
      self.op.snode = ial.result[1]
7851

    
7852
  def BuildHooksEnv(self):
7853
    """Build hooks env.
7854

7855
    This runs on master, primary and secondary nodes of the instance.
7856

7857
    """
7858
    env = {
7859
      "ADD_MODE": self.op.mode,
7860
      }
7861
    if self.op.mode == constants.INSTANCE_IMPORT:
7862
      env["SRC_NODE"] = self.op.src_node
7863
      env["SRC_PATH"] = self.op.src_path
7864
      env["SRC_IMAGES"] = self.src_images
7865

    
7866
    env.update(_BuildInstanceHookEnv(
7867
      name=self.op.instance_name,
7868
      primary_node=self.op.pnode,
7869
      secondary_nodes=self.secondaries,
7870
      status=self.op.start,
7871
      os_type=self.op.os_type,
7872
      memory=self.be_full[constants.BE_MEMORY],
7873
      vcpus=self.be_full[constants.BE_VCPUS],
7874
      nics=_NICListToTuple(self, self.nics),
7875
      disk_template=self.op.disk_template,
7876
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
7877
             for d in self.disks],
7878
      bep=self.be_full,
7879
      hvp=self.hv_full,
7880
      hypervisor_name=self.op.hypervisor,
7881
    ))
7882

    
7883
    return env
7884

    
7885
  def BuildHooksNodes(self):
7886
    """Build hooks nodes.
7887

7888
    """
7889
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
7890
    return nl, nl
7891

    
7892
  def _ReadExportInfo(self):
7893
    """Reads the export information from disk.
7894

7895
    It will override the opcode source node and path with the actual
7896
    information, if these two were not specified before.
7897

7898
    @return: the export information
7899

7900
    """
7901
    assert self.op.mode == constants.INSTANCE_IMPORT
7902

    
7903
    src_node = self.op.src_node
7904
    src_path = self.op.src_path
7905

    
7906
    if src_node is None:
7907
      locked_nodes = self.glm.list_owned(locking.LEVEL_NODE)
7908
      exp_list = self.rpc.call_export_list(locked_nodes)
7909
      found = False
7910
      for node in exp_list:
7911
        if exp_list[node].fail_msg:
7912
          continue
7913
        if src_path in exp_list[node].payload:
7914
          found = True
7915
          self.op.src_node = src_node = node
7916
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
7917
                                                       src_path)
7918
          break
7919
      if not found:
7920
        raise errors.OpPrereqError("No export found for relative path %s" %
7921
                                    src_path, errors.ECODE_INVAL)
7922

    
7923
    _CheckNodeOnline(self, src_node)
7924
    result = self.rpc.call_export_info(src_node, src_path)
7925
    result.Raise("No export or invalid export found in dir %s" % src_path)
7926

    
7927
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
7928
    if not export_info.has_section(constants.INISECT_EXP):
7929
      raise errors.ProgrammerError("Corrupted export config",
7930
                                   errors.ECODE_ENVIRON)
7931

    
7932
    ei_version = export_info.get(constants.INISECT_EXP, "version")
7933
    if (int(ei_version) != constants.EXPORT_VERSION):
7934
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
7935
                                 (ei_version, constants.EXPORT_VERSION),
7936
                                 errors.ECODE_ENVIRON)
7937
    return export_info
7938

    
7939
  def _ReadExportParams(self, einfo):
7940
    """Use export parameters as defaults.
7941

7942
    In case the opcode doesn't specify (as in override) some instance
7943
    parameters, then try to use them from the export information, if
7944
    that declares them.
7945

7946
    """
7947
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
7948

    
7949
    if self.op.disk_template is None:
7950
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
7951
        self.op.disk_template = einfo.get(constants.INISECT_INS,
7952
                                          "disk_template")
7953
      else:
7954
        raise errors.OpPrereqError("No disk template specified and the export"
7955
                                   " is missing the disk_template information",
7956
                                   errors.ECODE_INVAL)
7957

    
7958
    if not self.op.disks:
7959
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
7960
        disks = []
7961
        # TODO: import the disk iv_name too
7962
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
7963
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
7964
          disks.append({constants.IDISK_SIZE: disk_sz})
7965
        self.op.disks = disks
7966
      else:
7967
        raise errors.OpPrereqError("No disk info specified and the export"
7968
                                   " is missing the disk information",
7969
                                   errors.ECODE_INVAL)
7970

    
7971
    if (not self.op.nics and
7972
        einfo.has_option(constants.INISECT_INS, "nic_count")):
7973
      nics = []
7974
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
7975
        ndict = {}
7976
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
7977
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
7978
          ndict[name] = v
7979
        nics.append(ndict)
7980
      self.op.nics = nics
7981

    
7982
    if (self.op.hypervisor is None and
7983
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
7984
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
7985
    if einfo.has_section(constants.INISECT_HYP):
7986
      # use the export parameters but do not override the ones
7987
      # specified by the user
7988
      for name, value in einfo.items(constants.INISECT_HYP):
7989
        if name not in self.op.hvparams:
7990
          self.op.hvparams[name] = value
7991

    
7992
    if einfo.has_section(constants.INISECT_BEP):
7993
      # use the parameters, without overriding
7994
      for name, value in einfo.items(constants.INISECT_BEP):
7995
        if name not in self.op.beparams:
7996
          self.op.beparams[name] = value
7997
    else:
7998
      # try to read the parameters old style, from the main section
7999
      for name in constants.BES_PARAMETERS:
8000
        if (name not in self.op.beparams and
8001
            einfo.has_option(constants.INISECT_INS, name)):
8002
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8003

    
8004
    if einfo.has_section(constants.INISECT_OSP):
8005
      # use the parameters, without overriding
8006
      for name, value in einfo.items(constants.INISECT_OSP):
8007
        if name not in self.op.osparams:
8008
          self.op.osparams[name] = value
8009

    
8010
  def _RevertToDefaults(self, cluster):
8011
    """Revert the instance parameters to the default values.
8012

8013
    """
8014
    # hvparams
8015
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
8016
    for name in self.op.hvparams.keys():
8017
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
8018
        del self.op.hvparams[name]
8019
    # beparams
8020
    be_defs = cluster.SimpleFillBE({})
8021
    for name in self.op.beparams.keys():
8022
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
8023
        del self.op.beparams[name]
8024
    # nic params
8025
    nic_defs = cluster.SimpleFillNIC({})
8026
    for nic in self.op.nics:
8027
      for name in constants.NICS_PARAMETERS:
8028
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
8029
          del nic[name]
8030
    # osparams
8031
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
8032
    for name in self.op.osparams.keys():
8033
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
8034
        del self.op.osparams[name]
8035

    
8036
  def CheckPrereq(self):
8037
    """Check prerequisites.
8038

8039
    """
8040
    if self.op.mode == constants.INSTANCE_IMPORT:
8041
      export_info = self._ReadExportInfo()
8042
      self._ReadExportParams(export_info)
8043

    
8044
    if (not self.cfg.GetVGName() and
8045
        self.op.disk_template not in constants.DTS_NOT_LVM):
8046
      raise errors.OpPrereqError("Cluster does not support lvm-based"
8047
                                 " instances", errors.ECODE_STATE)
8048

    
8049
    if self.op.hypervisor is None:
8050
      self.op.hypervisor = self.cfg.GetHypervisorType()
8051

    
8052
    cluster = self.cfg.GetClusterInfo()
8053
    enabled_hvs = cluster.enabled_hypervisors
8054
    if self.op.hypervisor not in enabled_hvs:
8055
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8056
                                 " cluster (%s)" % (self.op.hypervisor,
8057
                                  ",".join(enabled_hvs)),
8058
                                 errors.ECODE_STATE)
8059

    
8060
    # check hypervisor parameter syntax (locally)
8061
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8062
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8063
                                      self.op.hvparams)
8064
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8065
    hv_type.CheckParameterSyntax(filled_hvp)
8066
    self.hv_full = filled_hvp
8067
    # check that we don't specify global parameters on an instance
8068
    _CheckGlobalHvParams(self.op.hvparams)
8069

    
8070
    # fill and remember the beparams dict
8071
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
8072
    self.be_full = cluster.SimpleFillBE(self.op.beparams)
8073

    
8074
    # build os parameters
8075
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
8076

    
8077
    # now that hvp/bep are in final format, let's reset to defaults,
8078
    # if told to do so
8079
    if self.op.identify_defaults:
8080
      self._RevertToDefaults(cluster)
8081

    
8082
    # NIC buildup
8083
    self.nics = []
8084
    for idx, nic in enumerate(self.op.nics):
8085
      nic_mode_req = nic.get(constants.INIC_MODE, None)
8086
      nic_mode = nic_mode_req
8087
      if nic_mode is None:
8088
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
8089

    
8090
      # in routed mode, for the first nic, the default ip is 'auto'
8091
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
8092
        default_ip_mode = constants.VALUE_AUTO
8093
      else:
8094
        default_ip_mode = constants.VALUE_NONE
8095

    
8096
      # ip validity checks
8097
      ip = nic.get(constants.INIC_IP, default_ip_mode)
8098
      if ip is None or ip.lower() == constants.VALUE_NONE:
8099
        nic_ip = None
8100
      elif ip.lower() == constants.VALUE_AUTO:
8101
        if not self.op.name_check:
8102
          raise errors.OpPrereqError("IP address set to auto but name checks"
8103
                                     " have been skipped",
8104
                                     errors.ECODE_INVAL)
8105
        nic_ip = self.hostname1.ip
8106
      else:
8107
        if not netutils.IPAddress.IsValid(ip):
8108
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
8109
                                     errors.ECODE_INVAL)
8110
        nic_ip = ip
8111

    
8112
      # TODO: check the ip address for uniqueness
8113
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
8114
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
8115
                                   errors.ECODE_INVAL)
8116

    
8117
      # MAC address verification
8118
      mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
8119
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8120
        mac = utils.NormalizeAndValidateMac(mac)
8121

    
8122
        try:
8123
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
8124
        except errors.ReservationError:
8125
          raise errors.OpPrereqError("MAC address %s already in use"
8126
                                     " in cluster" % mac,
8127
                                     errors.ECODE_NOTUNIQUE)
8128

    
8129
      #  Build nic parameters
8130
      link = nic.get(constants.INIC_LINK, None)
8131
      nicparams = {}
8132
      if nic_mode_req:
8133
        nicparams[constants.NIC_MODE] = nic_mode_req
8134
      if link:
8135
        nicparams[constants.NIC_LINK] = link
8136

    
8137
      check_params = cluster.SimpleFillNIC(nicparams)
8138
      objects.NIC.CheckParameterSyntax(check_params)
8139
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
8140

    
8141
    # disk checks/pre-build
8142
    default_vg = self.cfg.GetVGName()
8143
    self.disks = []
8144
    for disk in self.op.disks:
8145
      mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
8146
      if mode not in constants.DISK_ACCESS_SET:
8147
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
8148
                                   mode, errors.ECODE_INVAL)
8149
      size = disk.get(constants.IDISK_SIZE, None)
8150
      if size is None:
8151
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
8152
      try:
8153
        size = int(size)
8154
      except (TypeError, ValueError):
8155
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
8156
                                   errors.ECODE_INVAL)
8157

    
8158
      data_vg = disk.get(constants.IDISK_VG, default_vg)
8159
      new_disk = {
8160
        constants.IDISK_SIZE: size,
8161
        constants.IDISK_MODE: mode,
8162
        constants.IDISK_VG: data_vg,
8163
        constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
8164
        }
8165
      if constants.IDISK_ADOPT in disk:
8166
        new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
8167
      self.disks.append(new_disk)
8168

    
8169
    if self.op.mode == constants.INSTANCE_IMPORT:
8170

    
8171
      # Check that the new instance doesn't have less disks than the export
8172
      instance_disks = len(self.disks)
8173
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
8174
      if instance_disks < export_disks:
8175
        raise errors.OpPrereqError("Not enough disks to import."
8176
                                   " (instance: %d, export: %d)" %
8177
                                   (instance_disks, export_disks),
8178
                                   errors.ECODE_INVAL)
8179

    
8180
      disk_images = []
8181
      for idx in range(export_disks):
8182
        option = 'disk%d_dump' % idx
8183
        if export_info.has_option(constants.INISECT_INS, option):
8184
          # FIXME: are the old os-es, disk sizes, etc. useful?
8185
          export_name = export_info.get(constants.INISECT_INS, option)
8186
          image = utils.PathJoin(self.op.src_path, export_name)
8187
          disk_images.append(image)
8188
        else:
8189
          disk_images.append(False)
8190

    
8191
      self.src_images = disk_images
8192

    
8193
      old_name = export_info.get(constants.INISECT_INS, 'name')
8194
      try:
8195
        exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
8196
      except (TypeError, ValueError), err:
8197
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
8198
                                   " an integer: %s" % str(err),
8199
                                   errors.ECODE_STATE)
8200
      if self.op.instance_name == old_name:
8201
        for idx, nic in enumerate(self.nics):
8202
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
8203
            nic_mac_ini = 'nic%d_mac' % idx
8204
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8205

    
8206
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8207

    
8208
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
8209
    if self.op.ip_check:
8210
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8211
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
8212
                                   (self.check_ip, self.op.instance_name),
8213
                                   errors.ECODE_NOTUNIQUE)
8214

    
8215
    #### mac address generation
8216
    # By generating here the mac address both the allocator and the hooks get
8217
    # the real final mac address rather than the 'auto' or 'generate' value.
8218
    # There is a race condition between the generation and the instance object
8219
    # creation, which means that we know the mac is valid now, but we're not
8220
    # sure it will be when we actually add the instance. If things go bad
8221
    # adding the instance will abort because of a duplicate mac, and the
8222
    # creation job will fail.
8223
    for nic in self.nics:
8224
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8225
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8226

    
8227
    #### allocator run
8228

    
8229
    if self.op.iallocator is not None:
8230
      self._RunAllocator()
8231

    
8232
    #### node related checks
8233

    
8234
    # check primary node
8235
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8236
    assert self.pnode is not None, \
8237
      "Cannot retrieve locked node %s" % self.op.pnode
8238
    if pnode.offline:
8239
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8240
                                 pnode.name, errors.ECODE_STATE)
8241
    if pnode.drained:
8242
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8243
                                 pnode.name, errors.ECODE_STATE)
8244
    if not pnode.vm_capable:
8245
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8246
                                 " '%s'" % pnode.name, errors.ECODE_STATE)
8247

    
8248
    self.secondaries = []
8249

    
8250
    # mirror node verification
8251
    if self.op.disk_template in constants.DTS_INT_MIRROR:
8252
      if self.op.snode == pnode.name:
8253
        raise errors.OpPrereqError("The secondary node cannot be the"
8254
                                   " primary node", errors.ECODE_INVAL)
8255
      _CheckNodeOnline(self, self.op.snode)
8256
      _CheckNodeNotDrained(self, self.op.snode)
8257
      _CheckNodeVmCapable(self, self.op.snode)
8258
      self.secondaries.append(self.op.snode)
8259

    
8260
    nodenames = [pnode.name] + self.secondaries
8261

    
8262
    if not self.adopt_disks:
8263
      # Check lv size requirements, if not adopting
8264
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8265
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8266

    
8267
    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8268
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8269
                                disk[constants.IDISK_ADOPT])
8270
                     for disk in self.disks])
8271
      if len(all_lvs) != len(self.disks):
8272
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
8273
                                   errors.ECODE_INVAL)
8274
      for lv_name in all_lvs:
8275
        try:
8276
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
8277
          # to ReserveLV uses the same syntax
8278
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8279
        except errors.ReservationError:
8280
          raise errors.OpPrereqError("LV named %s used by another instance" %
8281
                                     lv_name, errors.ECODE_NOTUNIQUE)
8282

    
8283
      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8284
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8285

    
8286
      node_lvs = self.rpc.call_lv_list([pnode.name],
8287
                                       vg_names.payload.keys())[pnode.name]
8288
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8289
      node_lvs = node_lvs.payload
8290

    
8291
      delta = all_lvs.difference(node_lvs.keys())
8292
      if delta:
8293
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
8294
                                   utils.CommaJoin(delta),
8295
                                   errors.ECODE_INVAL)
8296
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
8297
      if online_lvs:
8298
        raise errors.OpPrereqError("Online logical volumes found, cannot"
8299
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
8300
                                   errors.ECODE_STATE)
8301
      # update the size of disk based on what is found
8302
      for dsk in self.disks:
8303
        dsk[constants.IDISK_SIZE] = \
8304
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
8305
                                        dsk[constants.IDISK_ADOPT])][0]))
8306

    
8307
    elif self.op.disk_template == constants.DT_BLOCK:
8308
      # Normalize and de-duplicate device paths
8309
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
8310
                       for disk in self.disks])
8311
      if len(all_disks) != len(self.disks):
8312
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
8313
                                   errors.ECODE_INVAL)
8314
      baddisks = [d for d in all_disks
8315
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
8316
      if baddisks:
8317
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
8318
                                   " cannot be adopted" %
8319
                                   (", ".join(baddisks),
8320
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
8321
                                   errors.ECODE_INVAL)
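      # Editorial illustration (made-up device names): assuming
      # ADOPTABLE_BLOCKDEV_ROOT is something like "/dev/disk/", a path such
      # as "/dev/disk/by-id/scsi-0QEMU_example" would have passed the check
      # above, while a bare "/dev/sdb1" would have ended up in baddisks and
      # aborted the adoption.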
8322

    
8323
      node_disks = self.rpc.call_bdev_sizes([pnode.name],
8324
                                            list(all_disks))[pnode.name]
8325
      node_disks.Raise("Cannot get block device information from node %s" %
8326
                       pnode.name)
8327
      node_disks = node_disks.payload
8328
      delta = all_disks.difference(node_disks.keys())
8329
      if delta:
8330
        raise errors.OpPrereqError("Missing block device(s): %s" %
8331
                                   utils.CommaJoin(delta),
8332
                                   errors.ECODE_INVAL)
8333
      for dsk in self.disks:
8334
        dsk[constants.IDISK_SIZE] = \
8335
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
8336

    
8337
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
8338

    
8339
    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
8340
    # check OS parameters (remotely)
8341
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
8342

    
8343
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
8344

    
8345
    # memory check on primary node
8346
    if self.op.start:
8347
      _CheckNodeFreeMemory(self, self.pnode.name,
8348
                           "creating instance %s" % self.op.instance_name,
8349
                           self.be_full[constants.BE_MEMORY],
8350
                           self.op.hypervisor)
8351

    
8352
    self.dry_run_result = list(nodenames)
8353

    
8354
  def Exec(self, feedback_fn):
8355
    """Create and add the instance to the cluster.
8356

8357
    """
8358
    instance = self.op.instance_name
8359
    pnode_name = self.pnode.name
8360

    
8361
    ht_kind = self.op.hypervisor
8362
    if ht_kind in constants.HTS_REQ_PORT:
8363
      network_port = self.cfg.AllocatePort()
8364
    else:
8365
      network_port = None
8366

    
8367
    if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
8368
      # this is needed because os.path.join does not accept None arguments
8369
      if self.op.file_storage_dir is None:
8370
        string_file_storage_dir = ""
8371
      else:
8372
        string_file_storage_dir = self.op.file_storage_dir
8373

    
8374
      # build the full file storage dir path
8375
      if self.op.disk_template == constants.DT_SHARED_FILE:
8376
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
8377
      else:
8378
        get_fsd_fn = self.cfg.GetFileStorageDir
8379

    
8380
      file_storage_dir = utils.PathJoin(get_fsd_fn(),
8381
                                        string_file_storage_dir, instance)
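      # Illustrative result (names are made up): with a cluster file storage
      # dir of "/srv/ganeti/file-storage", an opcode subdirectory of "web"
      # and instance "inst1.example.com", this becomes
      # "/srv/ganeti/file-storage/web/inst1.example.com".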
8382
    else:
8383
      file_storage_dir = ""
8384

    
8385
    disks = _GenerateDiskTemplate(self,
8386
                                  self.op.disk_template,
8387
                                  instance, pnode_name,
8388
                                  self.secondaries,
8389
                                  self.disks,
8390
                                  file_storage_dir,
8391
                                  self.op.file_driver,
8392
                                  0,
8393
                                  feedback_fn)
8394

    
8395
    iobj = objects.Instance(name=instance, os=self.op.os_type,
8396
                            primary_node=pnode_name,
8397
                            nics=self.nics, disks=disks,
8398
                            disk_template=self.op.disk_template,
8399
                            admin_up=False,
8400
                            network_port=network_port,
8401
                            beparams=self.op.beparams,
8402
                            hvparams=self.op.hvparams,
8403
                            hypervisor=self.op.hypervisor,
8404
                            osparams=self.op.osparams,
8405
                            )
8406

    
8407
    if self.adopt_disks:
8408
      if self.op.disk_template == constants.DT_PLAIN:
8409
        # rename LVs to the newly-generated names; we need to construct
8410
        # 'fake' LV disks with the old data, plus the new unique_id
8411
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
8412
        rename_to = []
8413
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
8414
          rename_to.append(t_dsk.logical_id)
8415
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
8416
          self.cfg.SetDiskID(t_dsk, pnode_name)
8417
        result = self.rpc.call_blockdev_rename(pnode_name,
8418
                                               zip(tmp_disks, rename_to))
8419
        result.Raise("Failed to rename adopted LVs")
8420
    else:
8421
      feedback_fn("* creating instance disks...")
8422
      try:
8423
        _CreateDisks(self, iobj)
8424
      except errors.OpExecError:
8425
        self.LogWarning("Device creation failed, reverting...")
8426
        try:
8427
          _RemoveDisks(self, iobj)
8428
        finally:
8429
          self.cfg.ReleaseDRBDMinors(instance)
8430
          raise
8431

    
8432
    feedback_fn("adding instance %s to cluster config" % instance)
8433

    
8434
    self.cfg.AddInstance(iobj, self.proc.GetECId())
8435

    
8436
    # Declare that we don't want to remove the instance lock anymore, as we've
8437
    # added the instance to the config
8438
    del self.remove_locks[locking.LEVEL_INSTANCE]
8439

    
8440
    if self.op.mode == constants.INSTANCE_IMPORT:
8441
      # Release unused nodes
8442
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
8443
    else:
8444
      # Release all nodes
8445
      _ReleaseLocks(self, locking.LEVEL_NODE)
8446

    
8447
    disk_abort = False
8448
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
8449
      feedback_fn("* wiping instance disks...")
8450
      try:
8451
        _WipeDisks(self, iobj)
8452
      except errors.OpExecError, err:
8453
        logging.exception("Wiping disks failed")
8454
        self.LogWarning("Wiping instance disks failed (%s)", err)
8455
        disk_abort = True
8456

    
8457
    if disk_abort:
8458
      # Something is already wrong with the disks, don't do anything else
8459
      pass
8460
    elif self.op.wait_for_sync:
8461
      disk_abort = not _WaitForSync(self, iobj)
8462
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
8463
      # make sure the disks are not degraded (still sync-ing is ok)
8464
      time.sleep(15)
8465
      feedback_fn("* checking mirrors status")
8466
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
8467
    else:
8468
      disk_abort = False
8469

    
8470
    if disk_abort:
8471
      _RemoveDisks(self, iobj)
8472
      self.cfg.RemoveInstance(iobj.name)
8473
      # Make sure the instance lock gets removed
8474
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
8475
      raise errors.OpExecError("There are some degraded disks for"
8476
                               " this instance")
8477

    
8478
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
8479
      if self.op.mode == constants.INSTANCE_CREATE:
8480
        if not self.op.no_install:
8481
          feedback_fn("* running the instance OS create scripts...")
8482
          # FIXME: pass debug option from opcode to backend
8483
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
8484
                                                 self.op.debug_level)
8485
          result.Raise("Could not add os for instance %s"
8486
                       " on node %s" % (instance, pnode_name))
8487

    
8488
      elif self.op.mode == constants.INSTANCE_IMPORT:
8489
        feedback_fn("* running the instance OS import scripts...")
8490

    
8491
        transfers = []
8492

    
8493
        for idx, image in enumerate(self.src_images):
8494
          if not image:
8495
            continue
8496

    
8497
          # FIXME: pass debug option from opcode to backend
8498
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
8499
                                             constants.IEIO_FILE, (image, ),
8500
                                             constants.IEIO_SCRIPT,
8501
                                             (iobj.disks[idx], idx),
8502
                                             None)
8503
          transfers.append(dt)
8504

    
8505
        import_result = \
8506
          masterd.instance.TransferInstanceData(self, feedback_fn,
8507
                                                self.op.src_node, pnode_name,
8508
                                                self.pnode.secondary_ip,
8509
                                                iobj, transfers)
8510
        if not compat.all(import_result):
8511
          self.LogWarning("Some disks for instance %s on node %s were not"
8512
                          " imported successfully" % (instance, pnode_name))
8513

    
8514
      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8515
        feedback_fn("* preparing remote import...")
8516
        # The source cluster will stop the instance before attempting to make a
8517
        # connection. In some cases stopping an instance can take a long time,
8518
        # hence the shutdown timeout is added to the connection timeout.
8519
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
8520
                           self.op.source_shutdown_timeout)
8521
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
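        # Illustrative arithmetic (hypothetical values): a 60s
        # RIE_CONNECT_TIMEOUT plus a 120s source_shutdown_timeout gives the
        # remote end 180s to establish the connection before we give up.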
8522

    
8523
        assert iobj.primary_node == self.pnode.name
8524
        disk_results = \
8525
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
8526
                                        self.source_x509_ca,
8527
                                        self._cds, timeouts)
8528
        if not compat.all(disk_results):
8529
          # TODO: Should the instance still be started, even if some disks
8530
          # failed to import (valid for local imports, too)?
8531
          self.LogWarning("Some disks for instance %s on node %s were not"
8532
                          " imported successfully" % (instance, pnode_name))
8533

    
8534
        # Run rename script on newly imported instance
8535
        assert iobj.name == instance
8536
        feedback_fn("Running rename script for %s" % instance)
8537
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
8538
                                                   self.source_instance_name,
8539
                                                   self.op.debug_level)
8540
        if result.fail_msg:
8541
          self.LogWarning("Failed to run rename script for %s on node"
8542
                          " %s: %s" % (instance, pnode_name, result.fail_msg))
8543

    
8544
      else:
8545
        # also checked in the prereq part
8546
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
8547
                                     % self.op.mode)
8548

    
8549
    if self.op.start:
8550
      iobj.admin_up = True
8551
      self.cfg.Update(iobj, feedback_fn)
8552
      logging.info("Starting instance %s on node %s", instance, pnode_name)
8553
      feedback_fn("* starting instance...")
8554
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
8555
      result.Raise("Could not start instance")
8556

    
8557
    return list(iobj.all_nodes)


class LUInstanceConsole(NoHooksLU):
8561
  """Connect to an instance's console.
8562

8563
  This is somewhat special in that it returns the command line that
8564
  you need to run on the master node in order to connect to the
8565
  console.
8566

8567
  """
8568
  REQ_BGL = False
8569

    
8570
  def ExpandNames(self):
8571
    self._ExpandAndLockInstance()
8572

    
8573
  def CheckPrereq(self):
8574
    """Check prerequisites.
8575

8576
    This checks that the instance is in the cluster.
8577

8578
    """
8579
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8580
    assert self.instance is not None, \
8581
      "Cannot retrieve locked instance %s" % self.op.instance_name
8582
    _CheckNodeOnline(self, self.instance.primary_node)
8583

    
8584
  def Exec(self, feedback_fn):
8585
    """Connect to the console of an instance
8586

8587
    """
8588
    instance = self.instance
8589
    node = instance.primary_node
8590

    
8591
    node_insts = self.rpc.call_instance_list([node],
8592
                                             [instance.hypervisor])[node]
8593
    node_insts.Raise("Can't get node information from %s" % node)
8594

    
8595
    if instance.name not in node_insts.payload:
8596
      if instance.admin_up:
8597
        state = constants.INSTST_ERRORDOWN
8598
      else:
8599
        state = constants.INSTST_ADMINDOWN
8600
      raise errors.OpExecError("Instance %s is not running (state %s)" %
8601
                               (instance.name, state))
8602

    
8603
    logging.debug("Connecting to console of %s on %s", instance.name, node)
8604

    
8605
    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)


def _GetInstanceConsole(cluster, instance):
8609
  """Returns console information for an instance.
8610

8611
  @type cluster: L{objects.Cluster}
8612
  @type instance: L{objects.Instance}
8613
  @rtype: dict
8614

8615
  """
8616
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
8617
  # beparams and hvparams are passed separately, to avoid editing the
8618
  # instance and then saving the defaults in the instance itself.
8619
  hvparams = cluster.FillHV(instance)
8620
  beparams = cluster.FillBE(instance)
8621
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)
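  # Editorial note: the console object is hypervisor-specific (for example a
  # remote shell command or a VNC-style endpoint, depending on the
  # hypervisor); only its validated dict form is handed back below.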
8622

    
8623
  assert console.instance == instance.name
8624
  assert console.Validate()
8625

    
8626
  return console.ToDict()


class LUInstanceReplaceDisks(LogicalUnit):
8630
  """Replace the disks of an instance.
8631

8632
  """
8633
  HPATH = "mirrors-replace"
8634
  HTYPE = constants.HTYPE_INSTANCE
8635
  REQ_BGL = False
8636

    
8637
  def CheckArguments(self):
8638
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
8639
                                  self.op.iallocator)
8640

    
8641
  def ExpandNames(self):
8642
    self._ExpandAndLockInstance()
8643

    
8644
    assert locking.LEVEL_NODE not in self.needed_locks
8645
    assert locking.LEVEL_NODEGROUP not in self.needed_locks
8646

    
8647
    assert self.op.iallocator is None or self.op.remote_node is None, \
8648
      "Conflicting options"
8649

    
8650
    if self.op.remote_node is not None:
8651
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8652

    
8653
      # Warning: do not remove the locking of the new secondary here
8654
      # unless DRBD8.AddChildren is changed to work in parallel;
8655
      # currently it doesn't since parallel invocations of
8656
      # FindUnusedMinor will conflict
8657
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
8658
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
8659
    else:
8660
      self.needed_locks[locking.LEVEL_NODE] = []
8661
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8662

    
8663
      if self.op.iallocator is not None:
8664
        # iallocator will select a new node in the same group
8665
        self.needed_locks[locking.LEVEL_NODEGROUP] = []
8666

    
8667
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
8668
                                   self.op.iallocator, self.op.remote_node,
8669
                                   self.op.disks, False, self.op.early_release)
8670

    
8671
    self.tasklets = [self.replacer]
8672

    
8673
  def DeclareLocks(self, level):
8674
    if level == locking.LEVEL_NODEGROUP:
8675
      assert self.op.remote_node is None
8676
      assert self.op.iallocator is not None
8677
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
8678

    
8679
      self.share_locks[locking.LEVEL_NODEGROUP] = 1
8680
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
8681
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)
8682

    
8683
    elif level == locking.LEVEL_NODE:
8684
      if self.op.iallocator is not None:
8685
        assert self.op.remote_node is None
8686
        assert not self.needed_locks[locking.LEVEL_NODE]
8687

    
8688
        # Lock member nodes of all locked groups
8689
        self.needed_locks[locking.LEVEL_NODE] = [node_name
8690
          for group_uuid in self.glm.list_owned(locking.LEVEL_NODEGROUP)
8691
          for node_name in self.cfg.GetNodeGroup(group_uuid).members]
8692
      else:
8693
        self._LockInstancesNodes()
8694

    
8695
  def BuildHooksEnv(self):
8696
    """Build hooks env.
8697

8698
    This runs on the master, the primary and all the secondaries.
8699

8700
    """
8701
    instance = self.replacer.instance
8702
    env = {
8703
      "MODE": self.op.mode,
8704
      "NEW_SECONDARY": self.op.remote_node,
8705
      "OLD_SECONDARY": instance.secondary_nodes[0],
8706
      }
8707
    env.update(_BuildInstanceHookEnvByObject(self, instance))
8708
    return env
8709

    
8710
  def BuildHooksNodes(self):
8711
    """Build hooks nodes.
8712

8713
    """
8714
    instance = self.replacer.instance
8715
    nl = [
8716
      self.cfg.GetMasterNode(),
8717
      instance.primary_node,
8718
      ]
8719
    if self.op.remote_node is not None:
8720
      nl.append(self.op.remote_node)
8721
    return nl, nl
8722

    
8723
  def CheckPrereq(self):
8724
    """Check prerequisites.
8725

8726
    """
8727
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
8728
            self.op.iallocator is None)
8729

    
8730
    owned_groups = self.glm.list_owned(locking.LEVEL_NODEGROUP)
8731
    if owned_groups:
8732
      groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
8733
      if owned_groups != groups:
8734
        raise errors.OpExecError("Node groups used by instance '%s' changed"
8735
                                 " since lock was acquired, current list is %r,"
8736
                                 " used to be '%s'" %
8737
                                 (self.op.instance_name,
8738
                                  utils.CommaJoin(groups),
8739
                                  utils.CommaJoin(owned_groups)))
8740

    
8741
    return LogicalUnit.CheckPrereq(self)


class TLReplaceDisks(Tasklet):
8745
  """Replaces disks for an instance.
8746

8747
  Note: Locking is not within the scope of this class.
8748

8749
  """
8750
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
8751
               disks, delay_iallocator, early_release):
8752
    """Initializes this class.
8753

8754
    """
8755
    Tasklet.__init__(self, lu)
8756

    
8757
    # Parameters
8758
    self.instance_name = instance_name
8759
    self.mode = mode
8760
    self.iallocator_name = iallocator_name
8761
    self.remote_node = remote_node
8762
    self.disks = disks
8763
    self.delay_iallocator = delay_iallocator
8764
    self.early_release = early_release
8765

    
8766
    # Runtime data
8767
    self.instance = None
8768
    self.new_node = None
8769
    self.target_node = None
8770
    self.other_node = None
8771
    self.remote_node_info = None
8772
    self.node_secondary_ip = None
8773

    
8774
  @staticmethod
8775
  def CheckArguments(mode, remote_node, iallocator):
8776
    """Helper function for users of this class.
8777

8778
    """
8779
    # check for valid parameter combination
8780
    if mode == constants.REPLACE_DISK_CHG:
8781
      if remote_node is None and iallocator is None:
8782
        raise errors.OpPrereqError("When changing the secondary either an"
8783
                                   " iallocator script must be used or the"
8784
                                   " new node given", errors.ECODE_INVAL)
8785

    
8786
      if remote_node is not None and iallocator is not None:
8787
        raise errors.OpPrereqError("Give either the iallocator or the new"
8788
                                   " secondary, not both", errors.ECODE_INVAL)
8789

    
8790
    elif remote_node is not None or iallocator is not None:
8791
      # Not replacing the secondary
8792
      raise errors.OpPrereqError("The iallocator and new node options can"
8793
                                 " only be used when changing the"
8794
                                 " secondary node", errors.ECODE_INVAL)
8795
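    # Editorial summary: REPLACE_DISK_CHG requires exactly one of remote_node
    # and iallocator, and the other modes accept neither; e.g. a hypothetical
    # CheckArguments(constants.REPLACE_DISK_CHG, None, "hail") is accepted,
    # while passing both a node and an allocator raises OpPrereqError.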

    
8796
  @staticmethod
8797
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
8798
    """Compute a new secondary node using an IAllocator.
8799

8800
    """
8801
    ial = IAllocator(lu.cfg, lu.rpc,
8802
                     mode=constants.IALLOCATOR_MODE_RELOC,
8803
                     name=instance_name,
8804
                     relocate_from=relocate_from)
8805

    
8806
    ial.Run(iallocator_name)
8807

    
8808
    if not ial.success:
8809
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
8810
                                 " %s" % (iallocator_name, ial.info),
8811
                                 errors.ECODE_NORES)
8812

    
8813
    if len(ial.result) != ial.required_nodes:
8814
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8815
                                 " of nodes (%s), required %s" %
8816
                                 (iallocator_name,
8817
                                  len(ial.result), ial.required_nodes),
8818
                                 errors.ECODE_FAULT)
8819

    
8820
    remote_node_name = ial.result[0]
8821

    
8822
    lu.LogInfo("Selected new secondary for instance '%s': %s",
8823
               instance_name, remote_node_name)
8824

    
8825
    return remote_node_name
8826

    
8827
  def _FindFaultyDisks(self, node_name):
8828
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
8829
                                    node_name, True)
8830

    
8831
  def _CheckDisksActivated(self, instance):
8832
    """Checks if the instance disks are activated.
8833

8834
    @param instance: The instance to check disks
8835
    @return: True if they are activated, False otherwise
8836

8837
    """
8838
    nodes = instance.all_nodes
8839

    
8840
    for idx, dev in enumerate(instance.disks):
8841
      for node in nodes:
8842
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
8843
        self.cfg.SetDiskID(dev, node)
8844

    
8845
        result = self.rpc.call_blockdev_find(node, dev)
8846

    
8847
        if result.offline:
8848
          continue
8849
        elif result.fail_msg or not result.payload:
8850
          return False
8851

    
8852
    return True
8853

    
8854
  def CheckPrereq(self):
8855
    """Check prerequisites.
8856

8857
    This checks that the instance is in the cluster.
8858

8859
    """
8860
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
8861
    assert instance is not None, \
8862
      "Cannot retrieve locked instance %s" % self.instance_name
8863

    
8864
    if instance.disk_template != constants.DT_DRBD8:
8865
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
8866
                                 " instances", errors.ECODE_INVAL)
8867

    
8868
    if len(instance.secondary_nodes) != 1:
8869
      raise errors.OpPrereqError("The instance has a strange layout,"
8870
                                 " expected one secondary but found %d" %
8871
                                 len(instance.secondary_nodes),
8872
                                 errors.ECODE_FAULT)
8873

    
8874
    if not self.delay_iallocator:
8875
      self._CheckPrereq2()
8876

    
8877
  def _CheckPrereq2(self):
8878
    """Check prerequisites, second part.
8879

8880
    This function should always be part of CheckPrereq. It was separated and is
8881
    now called from Exec because during node evacuation iallocator was only
8882
    called with an unmodified cluster model, not taking planned changes into
8883
    account.
8884

8885
    """
8886
    instance = self.instance
8887
    secondary_node = instance.secondary_nodes[0]
8888

    
8889
    if self.iallocator_name is None:
8890
      remote_node = self.remote_node
8891
    else:
8892
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
8893
                                       instance.name, instance.secondary_nodes)
8894

    
8895
    if remote_node is None:
8896
      self.remote_node_info = None
8897
    else:
8898
      assert remote_node in self.lu.glm.list_owned(locking.LEVEL_NODE), \
8899
             "Remote node '%s' is not locked" % remote_node
8900

    
8901
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
8902
      assert self.remote_node_info is not None, \
8903
        "Cannot retrieve locked node %s" % remote_node
8904

    
8905
    if remote_node == self.instance.primary_node:
8906
      raise errors.OpPrereqError("The specified node is the primary node of"
8907
                                 " the instance", errors.ECODE_INVAL)
8908

    
8909
    if remote_node == secondary_node:
8910
      raise errors.OpPrereqError("The specified node is already the"
8911
                                 " secondary node of the instance",
8912
                                 errors.ECODE_INVAL)
8913

    
8914
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
8915
                                    constants.REPLACE_DISK_CHG):
8916
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
8917
                                 errors.ECODE_INVAL)
8918

    
8919
    if self.mode == constants.REPLACE_DISK_AUTO:
8920
      if not self._CheckDisksActivated(instance):
8921
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
8922
                                   " first" % self.instance_name,
8923
                                   errors.ECODE_STATE)
8924
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
8925
      faulty_secondary = self._FindFaultyDisks(secondary_node)
8926

    
8927
      if faulty_primary and faulty_secondary:
8928
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
8929
                                   " one node and can not be repaired"
8930
                                   " automatically" % self.instance_name,
8931
                                   errors.ECODE_STATE)
8932

    
8933
      if faulty_primary:
8934
        self.disks = faulty_primary
8935
        self.target_node = instance.primary_node
8936
        self.other_node = secondary_node
8937
        check_nodes = [self.target_node, self.other_node]
8938
      elif faulty_secondary:
8939
        self.disks = faulty_secondary
8940
        self.target_node = secondary_node
8941
        self.other_node = instance.primary_node
8942
        check_nodes = [self.target_node, self.other_node]
8943
      else:
8944
        self.disks = []
8945
        check_nodes = []
8946

    
8947
    else:
8948
      # Non-automatic modes
8949
      if self.mode == constants.REPLACE_DISK_PRI:
8950
        self.target_node = instance.primary_node
8951
        self.other_node = secondary_node
8952
        check_nodes = [self.target_node, self.other_node]
8953

    
8954
      elif self.mode == constants.REPLACE_DISK_SEC:
8955
        self.target_node = secondary_node
8956
        self.other_node = instance.primary_node
8957
        check_nodes = [self.target_node, self.other_node]
8958

    
8959
      elif self.mode == constants.REPLACE_DISK_CHG:
8960
        self.new_node = remote_node
8961
        self.other_node = instance.primary_node
8962
        self.target_node = secondary_node
8963
        check_nodes = [self.new_node, self.other_node]
8964

    
8965
        _CheckNodeNotDrained(self.lu, remote_node)
8966
        _CheckNodeVmCapable(self.lu, remote_node)
8967

    
8968
        old_node_info = self.cfg.GetNodeInfo(secondary_node)
8969
        assert old_node_info is not None
8970
        if old_node_info.offline and not self.early_release:
8971
          # doesn't make sense to delay the release
8972
          self.early_release = True
8973
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
8974
                          " early-release mode", secondary_node)
8975

    
8976
      else:
8977
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
8978
                                     self.mode)
8979

    
8980
      # If not specified all disks should be replaced
8981
      if not self.disks:
8982
        self.disks = range(len(self.instance.disks))
8983

    
8984
    for node in check_nodes:
8985
      _CheckNodeOnline(self.lu, node)
8986

    
8987
    touched_nodes = frozenset(node_name for node_name in [self.new_node,
8988
                                                          self.other_node,
8989
                                                          self.target_node]
8990
                              if node_name is not None)
8991

    
8992
    # Release unneeded node locks
8993
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
8994

    
8995
    # Release any owned node group
8996
    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
8997
      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
8998

    
8999
    # Check whether disks are valid
9000
    for disk_idx in self.disks:
9001
      instance.FindDisk(disk_idx)
9002

    
9003
    # Get secondary node IP addresses
9004
    self.node_secondary_ip = \
9005
      dict((node_name, self.cfg.GetNodeInfo(node_name).secondary_ip)
9006
           for node_name in touched_nodes)
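    # Illustrative shape (hypothetical values): {"node1.example.com":
    # "192.0.2.11", "node2.example.com": "192.0.2.12"}; the drbd network RPCs
    # issued during a secondary replacement use these addresses.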
9007

    
9008
  def Exec(self, feedback_fn):
9009
    """Execute disk replacement.
9010

9011
    This dispatches the disk replacement to the appropriate handler.
9012

9013
    """
9014
    if self.delay_iallocator:
9015
      self._CheckPrereq2()
9016

    
9017
    if __debug__:
9018
      # Verify owned locks before starting operation
9019
      owned_locks = self.lu.glm.list_owned(locking.LEVEL_NODE)
9020
      assert set(owned_locks) == set(self.node_secondary_ip), \
9021
          ("Incorrect node locks, owning %s, expected %s" %
9022
           (owned_locks, self.node_secondary_ip.keys()))
9023

    
9024
      owned_locks = self.lu.glm.list_owned(locking.LEVEL_INSTANCE)
9025
      assert list(owned_locks) == [self.instance_name], \
9026
          "Instance '%s' not locked" % self.instance_name
9027

    
9028
      assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
9029
          "Should not own any node group lock at this point"
9030

    
9031
    if not self.disks:
9032
      feedback_fn("No disks need replacement")
9033
      return
9034

    
9035
    feedback_fn("Replacing disk(s) %s for %s" %
9036
                (utils.CommaJoin(self.disks), self.instance.name))
9037

    
9038
    activate_disks = (not self.instance.admin_up)
9039

    
9040
    # Activate the instance disks if we're replacing them on a down instance
9041
    if activate_disks:
9042
      _StartInstanceDisks(self.lu, self.instance, True)
9043

    
9044
    try:
9045
      # Should we replace the secondary node?
9046
      if self.new_node is not None:
9047
        fn = self._ExecDrbd8Secondary
9048
      else:
9049
        fn = self._ExecDrbd8DiskOnly
9050

    
9051
      result = fn(feedback_fn)
9052
    finally:
9053
      # Deactivate the instance disks if we're replacing them on a
9054
      # down instance
9055
      if activate_disks:
9056
        _SafeShutdownInstanceDisks(self.lu, self.instance)
9057

    
9058
    if __debug__:
9059
      # Verify owned locks
9060
      owned_locks = self.lu.glm.list_owned(locking.LEVEL_NODE)
9061
      nodes = frozenset(self.node_secondary_ip)
9062
      assert ((self.early_release and not owned_locks) or
9063
              (not self.early_release and not (set(owned_locks) - nodes))), \
9064
        ("Not owning the correct locks, early_release=%s, owned=%r,"
9065
         " nodes=%r" % (self.early_release, owned_locks, nodes))
9066

    
9067
    return result
9068

    
9069
  def _CheckVolumeGroup(self, nodes):
9070
    self.lu.LogInfo("Checking volume groups")
9071

    
9072
    vgname = self.cfg.GetVGName()
9073

    
9074
    # Make sure volume group exists on all involved nodes
9075
    results = self.rpc.call_vg_list(nodes)
9076
    if not results:
9077
      raise errors.OpExecError("Can't list volume groups on the nodes")
9078

    
9079
    for node in nodes:
9080
      res = results[node]
9081
      res.Raise("Error checking node %s" % node)
9082
      if vgname not in res.payload:
9083
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
9084
                                 (vgname, node))
9085

    
9086
  def _CheckDisksExistence(self, nodes):
9087
    # Check disk existence
9088
    for idx, dev in enumerate(self.instance.disks):
9089
      if idx not in self.disks:
9090
        continue
9091

    
9092
      for node in nodes:
9093
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
9094
        self.cfg.SetDiskID(dev, node)
9095

    
9096
        result = self.rpc.call_blockdev_find(node, dev)
9097

    
9098
        msg = result.fail_msg
9099
        if msg or not result.payload:
9100
          if not msg:
9101
            msg = "disk not found"
9102
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
9103
                                   (idx, node, msg))
9104

    
9105
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
9106
    for idx, dev in enumerate(self.instance.disks):
9107
      if idx not in self.disks:
9108
        continue
9109

    
9110
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
9111
                      (idx, node_name))
9112

    
9113
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
9114
                                   ldisk=ldisk):
9115
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
9116
                                 " replace disks for instance %s" %
9117
                                 (node_name, self.instance.name))
9118

    
9119
  def _CreateNewStorage(self, node_name):
9120
    iv_names = {}
9121

    
9122
    for idx, dev in enumerate(self.instance.disks):
9123
      if idx not in self.disks:
9124
        continue
9125

    
9126
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
9127

    
9128
      self.cfg.SetDiskID(dev, node_name)
9129

    
9130
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
9131
      names = _GenerateUniqueNames(self.lu, lv_names)
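      # Editorial note: for disk/0 this builds the suffixes ".disk0_data" and
      # ".disk0_meta"; _GenerateUniqueNames is assumed to prefix them with a
      # cluster-unique identifier so the new LVs cannot clash with the old
      # ones.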
9132

    
9133
      vg_data = dev.children[0].logical_id[0]
9134
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
9135
                             logical_id=(vg_data, names[0]))
9136
      vg_meta = dev.children[1].logical_id[0]
9137
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
9138
                             logical_id=(vg_meta, names[1]))
9139

    
9140
      new_lvs = [lv_data, lv_meta]
9141
      old_lvs = dev.children
9142
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
9143

    
9144
      # we pass force_create=True to force the LVM creation
9145
      for new_lv in new_lvs:
9146
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
9147
                        _GetInstanceInfoText(self.instance), False)
9148

    
9149
    return iv_names
9150

    
9151
  def _CheckDevices(self, node_name, iv_names):
9152
    for name, (dev, _, _) in iv_names.iteritems():
9153
      self.cfg.SetDiskID(dev, node_name)
9154

    
9155
      result = self.rpc.call_blockdev_find(node_name, dev)
9156

    
9157
      msg = result.fail_msg
9158
      if msg or not result.payload:
9159
        if not msg:
9160
          msg = "disk not found"
9161
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
9162
                                 (name, msg))
9163

    
9164
      if result.payload.is_degraded:
9165
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
9166

    
9167
  def _RemoveOldStorage(self, node_name, iv_names):
9168
    for name, (_, old_lvs, _) in iv_names.iteritems():
9169
      self.lu.LogInfo("Remove logical volumes for %s" % name)
9170

    
9171
      for lv in old_lvs:
9172
        self.cfg.SetDiskID(lv, node_name)
9173

    
9174
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
9175
        if msg:
9176
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
9177
                             hint="remove unused LVs manually")
9178

    
9179
  def _ExecDrbd8DiskOnly(self, feedback_fn):
9180
    """Replace a disk on the primary or secondary for DRBD 8.
9181

9182
    The algorithm for replace is quite complicated:
9183

9184
      1. for each disk to be replaced:
9185

9186
        1. create new LVs on the target node with unique names
9187
        1. detach old LVs from the drbd device
9188
        1. rename old LVs to name_replaced.<time_t>
9189
        1. rename new LVs to old LVs
9190
        1. attach the new LVs (with the old names now) to the drbd device
9191

9192
      1. wait for sync across all devices
9193

9194
      1. for each modified disk:
9195

9196
        1. remove old LVs (which have the name name_replaced.<time_t>)
9197

9198
    Failures are not very well handled.
9199

9200
    """
9201
    steps_total = 6
9202

    
9203
    # Step: check device activation
9204
    self.lu.LogStep(1, steps_total, "Check device existence")
9205
    self._CheckDisksExistence([self.other_node, self.target_node])
9206
    self._CheckVolumeGroup([self.target_node, self.other_node])
9207

    
9208
    # Step: check other node consistency
9209
    self.lu.LogStep(2, steps_total, "Check peer consistency")
9210
    self._CheckDisksConsistency(self.other_node,
9211
                                self.other_node == self.instance.primary_node,
9212
                                False)
9213

    
9214
    # Step: create new storage
9215
    self.lu.LogStep(3, steps_total, "Allocate new storage")
9216
    iv_names = self._CreateNewStorage(self.target_node)
9217

    
9218
    # Step: for each lv, detach+rename*2+attach
9219
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9220
    for dev, old_lvs, new_lvs in iv_names.itervalues():
9221
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
9222

    
9223
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
9224
                                                     old_lvs)
9225
      result.Raise("Can't detach drbd from local storage on node"
9226
                   " %s for device %s" % (self.target_node, dev.iv_name))
9227
      #dev.children = []
9228
      #cfg.Update(instance)
9229

    
9230
      # ok, we created the new LVs, so now we know we have the needed
9231
      # storage; as such, we proceed on the target node to rename
9232
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
9233
      # using the assumption that logical_id == physical_id (which in
9234
      # turn is the unique_id on that node)
9235

    
9236
      # FIXME(iustin): use a better name for the replaced LVs
9237
      temp_suffix = int(time.time())
9238
      ren_fn = lambda d, suff: (d.physical_id[0],
9239
                                d.physical_id[1] + "_replaced-%s" % suff)
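      # Illustrative example (made-up names): an LV with physical_id
      # ("xenvg", "abc123.disk0_data") and temp_suffix 1357924680 is renamed
      # to ("xenvg", "abc123.disk0_data_replaced-1357924680").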
9240

    
9241
      # Build the rename list based on what LVs exist on the node
9242
      rename_old_to_new = []
9243
      for to_ren in old_lvs:
9244
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
9245
        if not result.fail_msg and result.payload:
9246
          # device exists
9247
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
9248

    
9249
      self.lu.LogInfo("Renaming the old LVs on the target node")
9250
      result = self.rpc.call_blockdev_rename(self.target_node,
9251
                                             rename_old_to_new)
9252
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
9253

    
9254
      # Now we rename the new LVs to the old LVs
9255
      self.lu.LogInfo("Renaming the new LVs on the target node")
9256
      rename_new_to_old = [(new, old.physical_id)
9257
                           for old, new in zip(old_lvs, new_lvs)]
9258
      result = self.rpc.call_blockdev_rename(self.target_node,
9259
                                             rename_new_to_old)
9260
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
9261

    
9262
      for old, new in zip(old_lvs, new_lvs):
9263
        new.logical_id = old.logical_id
9264
        self.cfg.SetDiskID(new, self.target_node)
9265

    
9266
      for disk in old_lvs:
9267
        disk.logical_id = ren_fn(disk, temp_suffix)
9268
        self.cfg.SetDiskID(disk, self.target_node)
9269

    
9270
      # Now that the new lvs have the old name, we can add them to the device
9271
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9272
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9273
                                                  new_lvs)
9274
      msg = result.fail_msg
9275
      if msg:
9276
        for new_lv in new_lvs:
9277
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
9278
                                               new_lv).fail_msg
9279
          if msg2:
9280
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
9281
                               hint=("cleanup manually the unused logical"
                                     " volumes"))
9283
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9284

    
9285
      dev.children = new_lvs
9286

    
9287
      self.cfg.Update(self.instance, feedback_fn)
9288

    
9289
    cstep = 5
9290
    if self.early_release:
9291
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9292
      cstep += 1
9293
      self._RemoveOldStorage(self.target_node, iv_names)
9294
      # WARNING: we release both node locks here, do not do other RPCs
9295
      # than WaitForSync to the primary node
9296
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9297
                    names=[self.target_node, self.other_node])
9298

    
9299
    # Wait for sync
9300
    # This can fail as the old devices are degraded and _WaitForSync
9301
    # does a combined result over all disks, so we don't check its return value
9302
    self.lu.LogStep(cstep, steps_total, "Sync devices")
9303
    cstep += 1
9304
    _WaitForSync(self.lu, self.instance)
9305

    
9306
    # Check all devices manually
9307
    self._CheckDevices(self.instance.primary_node, iv_names)
9308

    
9309
    # Step: remove old storage
9310
    if not self.early_release:
9311
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9312
      cstep += 1
9313
      self._RemoveOldStorage(self.target_node, iv_names)
9314

    
9315
  def _ExecDrbd8Secondary(self, feedback_fn):
9316
    """Replace the secondary node for DRBD 8.
9317

9318
    The algorithm for replace is quite complicated:
9319
      - for all disks of the instance:
9320
        - create new LVs on the new node with same names
9321
        - shutdown the drbd device on the old secondary
9322
        - disconnect the drbd network on the primary
9323
        - create the drbd device on the new secondary
9324
        - network attach the drbd on the primary, using an artifice:
9325
          the drbd code for Attach() will connect to the network if it
9326
          finds a device which is connected to the good local disks but
9327
          not network enabled
9328
      - wait for sync across all devices
9329
      - remove all disks from the old secondary
9330

9331
    Failures are not very well handled.
9332

9333
    """
9334
    steps_total = 6
9335

    
9336
    # Step: check device activation
9337
    self.lu.LogStep(1, steps_total, "Check device existence")
9338
    self._CheckDisksExistence([self.instance.primary_node])
9339
    self._CheckVolumeGroup([self.instance.primary_node])
9340

    
9341
    # Step: check other node consistency
9342
    self.lu.LogStep(2, steps_total, "Check peer consistency")
9343
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
9344

    
9345
    # Step: create new storage
9346
    self.lu.LogStep(3, steps_total, "Allocate new storage")
9347
    for idx, dev in enumerate(self.instance.disks):
9348
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
9349
                      (self.new_node, idx))
9350
      # we pass force_create=True to force LVM creation
9351
      for new_lv in dev.children:
9352
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9353
                        _GetInstanceInfoText(self.instance), False)
9354

    
9355
    # Step 4: drbd minors and drbd setup changes
9356
    # after this, we must manually remove the drbd minors on both the
9357
    # error and the success paths
9358
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9359
    minors = self.cfg.AllocateDRBDMinor([self.new_node
9360
                                         for dev in self.instance.disks],
9361
                                        self.instance.name)
9362
    logging.debug("Allocated minors %r", minors)
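    # Illustrative result (made-up numbers): for a two-disk instance this is
    # something like [3, 4], one fresh drbd minor on the new node per disk,
    # matched positionally with self.instance.disks in the loop below.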
9363

    
9364
    iv_names = {}
9365
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
9366
      self.lu.LogInfo("Activating a new drbd on %s for disk/%d" %
9367
                      (self.new_node, idx))
9368
      # create new devices on new_node; note that we create two IDs:
9369
      # one without port, so the drbd will be activated without
9370
      # networking information on the new node at this stage, and one
9371
      # with network, for the latter activation in step 4
9372
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
9373
      if self.instance.primary_node == o_node1:
9374
        p_minor = o_minor1
9375
      else:
9376
        assert self.instance.primary_node == o_node2, "Three-node instance?"
9377
        p_minor = o_minor2
9378

    
9379
      new_alone_id = (self.instance.primary_node, self.new_node, None,
9380
                      p_minor, new_minor, o_secret)
9381
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
9382
                    p_minor, new_minor, o_secret)
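      # Editorial note: both tuples follow the drbd logical_id layout
      # unpacked above, (node_a, node_b, port, minor_a, minor_b, secret);
      # new_alone_id leaves the port as None so the device first comes up
      # without networking, while new_net_id keeps the original port for the
      # later re-attach.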
9383

    
9384
      iv_names[idx] = (dev, dev.children, new_net_id)
9385
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
9386
                    new_net_id)
9387
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
9388
                              logical_id=new_alone_id,
9389
                              children=dev.children,
9390
                              size=dev.size)
9391
      try:
9392
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
9393
                              _GetInstanceInfoText(self.instance), False)
9394
      except errors.GenericError:
9395
        self.cfg.ReleaseDRBDMinors(self.instance.name)
9396
        raise
9397

    
9398
    # We have new devices, shutdown the drbd on the old secondary
9399
    for idx, dev in enumerate(self.instance.disks):
9400
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
9401
      self.cfg.SetDiskID(dev, self.target_node)
9402
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
9403
      if msg:
9404
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           " node: %s" % (idx, msg),
9406
                           hint=("Please cleanup this device manually as"
9407
                                 " soon as possible"))
9408

    
9409
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
9410
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
9411
                                               self.node_secondary_ip,
9412
                                               self.instance.disks)\
9413
                                              [self.instance.primary_node]
9414

    
9415
    msg = result.fail_msg
9416
    if msg:
9417
      # detaches didn't succeed (unlikely)
9418
      self.cfg.ReleaseDRBDMinors(self.instance.name)
9419
      raise errors.OpExecError("Can't detach the disks from the network on"
9420
                               " old node: %s" % (msg,))
9421

    
9422
    # if we managed to detach at least one, we update all the disks of
9423
    # the instance to point to the new secondary
9424
    self.lu.LogInfo("Updating instance configuration")
9425
    for dev, _, new_logical_id in iv_names.itervalues():
9426
      dev.logical_id = new_logical_id
9427
      self.cfg.SetDiskID(dev, self.instance.primary_node)
9428

    
9429
    self.cfg.Update(self.instance, feedback_fn)
9430

    
9431
    # and now perform the drbd attach
9432
    self.lu.LogInfo("Attaching primary drbds to new secondary"
9433
                    " (standalone => connected)")
9434
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
9435
                                            self.new_node],
9436
                                           self.node_secondary_ip,
9437
                                           self.instance.disks,
9438
                                           self.instance.name,
9439
                                           False)
9440
    for to_node, to_result in result.items():
9441
      msg = to_result.fail_msg
9442
      if msg:
9443
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
9444
                           to_node, msg,
9445
                           hint=("please do a gnt-instance info to see the"
9446
                                 " status of disks"))
9447
    cstep = 5
9448
    if self.early_release:
9449
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9450
      cstep += 1
9451
      self._RemoveOldStorage(self.target_node, iv_names)
9452
      # WARNING: we release all node locks here, do not do other RPCs
9453
      # than WaitForSync to the primary node
9454
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9455
                    names=[self.instance.primary_node,
9456
                           self.target_node,
9457
                           self.new_node])
9458

    
9459
    # Wait for sync
9460
    # This can fail as the old devices are degraded and _WaitForSync
9461
    # does a combined result over all disks, so we don't check its return value
9462
    self.lu.LogStep(cstep, steps_total, "Sync devices")
9463
    cstep += 1
9464
    _WaitForSync(self.lu, self.instance)
9465

    
9466
    # Check all devices manually
9467
    self._CheckDevices(self.instance.primary_node, iv_names)
9468

    
9469
    # Step: remove old storage
9470
    if not self.early_release:
9471
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9472
      self._RemoveOldStorage(self.target_node, iv_names)


class LURepairNodeStorage(NoHooksLU):
9476
  """Repairs the volume group on a node.
9477

9478
  """
9479
  REQ_BGL = False
9480

    
9481
  def CheckArguments(self):
9482
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
9483

    
9484
    storage_type = self.op.storage_type
9485

    
9486
    if (constants.SO_FIX_CONSISTENCY not in
9487
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
9488
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
9489
                                 " repaired" % storage_type,
9490
                                 errors.ECODE_INVAL)
9491

    
9492
  def ExpandNames(self):
9493
    self.needed_locks = {
9494
      locking.LEVEL_NODE: [self.op.node_name],
9495
      }
9496

    
9497
  def _CheckFaultyDisks(self, instance, node_name):
9498
    """Ensure faulty disks abort the opcode or at least warn."""
9499
    try:
9500
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
9501
                                  node_name, True):
9502
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
9503
                                   " node '%s'" % (instance.name, node_name),
9504
                                   errors.ECODE_STATE)
9505
    except errors.OpPrereqError, err:
9506
      if self.op.ignore_consistency:
9507
        self.proc.LogWarning(str(err.args[0]))
9508
      else:
9509
        raise
9510

    
9511
  def CheckPrereq(self):
9512
    """Check prerequisites.
9513

9514
    """
9515
    # Check whether any instance on this node has faulty disks
9516
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
9517
      if not inst.admin_up:
9518
        continue
9519
      check_nodes = set(inst.all_nodes)
9520
      check_nodes.discard(self.op.node_name)
9521
      for inst_node_name in check_nodes:
9522
        self._CheckFaultyDisks(inst, inst_node_name)
9523

    
9524
  def Exec(self, feedback_fn):
9525
    feedback_fn("Repairing storage unit '%s' on %s ..." %
9526
                (self.op.name, self.op.node_name))
9527

    
9528
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
9529
    result = self.rpc.call_storage_execute(self.op.node_name,
9530
                                           self.op.storage_type, st_args,
9531
                                           self.op.name,
9532
                                           constants.SO_FIX_CONSISTENCY)
9533
    result.Raise("Failed to repair storage unit '%s' on %s" %
9534
                 (self.op.name, self.op.node_name))


class LUNodeEvacStrategy(NoHooksLU):
  """Computes the node evacuation strategy.

  """
  REQ_BGL = False

  def CheckArguments(self):
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

  def ExpandNames(self):
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
    self.needed_locks = locks = {}
    if self.op.remote_node is None:
      locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]

  def Exec(self, feedback_fn):
    if self.op.remote_node is not None:
      instances = []
      for node in self.op.nodes:
        instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
      result = []
      for i in instances:
        if i.primary_node == self.op.remote_node:
          raise errors.OpPrereqError("Node %s is the primary node of"
                                     " instance %s, cannot use it as"
                                     " secondary" %
                                     (self.op.remote_node, i.name),
                                     errors.ECODE_INVAL)
        result.append([i.name, self.op.remote_node])
    else:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=constants.IALLOCATOR_MODE_MEVAC,
                       evac_nodes=self.op.nodes)
      ial.Run(self.op.iallocator, validate=True)
      if not ial.success:
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
                                 errors.ECODE_NORES)
      result = ial.result
    return result

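# Illustrative note (not part of the original module): with remote_node set,
# LUNodeEvacStrategy.Exec above returns a list of [instance, new_secondary]
# pairs, e.g. (hypothetical names):
#
#   [["inst1.example.com", "node3.example.com"],
#    ["inst2.example.com", "node3.example.com"]]
#
# Without remote_node the result is whatever the iallocator returns for the
# IALLOCATOR_MODE_MEVAC request.
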
class LUInstanceGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    nodenames = list(instance.all_nodes)
    for node in nodenames:
      _CheckNodeOnline(self, node)

    self.instance = instance

    if instance.disk_template not in constants.DTS_GROWABLE:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing", errors.ECODE_INVAL)

    self.disk = instance.FindDisk(self.op.disk)

    if instance.disk_template not in (constants.DT_FILE,
                                      constants.DT_SHARED_FILE):
      # TODO: check the free disk space for file, when that feature will be
      # supported
      _CheckNodesFreeDiskPerVG(self, nodenames,
                               self.disk.ComputeGrowth(self.op.amount))

  def Exec(self, feedback_fn):
    """Execute disk grow.

    """
    instance = self.instance
    disk = self.disk

    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block device to grow")

    # First run all grow ops in dry-run mode
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
      result.Raise("Grow request failed to node %s" % node)

    # We know that (as far as we can test) operations across different
    # nodes will succeed, time to run it for real
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
      result.Raise("Grow request failed to node %s" % node)

      # TODO: Rewrite code to work properly
      # DRBD goes into sync mode for a short amount of time after executing the
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
      # calling "resize" in sync mode fails. Sleeping for a short amount of
      # time is a work-around.
      time.sleep(5)

    disk.RecordGrow(self.op.amount)
    self.cfg.Update(instance, feedback_fn)
    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
      if disk_abort:
        self.proc.LogWarning("Disk sync-ing has not returned a good"
                             " status; please check the instance")
      if not instance.admin_up:
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
    elif not instance.admin_up:
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
                           " not supposed to be running because no wait for"
                           " sync mode was requested")

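# Illustrative note (not part of the original module): LUInstanceGrowDisk runs
# every call_blockdev_grow twice, first with the final dry-run argument True on
# all nodes and only then for real, so a node that cannot grow the device is
# detected before any disk is actually resized. A hedged sketch of the
# corresponding opcode (field names inferred from self.op above, values
# hypothetical; "amount" is a size in MiB as elsewhere in this module):
#
#   op = opcodes.OpInstanceGrowDisk(instance_name="inst1.example.com",
#                                   disk=0, amount=1024, wait_for_sync=True)
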
class LUInstanceQueryData(NoHooksLU):
  """Query runtime instance data.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

    # Use locking if requested or when non-static information is wanted
    if not (self.op.static or self.op.use_locking):
      self.LogWarning("Non-static data requested, locks need to be acquired")
      self.op.use_locking = True

    if self.op.instances or not self.op.use_locking:
      # Expand instance names right here
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
    else:
      # Will use acquired locks
      self.wanted_names = None

    if self.op.use_locking:
      self.share_locks = dict.fromkeys(locking.LEVELS, 1)

      if self.wanted_names is None:
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
      else:
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names

      self.needed_locks[locking.LEVEL_NODE] = []
      self.share_locks = dict.fromkeys(locking.LEVELS, 1)
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if self.op.use_locking and level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      assert self.op.use_locking, "Locking was not used"
      self.wanted_names = self.glm.list_owned(locking.LEVEL_INSTANCE)

    self.wanted_instances = [self.cfg.GetInstanceInfo(name)
                             for name in self.wanted_names]

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_find(node, dev)
    if result.offline:
      return None

    result.Raise("Can't compute disk status for %s" % instance_name)

    status = result.payload
    if status is None:
      return None

    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]

    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                              instance.name, dev)
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)

    if dev.children:
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
                      for child in dev.children]
    else:
      dev_children = []

    return {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
      "mode": dev.mode,
      "size": dev.size,
      }

  def Exec(self, feedback_fn):
    """Gather and return data"""
    result = {}

    cluster = self.cfg.GetClusterInfo()

    for instance in self.wanted_instances:
      if not self.op.static:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        if remote_info and "state" in remote_info:
          remote_state = "up"
        else:
          remote_state = "down"
      else:
        remote_state = None
      if instance.admin_up:
        config_state = "up"
      else:
        config_state = "down"

      disks = [self._ComputeDiskStatus(instance, None, device)
               for device in instance.disks]

      result[instance.name] = {
        "name": instance.name,
        "config_state": config_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "os_instance": instance.osparams,
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

    return result

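# Illustrative note (not part of the original module): LUInstanceQueryData.Exec
# maps each instance name to a dict with the keys built above; a heavily
# trimmed, hypothetical example of one entry:
#
#   {"inst1.example.com": {"name": "inst1.example.com",
#                          "config_state": "up", "run_state": "up",
#                          "pnode": "node1.example.com", "snodes": [],
#                          "disk_template": "plain", "disks": [...]}}
#
# "pstatus"/"sstatus" inside each disk entry are the tuples returned by
# _ComputeBlockdevStatus, or None in static mode.
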
class LUInstanceSetParams(LogicalUnit):
9854
  """Modifies an instances's parameters.
9855

9856
  """
9857
  HPATH = "instance-modify"
9858
  HTYPE = constants.HTYPE_INSTANCE
9859
  REQ_BGL = False
9860

    
9861
  def CheckArguments(self):
9862
    if not (self.op.nics or self.op.disks or self.op.disk_template or
9863
            self.op.hvparams or self.op.beparams or self.op.os_name):
9864
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
9865

    
9866
    if self.op.hvparams:
9867
      _CheckGlobalHvParams(self.op.hvparams)
9868

    
9869
    # Disk validation
9870
    disk_addremove = 0
9871
    for disk_op, disk_dict in self.op.disks:
9872
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
9873
      if disk_op == constants.DDM_REMOVE:
9874
        disk_addremove += 1
9875
        continue
9876
      elif disk_op == constants.DDM_ADD:
9877
        disk_addremove += 1
9878
      else:
9879
        if not isinstance(disk_op, int):
9880
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
9881
        if not isinstance(disk_dict, dict):
9882
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
9883
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9884

    
9885
      if disk_op == constants.DDM_ADD:
9886
        mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
9887
        if mode not in constants.DISK_ACCESS_SET:
9888
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
9889
                                     errors.ECODE_INVAL)
9890
        size = disk_dict.get(constants.IDISK_SIZE, None)
9891
        if size is None:
9892
          raise errors.OpPrereqError("Required disk parameter size missing",
9893
                                     errors.ECODE_INVAL)
9894
        try:
9895
          size = int(size)
9896
        except (TypeError, ValueError), err:
9897
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
9898
                                     str(err), errors.ECODE_INVAL)
9899
        disk_dict[constants.IDISK_SIZE] = size
9900
      else:
9901
        # modification of disk
9902
        if constants.IDISK_SIZE in disk_dict:
9903
          raise errors.OpPrereqError("Disk size change not possible, use"
9904
                                     " grow-disk", errors.ECODE_INVAL)
9905

    
9906
    if disk_addremove > 1:
9907
      raise errors.OpPrereqError("Only one disk add or remove operation"
9908
                                 " supported at a time", errors.ECODE_INVAL)
9909

    
9910
    if self.op.disks and self.op.disk_template is not None:
9911
      raise errors.OpPrereqError("Disk template conversion and other disk"
9912
                                 " changes not supported at the same time",
9913
                                 errors.ECODE_INVAL)
9914

    
9915
    if (self.op.disk_template and
9916
        self.op.disk_template in constants.DTS_INT_MIRROR and
9917
        self.op.remote_node is None):
9918
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
9919
                                 " one requires specifying a secondary node",
9920
                                 errors.ECODE_INVAL)
9921

    
9922
    # NIC validation
9923
    nic_addremove = 0
9924
    for nic_op, nic_dict in self.op.nics:
9925
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
9926
      if nic_op == constants.DDM_REMOVE:
9927
        nic_addremove += 1
9928
        continue
9929
      elif nic_op == constants.DDM_ADD:
9930
        nic_addremove += 1
9931
      else:
9932
        if not isinstance(nic_op, int):
9933
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
9934
        if not isinstance(nic_dict, dict):
9935
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
9936
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9937

    
9938
      # nic_dict should be a dict
9939
      nic_ip = nic_dict.get(constants.INIC_IP, None)
9940
      if nic_ip is not None:
9941
        if nic_ip.lower() == constants.VALUE_NONE:
9942
          nic_dict[constants.INIC_IP] = None
9943
        else:
9944
          if not netutils.IPAddress.IsValid(nic_ip):
9945
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
9946
                                       errors.ECODE_INVAL)
9947

    
9948
      nic_bridge = nic_dict.get('bridge', None)
9949
      nic_link = nic_dict.get(constants.INIC_LINK, None)
9950
      if nic_bridge and nic_link:
9951
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
9952
                                   " at the same time", errors.ECODE_INVAL)
9953
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
9954
        nic_dict['bridge'] = None
9955
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
9956
        nic_dict[constants.INIC_LINK] = None
9957

    
9958
      if nic_op == constants.DDM_ADD:
9959
        nic_mac = nic_dict.get(constants.INIC_MAC, None)
9960
        if nic_mac is None:
9961
          nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
9962

    
9963
      if constants.INIC_MAC in nic_dict:
9964
        nic_mac = nic_dict[constants.INIC_MAC]
9965
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9966
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
9967

    
9968
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
9969
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
9970
                                     " modifying an existing nic",
9971
                                     errors.ECODE_INVAL)
9972

    
9973
    if nic_addremove > 1:
9974
      raise errors.OpPrereqError("Only one NIC add or remove operation"
9975
                                 " supported at a time", errors.ECODE_INVAL)
9976

    
9977
  def ExpandNames(self):
9978
    self._ExpandAndLockInstance()
9979
    self.needed_locks[locking.LEVEL_NODE] = []
9980
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9981

    
9982
  def DeclareLocks(self, level):
9983
    if level == locking.LEVEL_NODE:
9984
      self._LockInstancesNodes()
9985
      if self.op.disk_template and self.op.remote_node:
9986
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9987
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
9988

    
9989
  def BuildHooksEnv(self):
9990
    """Build hooks env.
9991

9992
    This runs on the master, primary and secondaries.
9993

9994
    """
9995
    args = dict()
9996
    if constants.BE_MEMORY in self.be_new:
9997
      args['memory'] = self.be_new[constants.BE_MEMORY]
9998
    if constants.BE_VCPUS in self.be_new:
9999
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
10000
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
10001
    # information at all.
10002
    if self.op.nics:
10003
      args['nics'] = []
10004
      nic_override = dict(self.op.nics)
10005
      for idx, nic in enumerate(self.instance.nics):
10006
        if idx in nic_override:
10007
          this_nic_override = nic_override[idx]
10008
        else:
10009
          this_nic_override = {}
10010
        if constants.INIC_IP in this_nic_override:
10011
          ip = this_nic_override[constants.INIC_IP]
10012
        else:
10013
          ip = nic.ip
10014
        if constants.INIC_MAC in this_nic_override:
10015
          mac = this_nic_override[constants.INIC_MAC]
10016
        else:
10017
          mac = nic.mac
10018
        if idx in self.nic_pnew:
10019
          nicparams = self.nic_pnew[idx]
10020
        else:
10021
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
10022
        mode = nicparams[constants.NIC_MODE]
10023
        link = nicparams[constants.NIC_LINK]
10024
        args['nics'].append((ip, mac, mode, link))
10025
      if constants.DDM_ADD in nic_override:
10026
        ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
10027
        mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
10028
        nicparams = self.nic_pnew[constants.DDM_ADD]
10029
        mode = nicparams[constants.NIC_MODE]
10030
        link = nicparams[constants.NIC_LINK]
10031
        args['nics'].append((ip, mac, mode, link))
10032
      elif constants.DDM_REMOVE in nic_override:
10033
        del args['nics'][-1]
10034

    
10035
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
10036
    if self.op.disk_template:
10037
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
10038

    
10039
    return env
10040

    
10041
  def BuildHooksNodes(self):
10042
    """Build hooks nodes.
10043

10044
    """
10045
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10046
    return (nl, nl)
10047

    
10048
  def CheckPrereq(self):
10049
    """Check prerequisites.
10050

10051
    This only checks the instance list against the existing names.
10052

10053
    """
10054
    # checking the new params on the primary/secondary nodes
10055

    
10056
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10057
    cluster = self.cluster = self.cfg.GetClusterInfo()
10058
    assert self.instance is not None, \
10059
      "Cannot retrieve locked instance %s" % self.op.instance_name
10060
    pnode = instance.primary_node
10061
    nodelist = list(instance.all_nodes)
10062

    
10063
    # OS change
10064
    if self.op.os_name and not self.op.force:
10065
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
10066
                      self.op.force_variant)
10067
      instance_os = self.op.os_name
10068
    else:
10069
      instance_os = instance.os
10070

    
10071
    if self.op.disk_template:
10072
      if instance.disk_template == self.op.disk_template:
10073
        raise errors.OpPrereqError("Instance already has disk template %s" %
10074
                                   instance.disk_template, errors.ECODE_INVAL)
10075

    
10076
      if (instance.disk_template,
10077
          self.op.disk_template) not in self._DISK_CONVERSIONS:
10078
        raise errors.OpPrereqError("Unsupported disk template conversion from"
10079
                                   " %s to %s" % (instance.disk_template,
10080
                                                  self.op.disk_template),
10081
                                   errors.ECODE_INVAL)
10082
      _CheckInstanceDown(self, instance, "cannot change disk template")
10083
      if self.op.disk_template in constants.DTS_INT_MIRROR:
10084
        if self.op.remote_node == pnode:
10085
          raise errors.OpPrereqError("Given new secondary node %s is the same"
10086
                                     " as the primary node of the instance" %
10087
                                     self.op.remote_node, errors.ECODE_STATE)
10088
        _CheckNodeOnline(self, self.op.remote_node)
10089
        _CheckNodeNotDrained(self, self.op.remote_node)
10090
        # FIXME: here we assume that the old instance type is DT_PLAIN
10091
        assert instance.disk_template == constants.DT_PLAIN
10092
        disks = [{constants.IDISK_SIZE: d.size,
10093
                  constants.IDISK_VG: d.logical_id[0]}
10094
                 for d in instance.disks]
10095
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
10096
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
10097

    
10098
    # hvparams processing
10099
    if self.op.hvparams:
10100
      hv_type = instance.hypervisor
10101
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
10102
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
10103
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
10104

    
10105
      # local check
10106
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
10107
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
10108
      self.hv_new = hv_new # the new actual values
10109
      self.hv_inst = i_hvdict # the new dict (without defaults)
10110
    else:
10111
      self.hv_new = self.hv_inst = {}
10112

    
10113
    # beparams processing
10114
    if self.op.beparams:
10115
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
10116
                                   use_none=True)
10117
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
10118
      be_new = cluster.SimpleFillBE(i_bedict)
10119
      self.be_new = be_new # the new actual values
10120
      self.be_inst = i_bedict # the new dict (without defaults)
10121
    else:
10122
      self.be_new = self.be_inst = {}
10123
    be_old = cluster.FillBE(instance)
10124

    
10125
    # osparams processing
10126
    if self.op.osparams:
10127
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
10128
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
10129
      self.os_inst = i_osdict # the new dict (without defaults)
10130
    else:
10131
      self.os_inst = {}
10132

    
10133
    self.warn = []
10134

    
10135
    if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
10136
        be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
10137
      mem_check_list = [pnode]
10138
      if be_new[constants.BE_AUTO_BALANCE]:
10139
        # either we changed auto_balance to yes or it was from before
10140
        mem_check_list.extend(instance.secondary_nodes)
10141
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
10142
                                                  instance.hypervisor)
10143
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
10144
                                         instance.hypervisor)
10145
      pninfo = nodeinfo[pnode]
10146
      msg = pninfo.fail_msg
10147
      if msg:
10148
        # Assume the primary node is unreachable and go ahead
10149
        self.warn.append("Can't get info from primary node %s: %s" %
10150
                         (pnode,  msg))
10151
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
10152
        self.warn.append("Node data from primary node %s doesn't contain"
10153
                         " free memory information" % pnode)
10154
      elif instance_info.fail_msg:
10155
        self.warn.append("Can't get instance runtime information: %s" %
10156
                        instance_info.fail_msg)
10157
      else:
10158
        if instance_info.payload:
10159
          current_mem = int(instance_info.payload['memory'])
10160
        else:
10161
          # Assume instance not running
10162
          # (there is a slight race condition here, but it's not very probable,
10163
          # and we have no other way to check)
10164
          current_mem = 0
10165
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
10166
                    pninfo.payload['memory_free'])
10167
        if miss_mem > 0:
10168
          raise errors.OpPrereqError("This change will prevent the instance"
10169
                                     " from starting, due to %d MB of memory"
10170
                                     " missing on its primary node" % miss_mem,
10171
                                     errors.ECODE_NORES)
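        # Worked example (illustrative): raising memory to 2048 MB while the
        # instance currently uses 512 MB and the primary node reports 1024 MB
        # free gives miss_mem = 2048 - 512 - 1024 = 512 > 0, so the change is
        # refused with ECODE_NORES.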
10172

    
10173
      if be_new[constants.BE_AUTO_BALANCE]:
10174
        for node, nres in nodeinfo.items():
10175
          if node not in instance.secondary_nodes:
10176
            continue
10177
          nres.Raise("Can't get info from secondary node %s" % node,
10178
                     prereq=True, ecode=errors.ECODE_STATE)
10179
          if not isinstance(nres.payload.get('memory_free', None), int):
10180
            raise errors.OpPrereqError("Secondary node %s didn't return free"
10181
                                       " memory information" % node,
10182
                                       errors.ECODE_STATE)
10183
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
10184
            raise errors.OpPrereqError("This change will prevent the instance"
10185
                                       " from failover to its secondary node"
10186
                                       " %s, due to not enough memory" % node,
10187
                                       errors.ECODE_STATE)
10188

    
10189
    # NIC processing
10190
    self.nic_pnew = {}
10191
    self.nic_pinst = {}
10192
    for nic_op, nic_dict in self.op.nics:
10193
      if nic_op == constants.DDM_REMOVE:
10194
        if not instance.nics:
10195
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
10196
                                     errors.ECODE_INVAL)
10197
        continue
10198
      if nic_op != constants.DDM_ADD:
10199
        # an existing nic
10200
        if not instance.nics:
10201
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
10202
                                     " no NICs" % nic_op,
10203
                                     errors.ECODE_INVAL)
10204
        if nic_op < 0 or nic_op >= len(instance.nics):
10205
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
10206
                                     " are 0 to %d" %
10207
                                     (nic_op, len(instance.nics) - 1),
10208
                                     errors.ECODE_INVAL)
10209
        old_nic_params = instance.nics[nic_op].nicparams
10210
        old_nic_ip = instance.nics[nic_op].ip
10211
      else:
10212
        old_nic_params = {}
10213
        old_nic_ip = None
10214

    
10215
      update_params_dict = dict([(key, nic_dict[key])
10216
                                 for key in constants.NICS_PARAMETERS
10217
                                 if key in nic_dict])
10218

    
10219
      if 'bridge' in nic_dict:
10220
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
10221

    
10222
      new_nic_params = _GetUpdatedParams(old_nic_params,
10223
                                         update_params_dict)
10224
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
10225
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
10226
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
10227
      self.nic_pinst[nic_op] = new_nic_params
10228
      self.nic_pnew[nic_op] = new_filled_nic_params
10229
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
10230

    
10231
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
10232
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
10233
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
10234
        if msg:
10235
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
10236
          if self.op.force:
10237
            self.warn.append(msg)
10238
          else:
10239
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
10240
      if new_nic_mode == constants.NIC_MODE_ROUTED:
10241
        if constants.INIC_IP in nic_dict:
10242
          nic_ip = nic_dict[constants.INIC_IP]
10243
        else:
10244
          nic_ip = old_nic_ip
10245
        if nic_ip is None:
10246
          raise errors.OpPrereqError('Cannot set the nic ip to None'
10247
                                     ' on a routed nic', errors.ECODE_INVAL)
10248
      if constants.INIC_MAC in nic_dict:
10249
        nic_mac = nic_dict[constants.INIC_MAC]
10250
        if nic_mac is None:
10251
          raise errors.OpPrereqError('Cannot set the nic mac to None',
10252
                                     errors.ECODE_INVAL)
10253
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10254
          # otherwise generate the mac
10255
          nic_dict[constants.INIC_MAC] = \
10256
            self.cfg.GenerateMAC(self.proc.GetECId())
10257
        else:
10258
          # or validate/reserve the current one
10259
          try:
10260
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
10261
          except errors.ReservationError:
10262
            raise errors.OpPrereqError("MAC address %s already in use"
10263
                                       " in cluster" % nic_mac,
10264
                                       errors.ECODE_NOTUNIQUE)
10265

    
10266
    # DISK processing
10267
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
10268
      raise errors.OpPrereqError("Disk operations not supported for"
10269
                                 " diskless instances",
10270
                                 errors.ECODE_INVAL)
10271
    for disk_op, _ in self.op.disks:
10272
      if disk_op == constants.DDM_REMOVE:
10273
        if len(instance.disks) == 1:
10274
          raise errors.OpPrereqError("Cannot remove the last disk of"
10275
                                     " an instance", errors.ECODE_INVAL)
10276
        _CheckInstanceDown(self, instance, "cannot remove disks")
10277

    
10278
      if (disk_op == constants.DDM_ADD and
10279
          len(instance.disks) >= constants.MAX_DISKS):
10280
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
10281
                                   " add more" % constants.MAX_DISKS,
10282
                                   errors.ECODE_STATE)
10283
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
10284
        # an existing disk
10285
        if disk_op < 0 or disk_op >= len(instance.disks):
10286
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
10287
                                     " are 0 to %d" %
10288
                                     (disk_op, len(instance.disks)),
10289
                                     errors.ECODE_INVAL)
10290

    
10291
    return
10292

    
10293
  def _ConvertPlainToDrbd(self, feedback_fn):
10294
    """Converts an instance from plain to drbd.
10295

10296
    """
10297
    feedback_fn("Converting template to drbd")
10298
    instance = self.instance
10299
    pnode = instance.primary_node
10300
    snode = self.op.remote_node
10301

    
10302
    # create a fake disk info for _GenerateDiskTemplate
10303
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
10304
                  constants.IDISK_VG: d.logical_id[0]}
10305
                 for d in instance.disks]
10306
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
10307
                                      instance.name, pnode, [snode],
10308
                                      disk_info, None, None, 0, feedback_fn)
10309
    info = _GetInstanceInfoText(instance)
10310
    feedback_fn("Creating aditional volumes...")
10311
    # first, create the missing data and meta devices
10312
    for disk in new_disks:
10313
      # unfortunately this is... not too nice
10314
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
10315
                            info, True)
10316
      for child in disk.children:
10317
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
10318
    # at this stage, all new LVs have been created, we can rename the
10319
    # old ones
10320
    feedback_fn("Renaming original volumes...")
10321
    rename_list = [(o, n.children[0].logical_id)
10322
                   for (o, n) in zip(instance.disks, new_disks)]
10323
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
10324
    result.Raise("Failed to rename original LVs")
10325

    
10326
    feedback_fn("Initializing DRBD devices...")
10327
    # all child devices are in place, we can now create the DRBD devices
10328
    for disk in new_disks:
10329
      for node in [pnode, snode]:
10330
        f_create = node == pnode
10331
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
10332

    
10333
    # at this point, the instance has been modified
10334
    instance.disk_template = constants.DT_DRBD8
10335
    instance.disks = new_disks
10336
    self.cfg.Update(instance, feedback_fn)
10337

    
10338
    # disks are created, waiting for sync
10339
    disk_abort = not _WaitForSync(self, instance,
10340
                                  oneshot=not self.op.wait_for_sync)
10341
    if disk_abort:
10342
      raise errors.OpExecError("There are some degraded disks for"
10343
                               " this instance, please cleanup manually")
10344

    
10345
  def _ConvertDrbdToPlain(self, feedback_fn):
10346
    """Converts an instance from drbd to plain.
10347

10348
    """
10349
    instance = self.instance
10350
    assert len(instance.secondary_nodes) == 1
10351
    pnode = instance.primary_node
10352
    snode = instance.secondary_nodes[0]
10353
    feedback_fn("Converting template to plain")
10354

    
10355
    old_disks = instance.disks
10356
    new_disks = [d.children[0] for d in old_disks]
10357

    
10358
    # copy over size and mode
10359
    for parent, child in zip(old_disks, new_disks):
10360
      child.size = parent.size
10361
      child.mode = parent.mode
10362

    
10363
    # update instance structure
10364
    instance.disks = new_disks
10365
    instance.disk_template = constants.DT_PLAIN
10366
    self.cfg.Update(instance, feedback_fn)
10367

    
10368
    feedback_fn("Removing volumes on the secondary node...")
10369
    for disk in old_disks:
10370
      self.cfg.SetDiskID(disk, snode)
10371
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
10372
      if msg:
10373
        self.LogWarning("Could not remove block device %s on node %s,"
10374
                        " continuing anyway: %s", disk.iv_name, snode, msg)
10375

    
10376
    feedback_fn("Removing unneeded volumes on the primary node...")
10377
    for idx, disk in enumerate(old_disks):
10378
      meta = disk.children[1]
10379
      self.cfg.SetDiskID(meta, pnode)
10380
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
10381
      if msg:
10382
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
10383
                        " continuing anyway: %s", idx, pnode, msg)
10384

    
10385
  def Exec(self, feedback_fn):
10386
    """Modifies an instance.
10387

10388
    All parameters take effect only at the next restart of the instance.
10389

10390
    """
10391
    # Process here the warnings from CheckPrereq, as we don't have a
10392
    # feedback_fn there.
10393
    for warn in self.warn:
10394
      feedback_fn("WARNING: %s" % warn)
10395

    
10396
    result = []
10397
    instance = self.instance
10398
    # disk changes
10399
    for disk_op, disk_dict in self.op.disks:
10400
      if disk_op == constants.DDM_REMOVE:
10401
        # remove the last disk
10402
        device = instance.disks.pop()
10403
        device_idx = len(instance.disks)
10404
        for node, disk in device.ComputeNodeTree(instance.primary_node):
10405
          self.cfg.SetDiskID(disk, node)
10406
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
10407
          if msg:
10408
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
10409
                            " continuing anyway", device_idx, node, msg)
10410
        result.append(("disk/%d" % device_idx, "remove"))
10411
      elif disk_op == constants.DDM_ADD:
10412
        # add a new disk
10413
        if instance.disk_template in (constants.DT_FILE,
10414
                                        constants.DT_SHARED_FILE):
10415
          file_driver, file_path = instance.disks[0].logical_id
10416
          file_path = os.path.dirname(file_path)
10417
        else:
10418
          file_driver = file_path = None
10419
        disk_idx_base = len(instance.disks)
10420
        new_disk = _GenerateDiskTemplate(self,
10421
                                         instance.disk_template,
10422
                                         instance.name, instance.primary_node,
10423
                                         instance.secondary_nodes,
10424
                                         [disk_dict],
10425
                                         file_path,
10426
                                         file_driver,
10427
                                         disk_idx_base, feedback_fn)[0]
10428
        instance.disks.append(new_disk)
10429
        info = _GetInstanceInfoText(instance)
10430

    
10431
        logging.info("Creating volume %s for instance %s",
10432
                     new_disk.iv_name, instance.name)
10433
        # Note: this needs to be kept in sync with _CreateDisks
10434
        #HARDCODE
10435
        for node in instance.all_nodes:
10436
          f_create = node == instance.primary_node
10437
          try:
10438
            _CreateBlockDev(self, node, instance, new_disk,
10439
                            f_create, info, f_create)
10440
          except errors.OpExecError, err:
10441
            self.LogWarning("Failed to create volume %s (%s) on"
10442
                            " node %s: %s",
10443
                            new_disk.iv_name, new_disk, node, err)
10444
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
10445
                       (new_disk.size, new_disk.mode)))
10446
      else:
10447
        # change a given disk
10448
        instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
10449
        result.append(("disk.mode/%d" % disk_op,
10450
                       disk_dict[constants.IDISK_MODE]))
10451

    
10452
    if self.op.disk_template:
10453
      r_shut = _ShutdownInstanceDisks(self, instance)
10454
      if not r_shut:
10455
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
10456
                                 " proceed with disk template conversion")
10457
      mode = (instance.disk_template, self.op.disk_template)
10458
      try:
10459
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
10460
      except:
10461
        self.cfg.ReleaseDRBDMinors(instance.name)
10462
        raise
10463
      result.append(("disk_template", self.op.disk_template))
10464

    
10465
    # NIC changes
10466
    for nic_op, nic_dict in self.op.nics:
10467
      if nic_op == constants.DDM_REMOVE:
10468
        # remove the last nic
10469
        del instance.nics[-1]
10470
        result.append(("nic.%d" % len(instance.nics), "remove"))
10471
      elif nic_op == constants.DDM_ADD:
10472
        # mac and bridge should be set, by now
10473
        mac = nic_dict[constants.INIC_MAC]
10474
        ip = nic_dict.get(constants.INIC_IP, None)
10475
        nicparams = self.nic_pinst[constants.DDM_ADD]
10476
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
10477
        instance.nics.append(new_nic)
10478
        result.append(("nic.%d" % (len(instance.nics) - 1),
10479
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
10480
                       (new_nic.mac, new_nic.ip,
10481
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
10482
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
10483
                       )))
10484
      else:
10485
        for key in (constants.INIC_MAC, constants.INIC_IP):
10486
          if key in nic_dict:
10487
            setattr(instance.nics[nic_op], key, nic_dict[key])
10488
        if nic_op in self.nic_pinst:
10489
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
10490
        for key, val in nic_dict.iteritems():
10491
          result.append(("nic.%s/%d" % (key, nic_op), val))
10492

    
10493
    # hvparams changes
10494
    if self.op.hvparams:
10495
      instance.hvparams = self.hv_inst
10496
      for key, val in self.op.hvparams.iteritems():
10497
        result.append(("hv/%s" % key, val))
10498

    
10499
    # beparams changes
10500
    if self.op.beparams:
10501
      instance.beparams = self.be_inst
10502
      for key, val in self.op.beparams.iteritems():
10503
        result.append(("be/%s" % key, val))
10504

    
10505
    # OS change
10506
    if self.op.os_name:
10507
      instance.os = self.op.os_name
10508

    
10509
    # osparams changes
10510
    if self.op.osparams:
10511
      instance.osparams = self.os_inst
10512
      for key, val in self.op.osparams.iteritems():
10513
        result.append(("os/%s" % key, val))
10514

    
10515
    self.cfg.Update(instance, feedback_fn)
10516

    
10517
    return result
10518

    
10519
  _DISK_CONVERSIONS = {
10520
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
10521
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
10522
    }
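  # Illustrative note (not part of the original code): Exec above dispatches on
  # the (old_template, new_template) tuple, so e.g. a request changing
  # constants.DT_PLAIN to constants.DT_DRBD8 ends up in _ConvertPlainToDrbd,
  # which also requires self.op.remote_node for the new secondary. Any pair not
  # listed here is rejected earlier in CheckPrereq.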
10523

    
10524

    
10525
class LUBackupQuery(NoHooksLU):
  """Query the exports list

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node.

    """
    self.nodes = self.glm.list_owned(locking.LEVEL_NODE)
    rpcresult = self.rpc.call_export_list(self.nodes)
    result = {}
    for node in rpcresult:
      if rpcresult[node].fail_msg:
        result[node] = False
      else:
        result[node] = rpcresult[node].payload

    return result

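# Illustrative note (not part of the original module): the dict returned by
# LUBackupQuery.Exec maps a node either to its export list or to False when
# the RPC to that node failed, e.g. (hypothetical names):
#
#   {"node1.example.com": ["inst1.example.com"],
#    "node2.example.com": False}
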
class LUBackupPrepare(NoHooksLU):
  """Prepares an instance for an export and returns useful information.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self._cds = _GetClusterDomainSecret()

  def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    """
    instance = self.instance

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      salt = utils.GenerateSecret(8)

      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
      result = self.rpc.call_x509_cert_create(instance.primary_node,
                                              constants.RIE_CERT_VALIDITY)
      result.Raise("Can't create X509 key and certificate on %s" % result.node)

      (name, cert_pem) = result.payload

      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                             cert_pem)

      return {
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
                          salt),
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
        }

    return None

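# Illustrative note (not part of the original module): for remote exports
# LUBackupPrepare.Exec above hands back the material LUBackupExport later
# verifies, roughly (hypothetical placeholder values):
#
#   {"handshake": <tuple from ComputeRemoteExportHandshake>,
#    "x509_key_name": ("<key name>", "<HMAC digest>", "<salt>"),
#    "x509_ca": "<signed CA certificate PEM>"}
#
# LUBackupExport.CheckPrereq re-checks the HMAC in x509_key_name against the
# cluster domain secret before accepting the CA.
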
class LUBackupExport(LogicalUnit):
10613
  """Export an instance to an image in the cluster.
10614

10615
  """
10616
  HPATH = "instance-export"
10617
  HTYPE = constants.HTYPE_INSTANCE
10618
  REQ_BGL = False
10619

    
10620
  def CheckArguments(self):
10621
    """Check the arguments.
10622

10623
    """
10624
    self.x509_key_name = self.op.x509_key_name
10625
    self.dest_x509_ca_pem = self.op.destination_x509_ca
10626

    
10627
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
10628
      if not self.x509_key_name:
10629
        raise errors.OpPrereqError("Missing X509 key name for encryption",
10630
                                   errors.ECODE_INVAL)
10631

    
10632
      if not self.dest_x509_ca_pem:
10633
        raise errors.OpPrereqError("Missing destination X509 CA",
10634
                                   errors.ECODE_INVAL)
10635

    
10636
  def ExpandNames(self):
10637
    self._ExpandAndLockInstance()
10638

    
10639
    # Lock all nodes for local exports
10640
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
10641
      # FIXME: lock only instance primary and destination node
10642
      #
10643
      # Sad but true, for now we have to lock all nodes, as we don't know where
10644
      # the previous export might be, and in this LU we search for it and
10645
      # remove it from its current node. In the future we could fix this by:
10646
      #  - making a tasklet to search (share-lock all), then create the
10647
      #    new one, then one to remove, after
10648
      #  - removing the removal operation altogether
10649
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10650

    
10651
  def DeclareLocks(self, level):
10652
    """Last minute lock declaration."""
10653
    # All nodes are locked anyway, so nothing to do here.
10654

    
10655
  def BuildHooksEnv(self):
10656
    """Build hooks env.
10657

10658
    This will run on the master, primary node and target node.
10659

10660
    """
10661
    env = {
10662
      "EXPORT_MODE": self.op.mode,
10663
      "EXPORT_NODE": self.op.target_node,
10664
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
10665
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
10666
      # TODO: Generic function for boolean env variables
10667
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
10668
      }
10669

    
10670
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10671

    
10672
    return env
10673

    
10674
  def BuildHooksNodes(self):
10675
    """Build hooks nodes.
10676

10677
    """
10678
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
10679

    
10680
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
10681
      nl.append(self.op.target_node)
10682

    
10683
    return (nl, nl)
10684

    
10685
  def CheckPrereq(self):
10686
    """Check prerequisites.
10687

10688
    This checks that the instance and node names are valid.
10689

10690
    """
10691
    instance_name = self.op.instance_name
10692

    
10693
    self.instance = self.cfg.GetInstanceInfo(instance_name)
10694
    assert self.instance is not None, \
10695
          "Cannot retrieve locked instance %s" % self.op.instance_name
10696
    _CheckNodeOnline(self, self.instance.primary_node)
10697

    
10698
    if (self.op.remove_instance and self.instance.admin_up and
10699
        not self.op.shutdown):
10700
      raise errors.OpPrereqError("Can not remove instance without shutting it"
10701
                                 " down before")
10702

    
10703
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
10704
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
10705
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
10706
      assert self.dst_node is not None
10707

    
10708
      _CheckNodeOnline(self, self.dst_node.name)
10709
      _CheckNodeNotDrained(self, self.dst_node.name)
10710

    
10711
      self._cds = None
10712
      self.dest_disk_info = None
10713
      self.dest_x509_ca = None
10714

    
10715
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10716
      self.dst_node = None
10717

    
10718
      if len(self.op.target_node) != len(self.instance.disks):
10719
        raise errors.OpPrereqError(("Received destination information for %s"
10720
                                    " disks, but instance %s has %s disks") %
10721
                                   (len(self.op.target_node), instance_name,
10722
                                    len(self.instance.disks)),
10723
                                   errors.ECODE_INVAL)
10724

    
10725
      cds = _GetClusterDomainSecret()
10726

    
10727
      # Check X509 key name
10728
      try:
10729
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
10730
      except (TypeError, ValueError), err:
10731
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
10732

    
10733
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
10734
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
10735
                                   errors.ECODE_INVAL)
10736

    
10737
      # Load and verify CA
10738
      try:
10739
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
10740
      except OpenSSL.crypto.Error, err:
10741
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
10742
                                   (err, ), errors.ECODE_INVAL)
10743

    
10744
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
10745
      if errcode is not None:
10746
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
10747
                                   (msg, ), errors.ECODE_INVAL)
10748

    
10749
      self.dest_x509_ca = cert
10750

    
10751
      # Verify target information
10752
      disk_info = []
10753
      for idx, disk_data in enumerate(self.op.target_node):
10754
        try:
10755
          (host, port, magic) = \
10756
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
10757
        except errors.GenericError, err:
10758
          raise errors.OpPrereqError("Target info for disk %s: %s" %
10759
                                     (idx, err), errors.ECODE_INVAL)
10760

    
10761
        disk_info.append((host, port, magic))
10762

    
10763
      assert len(disk_info) == len(self.op.target_node)
10764
      self.dest_disk_info = disk_info
10765

    
10766
    else:
10767
      raise errors.ProgrammerError("Unhandled export mode %r" %
10768
                                   self.op.mode)
10769

    
10770
    # instance disk type verification
10771
    # TODO: Implement export support for file-based disks
10772
    for disk in self.instance.disks:
10773
      if disk.dev_type == constants.LD_FILE:
10774
        raise errors.OpPrereqError("Export not supported for instances with"
10775
                                   " file-based disks", errors.ECODE_INVAL)
10776

    
10777
  def _CleanupExports(self, feedback_fn):
10778
    """Removes exports of current instance from all other nodes.
10779

10780
    If an instance in a cluster with nodes A..D was exported to node C, its
10781
    exports will be removed from the nodes A, B and D.
10782

10783
    """
10784
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
10785

    
10786
    nodelist = self.cfg.GetNodeList()
10787
    nodelist.remove(self.dst_node.name)
10788

    
10789
    # on one-node clusters nodelist will be empty after the removal
10790
    # if we proceed the backup would be removed because OpBackupQuery
10791
    # substitutes an empty list with the full cluster node list.
10792
    iname = self.instance.name
10793
    if nodelist:
10794
      feedback_fn("Removing old exports for instance %s" % iname)
10795
      exportlist = self.rpc.call_export_list(nodelist)
10796
      for node in exportlist:
10797
        if exportlist[node].fail_msg:
10798
          continue
10799
        if iname in exportlist[node].payload:
10800
          msg = self.rpc.call_export_remove(node, iname).fail_msg
10801
          if msg:
10802
            self.LogWarning("Could not remove older export for instance %s"
10803
                            " on node %s: %s", iname, node, msg)
10804

    
10805
  def Exec(self, feedback_fn):
10806
    """Export an instance to an image in the cluster.
10807

10808
    """
10809
    assert self.op.mode in constants.EXPORT_MODES
10810

    
10811
    instance = self.instance
10812
    src_node = instance.primary_node
10813

    
10814
    if self.op.shutdown:
10815
      # shutdown the instance, but not the disks
10816
      feedback_fn("Shutting down instance %s" % instance.name)
10817
      result = self.rpc.call_instance_shutdown(src_node, instance,
10818
                                               self.op.shutdown_timeout)
10819
      # TODO: Maybe ignore failures if ignore_remove_failures is set
10820
      result.Raise("Could not shutdown instance %s on"
10821
                   " node %s" % (instance.name, src_node))
10822

    
10823
    # set the disks ID correctly since call_instance_start needs the
10824
    # correct drbd minor to create the symlinks
10825
    for disk in instance.disks:
10826
      self.cfg.SetDiskID(disk, src_node)
10827

    
10828
    activate_disks = (not instance.admin_up)
10829

    
10830
    if activate_disks:
10831
      # Activate the instance disks if we're exporting a stopped instance
10832
      feedback_fn("Activating disks for %s" % instance.name)
10833
      _StartInstanceDisks(self, instance, None)
10834

    
10835
    try:
10836
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
10837
                                                     instance)
10838

    
10839
      helper.CreateSnapshots()
10840
      try:
10841
        if (self.op.shutdown and instance.admin_up and
10842
            not self.op.remove_instance):
10843
          assert not activate_disks
10844
          feedback_fn("Starting instance %s" % instance.name)
10845
          result = self.rpc.call_instance_start(src_node, instance, None, None)
10846
          msg = result.fail_msg
10847
          if msg:
10848
            feedback_fn("Failed to start instance: %s" % msg)
10849
            _ShutdownInstanceDisks(self, instance)
10850
            raise errors.OpExecError("Could not start instance: %s" % msg)
10851

    
10852
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
10853
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
10854
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10855
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
10856
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10857

    
10858
          (key_name, _, _) = self.x509_key_name
10859

    
10860
          dest_ca_pem = \
10861
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
10862
                                            self.dest_x509_ca)
10863

    
10864
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
10865
                                                     key_name, dest_ca_pem,
10866
                                                     timeouts)
10867
      finally:
10868
        helper.Cleanup()
10869

    
10870
      # Check for backwards compatibility
10871
      assert len(dresults) == len(instance.disks)
10872
      assert compat.all(isinstance(i, bool) for i in dresults), \
10873
             "Not all results are boolean: %r" % dresults
10874

    
10875
    finally:
10876
      if activate_disks:
10877
        feedback_fn("Deactivating disks for %s" % instance.name)
10878
        _ShutdownInstanceDisks(self, instance)
10879

    
10880
    if not (compat.all(dresults) and fin_resu):
10881
      failures = []
10882
      if not fin_resu:
10883
        failures.append("export finalization")
10884
      if not compat.all(dresults):
10885
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
10886
                               if not dsk)
10887
        failures.append("disk export: disk(s) %s" % fdsk)
10888

    
10889
      raise errors.OpExecError("Export failed, errors in %s" %
10890
                               utils.CommaJoin(failures))
10891

    
10892
    # At this point, the export was successful, we can cleanup/finish
10893

    
10894
    # Remove instance if requested
10895
    if self.op.remove_instance:
10896
      feedback_fn("Removing instance %s" % instance.name)
10897
      _RemoveInstance(self, feedback_fn, instance,
10898
                      self.op.ignore_remove_failures)
10899

    
10900
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
10901
      self._CleanupExports(feedback_fn)
10902

    
10903
    return fin_resu, dresults
10904

    
10905

    
10906
class LUBackupRemove(NoHooksLU):
  """Remove exports related to the named instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name

    locked_nodes = self.glm.list_owned(locking.LEVEL_NODE)
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")

    
10952

    
10953
class LUGroupAdd(LogicalUnit):
10954
  """Logical unit for creating node groups.
10955

10956
  """
10957
  HPATH = "group-add"
10958
  HTYPE = constants.HTYPE_GROUP
10959
  REQ_BGL = False
10960

    
10961
  def ExpandNames(self):
10962
    # We need the new group's UUID here so that we can create and acquire the
10963
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
10964
    # that it should not check whether the UUID exists in the configuration.
10965
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
10966
    self.needed_locks = {}
10967
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
10968

    
10969
  def CheckPrereq(self):
10970
    """Check prerequisites.
10971

10972
    This checks that the given group name is not an existing node group
10973
    already.
10974

10975
    """
10976
    try:
10977
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10978
    except errors.OpPrereqError:
10979
      pass
10980
    else:
10981
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
10982
                                 " node group (UUID: %s)" %
10983
                                 (self.op.group_name, existing_uuid),
10984
                                 errors.ECODE_EXISTS)
10985

    
10986
    if self.op.ndparams:
10987
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
10988

    
10989
  def BuildHooksEnv(self):
10990
    """Build hooks env.
10991

10992
    """
10993
    return {
10994
      "GROUP_NAME": self.op.group_name,
10995
      }
10996

    
10997
  def BuildHooksNodes(self):
10998
    """Build hooks nodes.
10999

11000
    """
11001
    mn = self.cfg.GetMasterNode()
11002
    return ([mn], [mn])
11003

    
11004
  def Exec(self, feedback_fn):
11005
    """Add the node group to the cluster.
11006

11007
    """
11008
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
11009
                                  uuid=self.group_uuid,
11010
                                  alloc_policy=self.op.alloc_policy,
11011
                                  ndparams=self.op.ndparams)
11012

    
11013
    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
11014
    del self.remove_locks[locking.LEVEL_NODEGROUP]
11015

    
11016

    
11017
class LUGroupAssignNodes(NoHooksLU):
11018
  """Logical unit for assigning nodes to groups.
11019

11020
  """
11021
  REQ_BGL = False
11022

    
11023
  def ExpandNames(self):
11024
    # These raise errors.OpPrereqError on their own:
11025
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11026
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
11027

    
11028
    # We want to lock all the affected nodes and groups. We have readily
11029
    # available the list of nodes, and the *destination* group. To gather the
11030
    # list of "source" groups, we need to fetch node information later on.
11031
    self.needed_locks = {
11032
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
11033
      locking.LEVEL_NODE: self.op.nodes,
11034
      }
11035

    
11036
  def DeclareLocks(self, level):
11037
    if level == locking.LEVEL_NODEGROUP:
11038
      assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
11039

    
11040
      # Try to get all affected nodes' groups without having the group or node
11041
      # lock yet. Needs verification later in the code flow.
11042
      groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
11043

    
11044
      self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
11045

    
11046
  def CheckPrereq(self):
11047
    """Check prerequisites.
11048

11049
    """
11050
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
11051
    assert (frozenset(self.glm.list_owned(locking.LEVEL_NODE)) ==
11052
            frozenset(self.op.nodes))
11053

    
11054
    expected_locks = (set([self.group_uuid]) |
11055
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
11056
    actual_locks = self.glm.list_owned(locking.LEVEL_NODEGROUP)
11057
    if actual_locks != expected_locks:
11058
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
11059
                               " current groups are '%s', used to be '%s'" %
11060
                               (utils.CommaJoin(expected_locks),
11061
                                utils.CommaJoin(actual_locks)))
11062

    
11063
    self.node_data = self.cfg.GetAllNodesInfo()
11064
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
11065
    instance_data = self.cfg.GetAllInstancesInfo()
11066

    
11067
    if self.group is None:
11068
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11069
                               (self.op.group_name, self.group_uuid))
11070

    
11071
    (new_splits, previous_splits) = \
11072
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
11073
                                             for node in self.op.nodes],
11074
                                            self.node_data, instance_data)
11075

    
11076
    if new_splits:
11077
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
11078

    
11079
      if not self.op.force:
11080
        raise errors.OpExecError("The following instances get split by this"
11081
                                 " change and --force was not given: %s" %
11082
                                 fmt_new_splits)
11083
      else:
11084
        self.LogWarning("This operation will split the following instances: %s",
11085
                        fmt_new_splits)
11086

    
11087
        if previous_splits:
11088
          self.LogWarning("In addition, these already-split instances continue"
11089
                          " to be split across groups: %s",
11090
                          utils.CommaJoin(utils.NiceSort(previous_splits)))
11091

    
11092
  def Exec(self, feedback_fn):
11093
    """Assign nodes to a new group.
11094

11095
    """
11096
    for node in self.op.nodes:
11097
      self.node_data[node].group = self.group_uuid
11098

    
11099
    # FIXME: Depends on side-effects of modifying the result of
11100
    # C{cfg.GetAllNodesInfo}
11101

    
11102
    self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
11103

    
11104
  @staticmethod
11105
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
11106
    """Check for split instances after a node assignment.
11107

11108
    This method considers a series of node assignments as an atomic operation,
11109
    and returns information about split instances after applying the set of
11110
    changes.
11111

11112
    In particular, it returns information about newly split instances, and
11113
    instances that were already split, and remain so after the change.
11114

11115
    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
11116
    considered.
11117

11118
    @type changes: list of (node_name, new_group_uuid) pairs.
11119
    @param changes: list of node assignments to consider.
11120
    @param node_data: a dict with data for all nodes
11121
    @param instance_data: a dict with all instances to consider
11122
    @rtype: a two-tuple
11123
    @return: a list of instances that were previously okay and end up split as a
      consequence of this change, and a list of instances that were previously
      split and that this change does not fix.
11126

11127
    """
11128
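    # Illustrative sketch (hypothetical data, not taken from the source): with
    # a DRBD instance "inst1" whose primary "node1" and secondary "node2" both
    # currently live in group "g1", the assignment
    #
    #   changes = [("node2", "g2")]
    #
    # returns (["inst1"], []): the instance becomes newly split, and nothing
    # was split beforehand. Moving both of its nodes to "g2" together would
    # return ([], []).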
    changed_nodes = dict((node, group) for node, group in changes
11129
                         if node_data[node].group != group)
11130

    
11131
    all_split_instances = set()
11132
    previously_split_instances = set()
11133

    
11134
    def InstanceNodes(instance):
11135
      return [instance.primary_node] + list(instance.secondary_nodes)
11136

    
11137
    for inst in instance_data.values():
11138
      if inst.disk_template not in constants.DTS_INT_MIRROR:
11139
        continue
11140

    
11141
      instance_nodes = InstanceNodes(inst)
11142

    
11143
      if len(set(node_data[node].group for node in instance_nodes)) > 1:
11144
        previously_split_instances.add(inst.name)
11145

    
11146
      if len(set(changed_nodes.get(node, node_data[node].group)
11147
                 for node in instance_nodes)) > 1:
11148
        all_split_instances.add(inst.name)
11149

    
11150
    return (list(all_split_instances - previously_split_instances),
11151
            list(previously_split_instances & all_split_instances))
11152

    
11153

    
11154
class _GroupQuery(_QueryBase):
11155
  FIELDS = query.GROUP_FIELDS
11156

    
11157
  def ExpandNames(self, lu):
11158
    lu.needed_locks = {}
11159

    
11160
    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
11161
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
11162

    
11163
    if not self.names:
11164
      self.wanted = [name_to_uuid[name]
11165
                     for name in utils.NiceSort(name_to_uuid.keys())]
11166
    else:
11167
      # Accept names to be either names or UUIDs.
11168
      missing = []
11169
      self.wanted = []
11170
      all_uuid = frozenset(self._all_groups.keys())
11171

    
11172
      for name in self.names:
11173
        if name in all_uuid:
11174
          self.wanted.append(name)
11175
        elif name in name_to_uuid:
11176
          self.wanted.append(name_to_uuid[name])
11177
        else:
11178
          missing.append(name)
11179

    
11180
      if missing:
11181
        raise errors.OpPrereqError("Some groups do not exist: %s" %
11182
                                   utils.CommaJoin(missing),
11183
                                   errors.ECODE_NOENT)
11184

    
11185
  def DeclareLocks(self, lu, level):
11186
    pass
11187

    
11188
  def _GetQueryData(self, lu):
11189
    """Computes the list of node groups and their attributes.
11190

11191
    """
11192
    do_nodes = query.GQ_NODE in self.requested_data
11193
    do_instances = query.GQ_INST in self.requested_data
11194

    
11195
    group_to_nodes = None
11196
    group_to_instances = None
11197

    
11198
    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
11199
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
11200
    # latter GetAllInstancesInfo() is not enough, for we have to go through
11201
    # instance->node. Hence, we will need to process nodes even if we only need
11202
    # instance information.
11203
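    # E.g. (hypothetical values): group_to_nodes could end up as
    # {"uuid-a": ["node1", "node2"]} and group_to_instances as
    # {"uuid-a": ["inst1.example.com"]}.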
    if do_nodes or do_instances:
11204
      all_nodes = lu.cfg.GetAllNodesInfo()
11205
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
11206
      node_to_group = {}
11207

    
11208
      for node in all_nodes.values():
11209
        if node.group in group_to_nodes:
11210
          group_to_nodes[node.group].append(node.name)
11211
          node_to_group[node.name] = node.group
11212

    
11213
      if do_instances:
11214
        all_instances = lu.cfg.GetAllInstancesInfo()
11215
        group_to_instances = dict((uuid, []) for uuid in self.wanted)
11216

    
11217
        for instance in all_instances.values():
11218
          node = instance.primary_node
11219
          if node in node_to_group:
11220
            group_to_instances[node_to_group[node]].append(instance.name)
11221

    
11222
        if not do_nodes:
11223
          # Do not pass on node information if it was not requested.
11224
          group_to_nodes = None
11225

    
11226
    return query.GroupQueryData([self._all_groups[uuid]
11227
                                 for uuid in self.wanted],
11228
                                group_to_nodes, group_to_instances)
11229

    
11230

    
11231
class LUGroupQuery(NoHooksLU):
11232
  """Logical unit for querying node groups.
11233

11234
  """
11235
  REQ_BGL = False
11236

    
11237
  def CheckArguments(self):
11238
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
11239
                          self.op.output_fields, False)
11240

    
11241
  def ExpandNames(self):
11242
    self.gq.ExpandNames(self)
11243

    
11244
  def Exec(self, feedback_fn):
11245
    return self.gq.OldStyleQuery(self)
11246

    
11247

    
11248
class LUGroupSetParams(LogicalUnit):
11249
  """Modifies the parameters of a node group.
11250

11251
  """
11252
  HPATH = "group-modify"
11253
  HTYPE = constants.HTYPE_GROUP
11254
  REQ_BGL = False
11255

    
11256
  def CheckArguments(self):
11257
    all_changes = [
11258
      self.op.ndparams,
11259
      self.op.alloc_policy,
11260
      ]
11261

    
11262
    if all_changes.count(None) == len(all_changes):
11263
      raise errors.OpPrereqError("Please pass at least one modification",
11264
                                 errors.ECODE_INVAL)
11265

    
11266
  def ExpandNames(self):
11267
    # This raises errors.OpPrereqError on its own:
11268
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11269

    
11270
    self.needed_locks = {
11271
      locking.LEVEL_NODEGROUP: [self.group_uuid],
11272
      }
11273

    
11274
  def CheckPrereq(self):
11275
    """Check prerequisites.
11276

11277
    """
11278
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
11279

    
11280
    if self.group is None:
11281
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11282
                               (self.op.group_name, self.group_uuid))
11283

    
11284
    if self.op.ndparams:
11285
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
11286
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
11287
      self.new_ndparams = new_ndparams
11288

    
11289
  def BuildHooksEnv(self):
11290
    """Build hooks env.
11291

11292
    """
11293
    return {
11294
      "GROUP_NAME": self.op.group_name,
11295
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
11296
      }
11297

    
11298
  def BuildHooksNodes(self):
11299
    """Build hooks nodes.
11300

11301
    """
11302
    mn = self.cfg.GetMasterNode()
11303
    return ([mn], [mn])
11304

    
11305
  def Exec(self, feedback_fn):
11306
    """Modifies the node group.
11307

11308
    """
11309
    result = []
11310

    
11311
    if self.op.ndparams:
11312
      self.group.ndparams = self.new_ndparams
11313
      result.append(("ndparams", str(self.group.ndparams)))
11314

    
11315
    if self.op.alloc_policy:
11316
      self.group.alloc_policy = self.op.alloc_policy
11317

    
11318
    self.cfg.Update(self.group, feedback_fn)
11319
    return result
11320

    
11321

    
11322

    
11323
class LUGroupRemove(LogicalUnit):
11324
  HPATH = "group-remove"
11325
  HTYPE = constants.HTYPE_GROUP
11326
  REQ_BGL = False
11327

    
11328
  def ExpandNames(self):
11329
    # This raises errors.OpPrereqError on its own:
11330
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11331
    self.needed_locks = {
11332
      locking.LEVEL_NODEGROUP: [self.group_uuid],
11333
      }
11334

    
11335
  def CheckPrereq(self):
11336
    """Check prerequisites.
11337

11338
    This checks that the given group name exists as a node group, that it is
    empty (i.e., contains no nodes), and that it is not the last group of the
    cluster.
11341

11342
    """
11343
    # Verify that the group is empty.
11344
    group_nodes = [node.name
11345
                   for node in self.cfg.GetAllNodesInfo().values()
11346
                   if node.group == self.group_uuid]
11347

    
11348
    if group_nodes:
11349
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
11350
                                 " nodes: %s" %
11351
                                 (self.op.group_name,
11352
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
11353
                                 errors.ECODE_STATE)
11354

    
11355
    # Verify the cluster would not be left group-less.
11356
    if len(self.cfg.GetNodeGroupList()) == 1:
11357
      raise errors.OpPrereqError("Group '%s' is the only group,"
11358
                                 " cannot be removed" %
11359
                                 self.op.group_name,
11360
                                 errors.ECODE_STATE)
11361

    
11362
  def BuildHooksEnv(self):
11363
    """Build hooks env.
11364

11365
    """
11366
    return {
11367
      "GROUP_NAME": self.op.group_name,
11368
      }
11369

    
11370
  def BuildHooksNodes(self):
11371
    """Build hooks nodes.
11372

11373
    """
11374
    mn = self.cfg.GetMasterNode()
11375
    return ([mn], [mn])
11376

    
11377
  def Exec(self, feedback_fn):
11378
    """Remove the node group.
11379

11380
    """
11381
    try:
11382
      self.cfg.RemoveNodeGroup(self.group_uuid)
11383
    except errors.ConfigurationError:
11384
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
11385
                               (self.op.group_name, self.group_uuid))
11386

    
11387
    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11388

    
11389

    
11390
class LUGroupRename(LogicalUnit):
11391
  HPATH = "group-rename"
11392
  HTYPE = constants.HTYPE_GROUP
11393
  REQ_BGL = False
11394

    
11395
  def ExpandNames(self):
11396
    # This raises errors.OpPrereqError on its own:
11397
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11398

    
11399
    self.needed_locks = {
11400
      locking.LEVEL_NODEGROUP: [self.group_uuid],
11401
      }
11402

    
11403
  def CheckPrereq(self):
11404
    """Check prerequisites.
11405

11406
    Ensures requested new name is not yet used.
11407

11408
    """
11409
    try:
11410
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
11411
    except errors.OpPrereqError:
11412
      pass
11413
    else:
11414
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
11415
                                 " node group (UUID: %s)" %
11416
                                 (self.op.new_name, new_name_uuid),
11417
                                 errors.ECODE_EXISTS)
11418

    
11419
  def BuildHooksEnv(self):
11420
    """Build hooks env.
11421

11422
    """
11423
    return {
11424
      "OLD_NAME": self.op.group_name,
11425
      "NEW_NAME": self.op.new_name,
11426
      }
11427

    
11428
  def BuildHooksNodes(self):
11429
    """Build hooks nodes.
11430

11431
    """
11432
    mn = self.cfg.GetMasterNode()
11433

    
11434
    all_nodes = self.cfg.GetAllNodesInfo()
11435
    all_nodes.pop(mn, None)
11436

    
11437
    run_nodes = [mn]
11438
    run_nodes.extend(node.name for node in all_nodes.values()
11439
                     if node.group == self.group_uuid)
11440

    
11441
    return (run_nodes, run_nodes)
11442

    
11443
  def Exec(self, feedback_fn):
11444
    """Rename the node group.
11445

11446
    """
11447
    group = self.cfg.GetNodeGroup(self.group_uuid)
11448

    
11449
    if group is None:
11450
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11451
                               (self.op.group_name, self.group_uuid))
11452

    
11453
    group.name = self.op.new_name
11454
    self.cfg.Update(group, feedback_fn)
11455

    
11456
    return self.op.new_name
11457

    
11458

    
11459
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
11460
  """Generic tags LU.
11461

11462
  This is an abstract class which is the parent of all the other tags LUs.
11463

11464
  """
11465
  def ExpandNames(self):
11466
    self.group_uuid = None
11467
    self.needed_locks = {}
11468
    if self.op.kind == constants.TAG_NODE:
11469
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
11470
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
11471
    elif self.op.kind == constants.TAG_INSTANCE:
11472
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
11473
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
11474
    elif self.op.kind == constants.TAG_NODEGROUP:
11475
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
11476

    
11477
    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
11478
    # not possible to acquire the BGL based on opcode parameters)
11479

    
11480
  def CheckPrereq(self):
11481
    """Check prerequisites.
11482

11483
    """
11484
    if self.op.kind == constants.TAG_CLUSTER:
11485
      self.target = self.cfg.GetClusterInfo()
11486
    elif self.op.kind == constants.TAG_NODE:
11487
      self.target = self.cfg.GetNodeInfo(self.op.name)
11488
    elif self.op.kind == constants.TAG_INSTANCE:
11489
      self.target = self.cfg.GetInstanceInfo(self.op.name)
11490
    elif self.op.kind == constants.TAG_NODEGROUP:
11491
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
11492
    else:
11493
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
11494
                                 str(self.op.kind), errors.ECODE_INVAL)
11495

    
11496

    
11497
class LUTagsGet(TagsLU):
  """Returns the tags of a given object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    TagsLU.ExpandNames(self)

    # Share locks as this is only a read operation
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())

    
11515

    
11516
class LUTagsSearch(NoHooksLU):
11517
  """Searches the tags for a given pattern.
11518

11519
  """
11520
  REQ_BGL = False
11521

    
11522
  def ExpandNames(self):
11523
    self.needed_locks = {}
11524

    
11525
  def CheckPrereq(self):
11526
    """Check prerequisites.
11527

11528
    This checks the pattern passed for validity by compiling it.
11529

11530
    """
11531
    try:
11532
      self.re = re.compile(self.op.pattern)
11533
    except re.error, err:
11534
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
11535
                                 (self.op.pattern, err), errors.ECODE_INVAL)
11536

    
11537
  def Exec(self, feedback_fn):
11538
    """Returns the tag list.
11539

11540
    """
11541
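    # The result is a list of (path, tag) pairs, e.g. (hypothetical values):
    #   [("/cluster", "staging"), ("/instances/inst1.example.com", "staging")]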
    cfg = self.cfg
11542
    tgts = [("/cluster", cfg.GetClusterInfo())]
11543
    ilist = cfg.GetAllInstancesInfo().values()
11544
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
11545
    nlist = cfg.GetAllNodesInfo().values()
11546
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
11547
    tgts.extend(("/nodegroup/%s" % n.name, n)
11548
                for n in cfg.GetAllNodeGroupsInfo().values())
11549
    results = []
11550
    for path, target in tgts:
11551
      for tag in target.GetTags():
11552
        if self.re.search(tag):
11553
          results.append((path, tag))
11554
    return results
11555

    
11556

    
11557
class LUTagsSet(TagsLU):
11558
  """Sets a tag on a given object.
11559

11560
  """
11561
  REQ_BGL = False
11562

    
11563
  def CheckPrereq(self):
11564
    """Check prerequisites.
11565

11566
    This checks the type and length of the tag name and value.
11567

11568
    """
11569
    TagsLU.CheckPrereq(self)
11570
    for tag in self.op.tags:
11571
      objects.TaggableObject.ValidateTag(tag)
11572

    
11573
  def Exec(self, feedback_fn):
11574
    """Sets the tag.
11575

11576
    """
11577
    try:
11578
      for tag in self.op.tags:
11579
        self.target.AddTag(tag)
11580
    except errors.TagError, err:
11581
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
11582
    self.cfg.Update(self.target, feedback_fn)
11583

    
11584

    
11585
class LUTagsDel(TagsLU):
11586
  """Delete a list of tags from a given object.
11587

11588
  """
11589
  REQ_BGL = False
11590

    
11591
  def CheckPrereq(self):
11592
    """Check prerequisites.
11593

11594
    This checks that we have the given tag.
11595

11596
    """
11597
    TagsLU.CheckPrereq(self)
11598
    for tag in self.op.tags:
11599
      objects.TaggableObject.ValidateTag(tag)
11600
    del_tags = frozenset(self.op.tags)
11601
    cur_tags = self.target.GetTags()
11602

    
11603
    diff_tags = del_tags - cur_tags
11604
    if diff_tags:
11605
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
11606
      raise errors.OpPrereqError("Tag(s) %s not found" %
11607
                                 (utils.CommaJoin(diff_names), ),
11608
                                 errors.ECODE_NOENT)
11609

    
11610
  def Exec(self, feedback_fn):
11611
    """Remove the tag from the object.
11612

11613
    """
11614
    for tag in self.op.tags:
11615
      self.target.RemoveTag(tag)
11616
    self.cfg.Update(self.target, feedback_fn)
11617

    
11618

    
11619
class LUTestDelay(NoHooksLU):
11620
  """Sleep for a specified amount of time.
11621

11622
  This LU sleeps on the master and/or nodes for a specified amount of
11623
  time.
11624

11625
  """
11626
  REQ_BGL = False
11627

    
11628
  def ExpandNames(self):
11629
    """Expand names and set required locks.
11630

11631
    This expands the node list, if any.
11632

11633
    """
11634
    self.needed_locks = {}
11635
    if self.op.on_nodes:
11636
      # _GetWantedNodes can be used here, but is not always appropriate to use
11637
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
11638
      # more information.
11639
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
11640
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
11641

    
11642
  def _TestDelay(self):
11643
    """Do the actual sleep.
11644

11645
    """
11646
    if self.op.on_master:
11647
      if not utils.TestDelay(self.op.duration):
11648
        raise errors.OpExecError("Error during master delay test")
11649
    if self.op.on_nodes:
11650
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
11651
      for node, node_result in result.items():
11652
        node_result.Raise("Failure during rpc call to node %s" % node)
11653

    
11654
  def Exec(self, feedback_fn):
11655
    """Execute the test delay opcode, with the wanted repetitions.
11656

11657
    """
11658
    if self.op.repeat == 0:
11659
      self._TestDelay()
11660
    else:
11661
      top_value = self.op.repeat - 1
11662
      for i in range(self.op.repeat):
11663
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
11664
        self._TestDelay()
11665

    
11666

    
11667
class LUTestJqueue(NoHooksLU):
11668
  """Utility LU to test some aspects of the job queue.
11669

11670
  """
11671
  REQ_BGL = False
11672

    
11673
  # Must be lower than default timeout for WaitForJobChange to see whether it
11674
  # notices changed jobs
11675
  _CLIENT_CONNECT_TIMEOUT = 20.0
11676
  _CLIENT_CONFIRM_TIMEOUT = 60.0
11677

    
11678
  @classmethod
11679
  def _NotifyUsingSocket(cls, cb, errcls):
11680
    """Opens a Unix socket and waits for another program to connect.
11681

11682
    @type cb: callable
11683
    @param cb: Callback to send socket name to client
11684
    @type errcls: class
11685
    @param errcls: Exception class to use for errors
11686

11687
    """
11688
    # Using a temporary directory as there's no easy way to create temporary
11689
    # sockets without writing a custom loop around tempfile.mktemp and
11690
    # socket.bind
11691
    tmpdir = tempfile.mkdtemp()
11692
    try:
11693
      tmpsock = utils.PathJoin(tmpdir, "sock")
11694

    
11695
      logging.debug("Creating temporary socket at %s", tmpsock)
11696
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
11697
      try:
11698
        sock.bind(tmpsock)
11699
        sock.listen(1)
11700

    
11701
        # Send details to client
11702
        cb(tmpsock)
11703

    
11704
        # Wait for client to connect before continuing
11705
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
11706
        try:
11707
          (conn, _) = sock.accept()
11708
        except socket.error, err:
11709
          raise errcls("Client didn't connect in time (%s)" % err)
11710
      finally:
11711
        sock.close()
11712
    finally:
11713
      # Remove as soon as client is connected
11714
      shutil.rmtree(tmpdir)
11715

    
11716
    # Wait for client to close
11717
    try:
11718
      try:
11719
        # pylint: disable-msg=E1101
11720
        # Instance of '_socketobject' has no ... member
11721
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
11722
        conn.recv(1)
11723
      except socket.error, err:
11724
        raise errcls("Client failed to confirm notification (%s)" % err)
11725
    finally:
11726
      conn.close()
11727

    
11728
  def _SendNotification(self, test, arg, sockname):
11729
    """Sends a notification to the client.
11730

11731
    @type test: string
11732
    @param test: Test name
11733
    @param arg: Test argument (depends on test)
11734
    @type sockname: string
11735
    @param sockname: Socket path
11736

11737
    """
11738
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
11739

    
11740
  def _Notify(self, prereq, test, arg):
11741
    """Notifies the client of a test.
11742

11743
    @type prereq: bool
11744
    @param prereq: Whether this is a prereq-phase test
11745
    @type test: string
11746
    @param test: Test name
11747
    @param arg: Test argument (depends on test)
11748

11749
    """
11750
    if prereq:
11751
      errcls = errors.OpPrereqError
11752
    else:
11753
      errcls = errors.OpExecError
11754

    
11755
    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
11756
                                                  test, arg),
11757
                                   errcls)
11758

    
11759
  def CheckArguments(self):
11760
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
11761
    self.expandnames_calls = 0
11762

    
11763
  def ExpandNames(self):
11764
    checkargs_calls = getattr(self, "checkargs_calls", 0)
11765
    if checkargs_calls < 1:
11766
      raise errors.ProgrammerError("CheckArguments was not called")
11767

    
11768
    self.expandnames_calls += 1
11769

    
11770
    if self.op.notify_waitlock:
11771
      self._Notify(True, constants.JQT_EXPANDNAMES, None)
11772

    
11773
    self.LogInfo("Expanding names")
11774

    
11775
    # Get lock on master node (just to get a lock, not for a particular reason)
11776
    self.needed_locks = {
11777
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
11778
      }
11779

    
11780
  def Exec(self, feedback_fn):
11781
    if self.expandnames_calls < 1:
11782
      raise errors.ProgrammerError("ExpandNames was not called")
11783

    
11784
    if self.op.notify_exec:
11785
      self._Notify(False, constants.JQT_EXEC, None)
11786

    
11787
    self.LogInfo("Executing")
11788

    
11789
    if self.op.log_messages:
11790
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
11791
      for idx, msg in enumerate(self.op.log_messages):
11792
        self.LogInfo("Sending log message %s", idx + 1)
11793
        feedback_fn(constants.JQT_MSGPREFIX + msg)
11794
        # Report how many test messages have been sent
11795
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)
11796

    
11797
    if self.op.fail:
11798
      raise errors.OpExecError("Opcode failure was requested")
11799

    
11800
    return True
11801

    
11802

    
11803
class IAllocator(object):
11804
  """IAllocator framework.
11805

11806
  An IAllocator instance has four sets of attributes:
11807
    - cfg that is needed to query the cluster
11808
    - input data (all members of the _KEYS class attribute are required)
11809
    - four buffer attributes (in|out_data|text), that represent the
11810
      input (to the external script) in text and data structure format,
11811
      and the output from it, again in two formats
11812
    - the result variables from the script (success, info, result) for
11813
      easy usage
11814

11815
  """
11816
  # pylint: disable-msg=R0902
11817
  # lots of instance attributes
11818
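  # Hedged usage sketch (argument values are hypothetical, not from the
  # source); an allocation request driven from an LU could look like:
  #
  #   ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_ALLOC,
  #                    name=instance_name, mem_size=512, disks=disks,
  #                    disk_template=constants.DT_DRBD8, os="debian-image",
  #                    tags=[], nics=nics, vcpus=1, hypervisor=None)
  #   ial.Run(self.op.iallocator)
  #   if not ial.success:
  #     raise errors.OpPrereqError("Can't compute nodes using iallocator: %s"
  #                                % ial.info, errors.ECODE_NORES)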

    
11819
  def __init__(self, cfg, rpc, mode, **kwargs):
11820
    self.cfg = cfg
11821
    self.rpc = rpc
11822
    # init buffer variables
11823
    self.in_text = self.out_text = self.in_data = self.out_data = None
11824
    # init all input fields so that pylint is happy
11825
    self.mode = mode
11826
    self.mem_size = self.disks = self.disk_template = None
11827
    self.os = self.tags = self.nics = self.vcpus = None
11828
    self.hypervisor = None
11829
    self.relocate_from = None
11830
    self.name = None
11831
    self.evac_nodes = None
11832
    self.instances = None
11833
    self.reloc_mode = None
11834
    self.target_groups = None
11835
    # computed fields
11836
    self.required_nodes = None
11837
    # init result fields
11838
    self.success = self.info = self.result = None
11839

    
11840
    try:
11841
      (fn, keyset, self._result_check) = self._MODE_DATA[self.mode]
11842
    except KeyError:
11843
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
11844
                                   " IAllocator" % self.mode)
11845

    
11846
    for key in kwargs:
11847
      if key not in keyset:
11848
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
11849
                                     " IAllocator" % key)
11850
      setattr(self, key, kwargs[key])
11851

    
11852
    for key in keyset:
11853
      if key not in kwargs:
11854
        raise errors.ProgrammerError("Missing input parameter '%s' to"
11855
                                     " IAllocator" % key)
11856
    self._BuildInputData(compat.partial(fn, self))
11857

    
11858
  def _ComputeClusterData(self):
11859
    """Compute the generic allocator input data.
11860

11861
    This is the data that is independent of the actual operation.
11862

11863
    """
11864
    cfg = self.cfg
11865
    cluster_info = cfg.GetClusterInfo()
11866
    # cluster data
11867
    data = {
11868
      "version": constants.IALLOCATOR_VERSION,
11869
      "cluster_name": cfg.GetClusterName(),
11870
      "cluster_tags": list(cluster_info.GetTags()),
11871
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
11872
      # we don't have job IDs
11873
      }
11874
    ninfo = cfg.GetAllNodesInfo()
11875
    iinfo = cfg.GetAllInstancesInfo().values()
11876
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
11877

    
11878
    # node data
11879
    node_list = [n.name for n in ninfo.values() if n.vm_capable]
11880

    
11881
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
11882
      hypervisor_name = self.hypervisor
11883
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
11884
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
11885
    elif self.mode in (constants.IALLOCATOR_MODE_MEVAC,
11886
                       constants.IALLOCATOR_MODE_MRELOC):
11887
      hypervisor_name = cluster_info.enabled_hypervisors[0]
11888

    
11889
    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
11890
                                        hypervisor_name)
11891
    node_iinfo = \
11892
      self.rpc.call_all_instances_info(node_list,
11893
                                       cluster_info.enabled_hypervisors)
11894

    
11895
    data["nodegroups"] = self._ComputeNodeGroupData(cfg)
11896

    
11897
    config_ndata = self._ComputeBasicNodeData(ninfo)
11898
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
11899
                                                 i_list, config_ndata)
11900
    assert len(data["nodes"]) == len(ninfo), \
11901
        "Incomplete node data computed"
11902

    
11903
    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
11904

    
11905
    self.in_data = data
11906

    
11907
  @staticmethod
11908
  def _ComputeNodeGroupData(cfg):
11909
    """Compute node groups data.
11910

11911
    """
11912
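    # Result shape (hypothetical values):
    #   {"uuid-a": {"name": "default", "alloc_policy": "preferred"}}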
    ng = dict((guuid, {
11913
      "name": gdata.name,
11914
      "alloc_policy": gdata.alloc_policy,
11915
      })
11916
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
11917

    
11918
    return ng
11919

    
11920
  @staticmethod
11921
  def _ComputeBasicNodeData(node_cfg):
11922
    """Compute global node data.
11923

11924
    @rtype: dict
11925
    @returns: a dict of name: (node dict, node config)
11926

11927
    """
11928
    # fill in static (config-based) values
11929
    node_results = dict((ninfo.name, {
11930
      "tags": list(ninfo.GetTags()),
11931
      "primary_ip": ninfo.primary_ip,
11932
      "secondary_ip": ninfo.secondary_ip,
11933
      "offline": ninfo.offline,
11934
      "drained": ninfo.drained,
11935
      "master_candidate": ninfo.master_candidate,
11936
      "group": ninfo.group,
11937
      "master_capable": ninfo.master_capable,
11938
      "vm_capable": ninfo.vm_capable,
11939
      })
11940
      for ninfo in node_cfg.values())
11941

    
11942
    return node_results
11943

    
11944
  @staticmethod
11945
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
11946
                              node_results):
11947
    """Compute global node data.
11948

11949
    @param node_results: the basic node structures as filled from the config
11950

11951
    """
11952
    # make a copy of the current dict
11953
    node_results = dict(node_results)
11954
    for nname, nresult in node_data.items():
11955
      assert nname in node_results, "Missing basic data for node %s" % nname
11956
      ninfo = node_cfg[nname]
11957

    
11958
      if not (ninfo.offline or ninfo.drained):
11959
        nresult.Raise("Can't get data for node %s" % nname)
11960
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
11961
                                nname)
11962
        remote_info = nresult.payload
11963

    
11964
        for attr in ['memory_total', 'memory_free', 'memory_dom0',
11965
                     'vg_size', 'vg_free', 'cpu_total']:
11966
          if attr not in remote_info:
11967
            raise errors.OpExecError("Node '%s' didn't return attribute"
11968
                                     " '%s'" % (nname, attr))
11969
          if not isinstance(remote_info[attr], int):
11970
            raise errors.OpExecError("Node '%s' returned invalid value"
11971
                                     " for '%s': %s" %
11972
                                     (nname, attr, remote_info[attr]))
11973
        # compute memory used by primary instances
11974
        i_p_mem = i_p_up_mem = 0
11975
        for iinfo, beinfo in i_list:
11976
          if iinfo.primary_node == nname:
11977
            i_p_mem += beinfo[constants.BE_MEMORY]
11978
            if iinfo.name not in node_iinfo[nname].payload:
11979
              i_used_mem = 0
11980
            else:
11981
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
11982
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
11983
            remote_info['memory_free'] -= max(0, i_mem_diff)
11984

    
11985
            if iinfo.admin_up:
11986
              i_p_up_mem += beinfo[constants.BE_MEMORY]
11987

    
11988
        # compute memory used by instances
11989
        pnr_dyn = {
11990
          "total_memory": remote_info['memory_total'],
11991
          "reserved_memory": remote_info['memory_dom0'],
11992
          "free_memory": remote_info['memory_free'],
11993
          "total_disk": remote_info['vg_size'],
11994
          "free_disk": remote_info['vg_free'],
11995
          "total_cpus": remote_info['cpu_total'],
11996
          "i_pri_memory": i_p_mem,
11997
          "i_pri_up_memory": i_p_up_mem,
11998
          }
11999
        pnr_dyn.update(node_results[nname])
12000
        node_results[nname] = pnr_dyn
12001

    
12002
    return node_results
12003

    
12004
  @staticmethod
12005
  def _ComputeInstanceData(cluster_info, i_list):
12006
    """Compute global instance data.
12007

12008
    """
12009
    instance_data = {}
12010
    for iinfo, beinfo in i_list:
12011
      nic_data = []
12012
      for nic in iinfo.nics:
12013
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
12014
        nic_dict = {
12015
          "mac": nic.mac,
12016
          "ip": nic.ip,
12017
          "mode": filled_params[constants.NIC_MODE],
12018
          "link": filled_params[constants.NIC_LINK],
12019
          }
12020
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
12021
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
12022
        nic_data.append(nic_dict)
12023
      pir = {
12024
        "tags": list(iinfo.GetTags()),
12025
        "admin_up": iinfo.admin_up,
12026
        "vcpus": beinfo[constants.BE_VCPUS],
12027
        "memory": beinfo[constants.BE_MEMORY],
12028
        "os": iinfo.os,
12029
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
12030
        "nics": nic_data,
12031
        "disks": [{constants.IDISK_SIZE: dsk.size,
12032
                   constants.IDISK_MODE: dsk.mode}
12033
                  for dsk in iinfo.disks],
12034
        "disk_template": iinfo.disk_template,
12035
        "hypervisor": iinfo.hypervisor,
12036
        }
12037
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
12038
                                                 pir["disks"])
12039
      instance_data[iinfo.name] = pir
12040

    
12041
    return instance_data
12042

    
12043
  def _AddNewInstance(self):
12044
    """Add new instance data to allocator structure.
12045

12046
    This, in combination with _ComputeClusterData, will create the
12047
    correct structure needed as input for the allocator.
12048

12049
    The checks for the completeness of the opcode must have already been
12050
    done.
12051

12052
    """
12053
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)
12054

    
12055
    if self.disk_template in constants.DTS_INT_MIRROR:
12056
      self.required_nodes = 2
12057
    else:
12058
      self.required_nodes = 1
12059

    
12060
    request = {
12061
      "name": self.name,
12062
      "disk_template": self.disk_template,
12063
      "tags": self.tags,
12064
      "os": self.os,
12065
      "vcpus": self.vcpus,
12066
      "memory": self.mem_size,
12067
      "disks": self.disks,
12068
      "disk_space_total": disk_space,
12069
      "nics": self.nics,
12070
      "required_nodes": self.required_nodes,
12071
      }
12072

    
12073
    return request
12074

    
12075
  def _AddRelocateInstance(self):
12076
    """Add relocate instance data to allocator structure.
12077

12078
    This, in combination with _ComputeClusterData, will create the
12079
    correct structure needed as input for the allocator.
12080

12081
    The checks for the completeness of the opcode must have already been
12082
    done.
12083

12084
    """
12085
    instance = self.cfg.GetInstanceInfo(self.name)
12086
    if instance is None:
12087
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
12088
                                   " IAllocator" % self.name)
12089

    
12090
    if instance.disk_template not in constants.DTS_MIRRORED:
12091
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
12092
                                 errors.ECODE_INVAL)
12093

    
12094
    if instance.disk_template in constants.DTS_INT_MIRROR and \
12095
        len(instance.secondary_nodes) != 1:
12096
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
12097
                                 errors.ECODE_STATE)
12098

    
12099
    self.required_nodes = 1
12100
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
12101
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
12102

    
12103
    request = {
12104
      "name": self.name,
12105
      "disk_space_total": disk_space,
12106
      "required_nodes": self.required_nodes,
12107
      "relocate_from": self.relocate_from,
12108
      }
12109
    return request
12110

    
12111
  def _AddEvacuateNodes(self):
12112
    """Add evacuate nodes data to allocator structure.
12113

12114
    """
12115
    request = {
12116
      "evac_nodes": self.evac_nodes
12117
      }
12118
    return request
12119

    
12120
  def _AddMultiRelocate(self):
12121
    """Get data for multi-relocate requests.
12122

12123
    """
12124
    return {
12125
      "instances": self.instances,
12126
      "reloc_mode": self.reloc_mode,
12127
      "target_groups": self.target_groups,
12128
      }
12129

    
12130
  def _BuildInputData(self, fn):
12131
    """Build input data structures.
12132

12133
    """
12134
    self._ComputeClusterData()
12135

    
12136
    request = fn()
12137
    request["type"] = self.mode
12138
    self.in_data["request"] = request
12139

    
12140
    self.in_text = serializer.Dump(self.in_data)
12141

    
12142
  _MODE_DATA = {
12143
    constants.IALLOCATOR_MODE_ALLOC:
12144
      (_AddNewInstance,
12145
       ["name", "mem_size", "disks", "disk_template", "os", "tags", "nics",
12146
        "vcpus", "hypervisor"], ht.TList),
12147
    constants.IALLOCATOR_MODE_RELOC:
12148
      (_AddRelocateInstance, ["name", "relocate_from"], ht.TList),
12149
    constants.IALLOCATOR_MODE_MEVAC:
12150
      (_AddEvacuateNodes, ["evac_nodes"],
12151
       ht.TListOf(ht.TAnd(ht.TIsLength(2),
12152
                          ht.TListOf(ht.TString)))),
12153
    constants.IALLOCATOR_MODE_MRELOC:
12154
      (_AddMultiRelocate, ["instances", "reloc_mode", "target_groups"],
12155
       ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
12156
         # pylint: disable-msg=E1101
12157
         # Class '...' has no 'OP_ID' member
12158
         "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
12159
                              opcodes.OpInstanceMigrate.OP_ID,
12160
                              opcodes.OpInstanceReplaceDisks.OP_ID])
12161
         })))),
12162
    }
12163
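  # Note on result shapes (as enforced by the checks above and consumed in
  # _ValidateResult): the "relocate" result is a list of node names, while
  # "multi-evacuate" returns a list of (instance, new_secondary) pairs, e.g.
  # (hypothetical values): [["inst1.example.com", "node3.example.com"]].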

    
12164
  def Run(self, name, validate=True, call_fn=None):
12165
    """Run an instance allocator and return the results.
12166

12167
    """
12168
    if call_fn is None:
12169
      call_fn = self.rpc.call_iallocator_runner
12170

    
12171
    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
12172
    result.Raise("Failure while running the iallocator script")
12173

    
12174
    self.out_text = result.payload
12175
    if validate:
12176
      self._ValidateResult()
12177

    
12178
  def _ValidateResult(self):
12179
    """Process the allocator results.
12180

12181
    This will process and if successful save the result in
12182
    self.out_data and the other parameters.
12183

12184
    """
12185
    try:
12186
      rdict = serializer.Load(self.out_text)
12187
    except Exception, err:
12188
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
12189

    
12190
    if not isinstance(rdict, dict):
12191
      raise errors.OpExecError("Can't parse iallocator results: not a dict")
12192

    
12193
    # TODO: remove backwards compatibility in later versions
12194
    if "nodes" in rdict and "result" not in rdict:
12195
      rdict["result"] = rdict["nodes"]
12196
      del rdict["nodes"]
12197

    
12198
    for key in "success", "info", "result":
12199
      if key not in rdict:
12200
        raise errors.OpExecError("Can't parse iallocator results:"
12201
                                 " missing key '%s'" % key)
12202
      setattr(self, key, rdict[key])
12203

    
12204
    if not self._result_check(self.result):
12205
      raise errors.OpExecError("Iallocator returned invalid result,"
12206
                               " expected %s, got %s" %
12207
                               (self._result_check, self.result),
12208
                               errors.ECODE_INVAL)
12209

    
12210
    if self.mode in (constants.IALLOCATOR_MODE_RELOC,
12211
                     constants.IALLOCATOR_MODE_MEVAC):
12212
      node2group = dict((name, ndata["group"])
12213
                        for (name, ndata) in self.in_data["nodes"].items())
12214

    
12215
      fn = compat.partial(self._NodesToGroups, node2group,
12216
                          self.in_data["nodegroups"])
12217

    
12218
      if self.mode == constants.IALLOCATOR_MODE_RELOC:
12219
        assert self.relocate_from is not None
12220
        assert self.required_nodes == 1
12221

    
12222
        request_groups = fn(self.relocate_from)
12223
        result_groups = fn(rdict["result"])
12224

    
12225
        if result_groups != request_groups:
12226
          raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
12227
                                   " differ from original groups (%s)" %
12228
                                   (utils.CommaJoin(result_groups),
12229
                                    utils.CommaJoin(request_groups)))
12230
      elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
12231
        request_groups = fn(self.evac_nodes)
12232
        for (instance_name, secnode) in self.result:
12233
          result_groups = fn([secnode])
12234
          if result_groups != request_groups:
12235
            raise errors.OpExecError("Iallocator returned new secondary node"
12236
                                     " '%s' (group '%s') for instance '%s'"
12237
                                     " which is not in original group '%s'" %
12238
                                     (secnode, utils.CommaJoin(result_groups),
12239
                                      instance_name,
12240
                                      utils.CommaJoin(request_groups)))
12241
      else:
12242
        raise errors.ProgrammerError("Unhandled mode '%s'" % self.mode)
12243

    
12244
    self.out_data = rdict
12245

    
12246
  @staticmethod
12247
  def _NodesToGroups(node2group, groups, nodes):
12248
    """Returns a list of unique group names for a list of nodes.
12249

12250
    @type node2group: dict
12251
    @param node2group: Map from node name to group UUID
12252
    @type groups: dict
12253
    @param groups: Group information
12254
    @type nodes: list
12255
    @param nodes: Node names
12256

12257
    """
12258
    result = set()
12259
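    # Illustrative sketch (hypothetical values):
    #   node2group = {"node1": "uuid-a", "node2": "uuid-b"}
    #   groups = {"uuid-a": {"name": "default"}}
    #   _NodesToGroups(node2group, groups, ["node1", "node2", "ghost"])
    #   # -> ["default", "uuid-b"]: unknown nodes are skipped and groups
    #   #    missing from the map fall back to their UUID.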

    
12260
    for node in nodes:
12261
      try:
12262
        group_uuid = node2group[node]
12263
      except KeyError:
12264
        # Ignore unknown node
12265
        pass
12266
      else:
12267
        try:
12268
          group = groups[group_uuid]
12269
        except KeyError:
12270
          # Can't find group, let's use UUID
12271
          group_name = group_uuid
12272
        else:
12273
          group_name = group["name"]
12274

    
12275
        result.add(group_name)
12276

    
12277
    return sorted(result)
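
  # Illustrative example (hypothetical data):
  #   _NodesToGroups({"node1": "uuid-a"},
  #                  {"uuid-a": {"name": "default"}},
  #                  ["node1", "ghost-node"])
  # returns ["default"]: unknown nodes are skipped, and a group UUID with no
  # entry in the groups dict is used verbatim as the group name.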


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode of
    the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
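      # Illustrative example: a valid disks value for this test opcode looks
      # like [{"size": 1024, "mode": "w"}] (size in MiB, mode "r" or "w"), as
      # enforced by the row checks below.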
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    elif self.op.mode == constants.IALLOCATOR_MODE_MRELOC:
      if self.op.instances:
        self.op.instances = _GetWantedInstances(self, self.op.instances)
      else:
        raise errors.OpPrereqError("Missing instances to relocate",
                                   errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)
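
  # In the DIR_IN direction Exec only builds and returns the request text that
  # would be handed to an allocator; in the DIR_OUT direction it runs the
  # named allocator script and returns its (unvalidated) output.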
  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    elif self.op.mode == constants.IALLOCATOR_MODE_MRELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       reloc_mode=self.op.reloc_mode,
                       target_groups=self.op.target_groups)
    else:
      raise errors.ProgrammerError("Uncaught mode %s in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
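
# For example, _GetQueryImplementation(constants.QR_NODE) returns the
# _NodeQuery class registered above, while an unknown resource name raises
# OpPrereqError with ECODE_INVAL.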