#
#

# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable-msg=W0201,C0302

# W0201 since most LU attributes are defined in CheckPrereq or similar
# functions

# C0302: since we have waaaay too many lines in this module

import os
import os.path
import time
import re
import platform
import logging
import copy
import OpenSSL
import socket
import tempfile
import shutil
import itertools

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes
from ganeti import ht

import ganeti.masterd.instance # pylint: disable-msg=W0611


def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
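# Usage sketch (illustrative only): the return value is usually tested for
# truth, e.g.
#   if not _SupportsOob(self.cfg, node):
#     raise errors.OpPrereqError("OOB is not supported for node %s" % node.name,
#                                errors.ECODE_STATE)
# since an empty OOB program string means out-of-band management is disabled.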


class ResultWithJobs:
  """Data container for LU results with jobs.

  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
  contained in the C{jobs} attribute and include the job IDs in the opcode
  result.

  """
  def __init__(self, jobs, **kwargs):
    """Initializes this class.

    Additional return values can be specified as keyword arguments.

    @type jobs: list of lists of L{opcode.OpCode}
    @param jobs: A list of lists of opcode objects

    """
    self.jobs = jobs
    self.other = kwargs
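    # Usage sketch (illustrative, hypothetical opcodes): an LU's Exec may end
    # with
    #   return ResultWithJobs([[op_a, op_b], [op_c]], message="submitted")
    # which asks the master to submit two jobs (the first consisting of two
    # opcodes) and to include the resulting job IDs, plus the extra keyword
    # values, in the opcode result.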


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.glm = context.glm
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    # logging
    self.Log = processor.Log # pylint: disable-msg=C0103
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError
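    # Minimal sketch (illustrative) of ExpandNames in a concurrent subclass,
    # i.e. one that sets REQ_BGL = False and wants shared node locks:
    #   def ExpandNames(self):
    #     self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    #     self.share_locks[locking.LEVEL_NODE] = 1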

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      pass

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @rtype: dict
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. No nodes should be returned as an
      empty list (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks.  By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the unused-argument and
    # could-be-a-function warnings
    # pylint: disable-msg=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.glm.list_owned(locking.LEVEL_INSTANCE):
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]
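    # Usage sketch (illustrative): an LU that locked instances in ExpandNames
    # typically also sets
    #   self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    # there, and then calls self._LockInstancesNodes() from DeclareLocks once
    # level == locking.LEVEL_NODE, as described in the docstring above.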


class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")


class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    self.lu = lu

    # Shortcuts
    self.cfg = lu.cfg
    self.rpc = lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    pass

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError
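    # Sketch of how tasklets plug in (illustrative, hypothetical class name):
    # an LU's ExpandNames can set
    #   self.tasklets = [_SomeTasklet(self, ...)]
    # and LogicalUnit.CheckPrereq/Exec above will then iterate over that list
    # instead of requiring LU-level implementations of those two methods.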


class _QueryBase:
  """Base for query utility classes.

  """
  #: Attribute holding field definitions
  FIELDS = None

  def __init__(self, filter_, fields, use_locking):
    """Initializes this class.

    """
    self.use_locking = use_locking

    self.query = query.Query(self.FIELDS, fields, filter_=filter_,
                             namefield="name")
    self.requested_data = self.query.RequestedData()
    self.names = self.query.RequestedNames()

    # Sort only if no names were requested
    self.sort_by_name = not self.names

    self.do_locking = None
    self.wanted = None

  def _GetNames(self, lu, all_names, lock_level):
    """Helper function to determine names asked for in the query.

    """
    if self.do_locking:
      names = lu.glm.list_owned(lock_level)
    else:
      names = all_names

    if self.wanted == locking.ALL_SET:
      assert not self.names
      # caller didn't specify names, so ordering is not important
      return utils.NiceSort(names)

    # caller specified names and we must keep the same order
    assert self.names
    assert not self.do_locking or lu.glm.is_owned(lock_level)

    missing = set(self.wanted).difference(names)
    if missing:
      raise errors.OpExecError("Some items were removed before retrieving"
                               " their data: %s" % missing)

    # Return expanded names
    return self.wanted

  def ExpandNames(self, lu):
    """Expand names for this query.

    See L{LogicalUnit.ExpandNames}.

    """
    raise NotImplementedError()

  def DeclareLocks(self, lu, level):
    """Declare locks for this query.

    See L{LogicalUnit.DeclareLocks}.

    """
    raise NotImplementedError()

  def _GetQueryData(self, lu):
    """Collects all data for this query.

    @return: Query data object

    """
    raise NotImplementedError()

  def NewStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
                                  sort_by_name=self.sort_by_name)

  def OldStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return self.query.OldStyleQuery(self._GetQueryData(lu),
                                    sort_by_name=self.sort_by_name)


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy
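# Worked example (illustrative):
#   _GetUpdatedParams({"a": 1, "b": 2}, {"a": constants.VALUE_DEFAULT, "c": 3})
# returns {"b": 2, "c": 3}: "a" is removed so it falls back to its default,
# "c" is added, and the input dictionaries are left untouched (deep copy).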


def _ReleaseLocks(lu, level, names=None, keep=None):
  """Releases locks owned by an LU.

  @type lu: L{LogicalUnit}
  @param level: Lock level
  @type names: list or None
  @param names: Names of locks to release
  @type keep: list or None
  @param keep: Names of locks to retain

  """
  assert not (keep is not None and names is not None), \
         "Only one of the 'names' and the 'keep' parameters can be given"

  if names is not None:
    should_release = names.__contains__
  elif keep:
    should_release = lambda name: name not in keep
  else:
    should_release = None

  if should_release:
    retain = []
    release = []

    # Determine which locks to release
    for name in lu.glm.list_owned(level):
      if should_release(name):
        release.append(name)
      else:
        retain.append(name)

    assert len(lu.glm.list_owned(level)) == (len(retain) + len(release))

    # Release just some locks
    lu.glm.release(level, names=release)

    assert frozenset(lu.glm.list_owned(level)) == frozenset(retain)
  else:
    # Release everything
    lu.glm.release(level)

    assert not lu.glm.is_owned(level), "No locks should be owned"
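# Usage sketch (illustrative names): either list the locks to drop,
#   _ReleaseLocks(self, locking.LEVEL_NODE, names=unneeded_nodes)
# or list the ones to keep,
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.node_name])
# while calling it with neither argument releases every lock at that level.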


def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.

  """
  hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
  try:
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except:
    # pylint: disable-msg=W0702
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node does not support the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceDown(lu, instance, reason):
  """Ensure that an instance is not running."""
  if instance.admin_up:
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
                               (instance.name, reason), errors.ECODE_STATE)

  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
              prereq=True, ecode=errors.ECODE_ENVIRON)

  if instance.name in ins_l.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, reason), errors.ECODE_STATE)


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instances."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name):
  """Builds instance related env variables for hooks

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env
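# Example result (illustrative): for an instance with one bridged NIC and one
# disk, the dict contains keys such as OP_TARGET, INSTANCE_NAME,
# INSTANCE_PRIMARY, INSTANCE_NIC_COUNT (1), INSTANCE_NIC0_MAC,
# INSTANCE_NIC0_BRIDGE, INSTANCE_DISK_COUNT (1) and INSTANCE_DISK0_SIZE; the
# hooks runner later prefixes every key with "GANETI_".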


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.admin_up,
    'memory': bep[constants.BE_MEMORY],
    'vcpus': bep[constants.BE_VCPUS],
    'nics': _NICListToTuple(lu, instance.nics),
    'disk_template': instance.disk_template,
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
    'bep': bep,
    'hvp': hvp,
    'hypervisor_name': instance.hypervisor,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max by one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should
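# Worked example (illustrative numbers): with candidate_pool_size = 10 and
# GetMasterCandidateStats reporting mc_now = 3, mc_should = 3, the adjusted
# target becomes min(3 + 1, 10) = 4, so 3 < 4 and the new node promotes
# itself to master candidate.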


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  if not os_obj.supported_variants:
    return
  variant = objects.OS.GetVariant(name)
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """

  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty
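# Return value sketch (illustrative): the result is a list of disk indexes,
# e.g. [1], meaning that only the instance's second disk is reported in the
# LDS_FAULTY state on the given node; an empty list means no faulty disks.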


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found;"
                                 " please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator")


class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master)

    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    return master


def _VerifyCertificate(filename):
  """Verifies a certificate for LUClusterVerify.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable-msg=W0703
    return (LUClusterVerify.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUClusterVerify.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUClusterVerify.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
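# Return value sketch (illustrative): the function yields a (severity,
# message) pair -- (None, None) for a healthy certificate, or one of the
# LUClusterVerify.ETYPE_* constants together with a "While verifying <file>:
# ..." message when the certificate is close to expiring or invalid.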


class _VerifyErrors(object):
  """Mix-in for cluster/group verify LUs.

  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
  self.op and self._feedback_fn to be available.)

  """
  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEOS = (TNODE, "ENODEOS")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")
  ENODEOOBPATH = (TNODE, "ENODEOOBPATH")

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes: # This is a mix-in. pylint: disable-msg=E1101
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable-msg=E1101

  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = (bool(cond)
            or self.op.debug_simulate_errors) # pylint: disable-msg=E1101
    if cond:
      self._Error(*args, **kwargs)
    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond
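    # Usage sketch (illustrative): verify routines usually alias the helper,
    #   _ErrorIf = self._ErrorIf
    # and then express individual checks as
    #   _ErrorIf(test, self.ENODERPC, node, "unable to verify node: ...")
    # where a true condition reports the problem and, for ETYPE_ERROR
    # entries, marks the whole verification as bad.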


class LUClusterVerify(LogicalUnit, _VerifyErrors):
  """Verifies the cluster status.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  _HOOKS_INDENT_RE = re.compile("^", re.M)

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @type name: string
    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dictionary of {primary-node: list of instances} for all
        instances for which this node is secondary (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @type oslist: list
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
    @type vm_capable: boolean
    @ivar vm_capable: whether the node can host instances

    """
    def __init__(self, offline=False, name=None, vm_capable=True):
      self.name = name
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.vm_capable = vm_capable
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def CheckPrereq(self):
    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
    self.my_node_names = utils.NiceSort(list(self.all_node_info))
    self.my_node_info = self.all_node_info
    self.my_inst_names = utils.NiceSort(list(self.all_inst_info))
    self.my_inst_info = self.all_inst_info

  def _VerifyNode(self, ninfo, nresult):
    """Perform some basic validation on data returned from a node.

      - check the result data structure is well formed and has all the
        mandatory fields
      - check ganeti version

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @rtype: boolean
    @return: whether overall this call was successful (and we can expect
         reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, self.ENODERPC, node,
                  "unable to verify node: no data returned")
    if test:
      return False

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return False

    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return False

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if ninfo.vm_capable and isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
    if ninfo.vm_capable and isinstance(hvp_result, list):
      for item, hv_name, hv_result in hvp_result:
        _ErrorIf(True, self.ENODEHV, node,
                 "hypervisor %s parameter verify failure (source %s): %s",
                 hv_name, item, hv_result)

    test = nresult.get(constants.NV_NODESETUP,
                       ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True

1519
  def _VerifyNodeTime(self, ninfo, nresult,
1520
                      nvinfo_starttime, nvinfo_endtime):
1521
    """Check the node time.
1522

1523
    @type ninfo: L{objects.Node}
1524
    @param ninfo: the node to check
1525
    @param nresult: the remote results for the node
1526
    @param nvinfo_starttime: the start time of the RPC call
1527
    @param nvinfo_endtime: the end time of the RPC call
1528

1529
    """
1530
    node = ninfo.name
1531
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1532

    
1533
    ntime = nresult.get(constants.NV_TIME, None)
1534
    try:
1535
      ntime_merged = utils.MergeTime(ntime)
1536
    except (ValueError, TypeError):
1537
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1538
      return
1539

    
1540
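    # Flag a time problem only when the node's clock falls outside the RPC
    # call window, extended by the allowed clock skew on either side.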
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1541
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1542
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1543
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1544
    else:
1545
      ntime_diff = None
1546

    
1547
    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1548
             "Node time diverges by at least %s from master node time",
1549
             ntime_diff)
1550

    
1551
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1552
    """Check the node LVM results.
1553

1554
    @type ninfo: L{objects.Node}
1555
    @param ninfo: the node to check
1556
    @param nresult: the remote results for the node
1557
    @param vg_name: the configured VG name
1558

1559
    """
1560
    if vg_name is None:
1561
      return
1562

    
1563
    node = ninfo.name
1564
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1565

    
1566
    # checks vg existence and size > 20G
1567
    vglist = nresult.get(constants.NV_VGLIST, None)
1568
    test = not vglist
1569
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1570
    if not test:
1571
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1572
                                            constants.MIN_VG_SIZE)
1573
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1574

    
1575
    # check pv names
1576
    pvlist = nresult.get(constants.NV_PVLIST, None)
1577
    test = pvlist is None
1578
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1579
    if not test:
1580
      # check that ':' is not present in PV names, since it's a
1581
      # special character for lvcreate (denotes the range of PEs to
1582
      # use on the PV)
1583
      for _, pvname, owner_vg in pvlist:
1584
        test = ":" in pvname
1585
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1586
                 " '%s' of VG '%s'", pvname, owner_vg)
1587

    
1588
  def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1589
    """Check the node bridges.
1590

1591
    @type ninfo: L{objects.Node}
1592
    @param ninfo: the node to check
1593
    @param nresult: the remote results for the node
1594
    @param bridges: the expected list of bridges
1595

1596
    """
1597
    if not bridges:
1598
      return
1599

    
1600
    node = ninfo.name
1601
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1602

    
1603
    missing = nresult.get(constants.NV_BRIDGES, None)
1604
    test = not isinstance(missing, list)
1605
    _ErrorIf(test, self.ENODENET, node,
1606
             "did not return valid bridge information")
1607
    if not test:
1608
      _ErrorIf(bool(missing), self.ENODENET, node, "missing bridges: %s" %
1609
               utils.CommaJoin(sorted(missing)))
1610

    
1611
  def _VerifyNodeNetwork(self, ninfo, nresult):
1612
    """Check the node network connectivity results.
1613

1614
    @type ninfo: L{objects.Node}
1615
    @param ninfo: the node to check
1616
    @param nresult: the remote results for the node
1617

1618
    """
1619
    node = ninfo.name
1620
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1621

    
1622
    test = constants.NV_NODELIST not in nresult
1623
    _ErrorIf(test, self.ENODESSH, node,
1624
             "node hasn't returned node ssh connectivity data")
1625
    if not test:
1626
      if nresult[constants.NV_NODELIST]:
1627
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1628
          _ErrorIf(True, self.ENODESSH, node,
1629
                   "ssh communication with node '%s': %s", a_node, a_msg)
1630

    
1631
    test = constants.NV_NODENETTEST not in nresult
1632
    _ErrorIf(test, self.ENODENET, node,
1633
             "node hasn't returned node tcp connectivity data")
1634
    if not test:
1635
      if nresult[constants.NV_NODENETTEST]:
1636
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1637
        for anode in nlist:
1638
          _ErrorIf(True, self.ENODENET, node,
1639
                   "tcp communication with node '%s': %s",
1640
                   anode, nresult[constants.NV_NODENETTEST][anode])
1641

    
1642
    test = constants.NV_MASTERIP not in nresult
1643
    _ErrorIf(test, self.ENODENET, node,
1644
             "node hasn't returned node master IP reachability data")
1645
    if not test:
1646
      if not nresult[constants.NV_MASTERIP]:
1647
        if node == self.master_node:
1648
          msg = "the master node cannot reach the master IP (not configured?)"
1649
        else:
1650
          msg = "cannot reach the master IP"
1651
        _ErrorIf(True, self.ENODENET, node, msg)
1652

    
1653
  def _VerifyInstance(self, instance, instanceconfig, node_image,
1654
                      diskstatus):
1655
    """Verify an instance.
1656

1657
    This function checks to see if the required block devices are
1658
    available on the instance's node.
1659

1660
    """
1661
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1662
    node_current = instanceconfig.primary_node
1663

    
1664
    node_vol_should = {}
1665
    instanceconfig.MapLVsByNode(node_vol_should)
1666

    
1667
    for node in node_vol_should:
1668
      n_img = node_image[node]
1669
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1670
        # ignore missing volumes on offline or broken nodes
1671
        continue
1672
      for volume in node_vol_should[node]:
1673
        test = volume not in n_img.volumes
1674
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1675
                 "volume %s missing on node %s", volume, node)
1676

    
1677
    if instanceconfig.admin_up:
1678
      pri_img = node_image[node_current]
1679
      test = instance not in pri_img.instances and not pri_img.offline
1680
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
1681
               "instance not running on its primary node %s",
1682
               node_current)
1683

    
1684
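    # flatten diskstatus into (node, success, status, disk index) tuples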
    diskdata = [(nname, success, status, idx)
1685
                for (nname, disks) in diskstatus.items()
1686
                for idx, (success, status) in enumerate(disks)]
1687

    
1688
    for nname, success, bdev_status, idx in diskdata:
1689
      # the 'ghost node' construction in Exec() ensures that we have a
1690
      # node here
1691
      snode = node_image[nname]
1692
      bad_snode = snode.ghost or snode.offline
1693
      _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
1694
               self.EINSTANCEFAULTYDISK, instance,
1695
               "couldn't retrieve status for disk/%s on %s: %s",
1696
               idx, nname, bdev_status)
1697
      _ErrorIf((instanceconfig.admin_up and success and
1698
                bdev_status.ldisk_status == constants.LDS_FAULTY),
1699
               self.EINSTANCEFAULTYDISK, instance,
1700
               "disk/%s on %s is faulty", idx, nname)
1701

    
1702
  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1703
    """Verify if there are any unknown volumes in the cluster.
1704

1705
    The .os, .swap and backup volumes are ignored. All other volumes are
1706
    reported as unknown.
1707

1708
    @type reserved: L{ganeti.utils.FieldSet}
1709
    @param reserved: a FieldSet of reserved volume names
1710

1711
    """
1712
    for node, n_img in node_image.items():
1713
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1714
        # skip non-healthy nodes
1715
        continue
1716
      for volume in n_img.volumes:
1717
        test = ((node not in node_vol_should or
1718
                volume not in node_vol_should[node]) and
1719
                not reserved.Matches(volume))
1720
        self._ErrorIf(test, self.ENODEORPHANLV, node,
1721
                      "volume %s is unknown", volume)
1722

    
1723
  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1724
    """Verify N+1 Memory Resilience.
1725

1726
    Check that if one single node dies we can still start all the
1727
    instances it was primary for.
1728

1729
    """
1730
    cluster_info = self.cfg.GetClusterInfo()
1731
    for node, n_img in node_image.items():
1732
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to, should a single other node in the cluster fail.
1735
      # FIXME: not ready for failover to an arbitrary node
1736
      # FIXME: does not support file-backed instances
1737
      # WARNING: we currently take into account down instances as well
1738
      # as up ones, considering that even if they're down someone
1739
      # might want to start them even in the event of a node failure.
1740
      if n_img.offline:
1741
        # we're skipping offline nodes from the N+1 warning, since
1742
        # most likely we don't have good memory information from them;
1743
        # we already list instances living on such nodes, and that's
1744
        # enough warning
1745
        continue
1746
      for prinode, instances in n_img.sbp.items():
1747
        needed_mem = 0
1748
        for instance in instances:
1749
          bep = cluster_info.FillBE(instance_cfg[instance])
1750
          if bep[constants.BE_AUTO_BALANCE]:
1751
            needed_mem += bep[constants.BE_MEMORY]
1752
        test = n_img.mfree < needed_mem
1753
        self._ErrorIf(test, self.ENODEN1, node,
1754
                      "not enough memory to accomodate instance failovers"
1755
                      " should node %s fail (%dMiB needed, %dMiB available)",
1756
                      prinode, needed_mem, n_img.mfree)
1757

    
1758
  @classmethod
1759
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
1760
                   (files_all, files_all_opt, files_mc, files_vm)):
1761
    """Verifies file checksums collected from all nodes.
1762

1763
    @param errorif: Callback for reporting errors
1764
    @param nodeinfo: List of L{objects.Node} objects
1765
    @param master_node: Name of master node
1766
    @param all_nvinfo: RPC results
1767

1768
    """
1769
    node_names = frozenset(node.name for node in nodeinfo)
1770

    
1771
    assert master_node in node_names
1772
    assert (len(files_all | files_all_opt | files_mc | files_vm) ==
1773
            sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
1774
           "Found file listed in more than one file list"
1775

    
1776
    # Define functions determining which nodes to consider for a file
1777
    file2nodefn = dict([(filename, fn)
1778
      for (files, fn) in [(files_all, None),
1779
                          (files_all_opt, None),
1780
                          (files_mc, lambda node: (node.master_candidate or
1781
                                                   node.name == master_node)),
1782
                          (files_vm, lambda node: node.vm_capable)]
1783
      for filename in files])
1784

    
1785
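    # fileinfo: filename -> {checksum -> set of node names reporting it}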
    fileinfo = dict((filename, {}) for filename in file2nodefn.keys())
1786

    
1787
    for node in nodeinfo:
1788
      nresult = all_nvinfo[node.name]
1789

    
1790
      if nresult.fail_msg or not nresult.payload:
1791
        node_files = None
1792
      else:
1793
        node_files = nresult.payload.get(constants.NV_FILELIST, None)
1794

    
1795
      test = not (node_files and isinstance(node_files, dict))
1796
      errorif(test, cls.ENODEFILECHECK, node.name,
1797
              "Node did not return file checksum data")
1798
      if test:
1799
        continue
1800

    
1801
      for (filename, checksum) in node_files.items():
1802
        # Check if the file should be considered for a node
1803
        fn = file2nodefn[filename]
1804
        if fn is None or fn(node):
1805
          fileinfo[filename].setdefault(checksum, set()).add(node.name)
1806

    
1807
    for (filename, checksums) in fileinfo.items():
1808
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
1809

    
1810
      # Nodes having the file
1811
      with_file = frozenset(node_name
1812
                            for nodes in fileinfo[filename].values()
1813
                            for node_name in nodes)
1814

    
1815
      # Nodes missing file
1816
      missing_file = node_names - with_file
1817

    
1818
      if filename in files_all_opt:
1819
        # All or no nodes
1820
        errorif(missing_file and missing_file != node_names,
1821
                cls.ECLUSTERFILECHECK, None,
1822
                "File %s is optional, but it must exist on all or no nodes (not"
1823
                " found on %s)",
1824
                filename, utils.CommaJoin(utils.NiceSort(missing_file)))
1825
      else:
1826
        errorif(missing_file, cls.ECLUSTERFILECHECK, None,
1827
                "File %s is missing from node(s) %s", filename,
1828
                utils.CommaJoin(utils.NiceSort(missing_file)))
1829

    
1830
      # See if there are multiple versions of the file
1831
      test = len(checksums) > 1
1832
      if test:
1833
        variants = ["variant %s on %s" %
1834
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
1835
                    for (idx, (checksum, nodes)) in
1836
                      enumerate(sorted(checksums.items()))]
1837
      else:
1838
        variants = []
1839

    
1840
      errorif(test, cls.ECLUSTERFILECHECK, None,
1841
              "File %s found with %s different checksums (%s)",
1842
              filename, len(checksums), "; ".join(variants))
1843

    
1844
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
1845
                      drbd_map):
1846
    """Verifies and the node DRBD status.
1847

1848
    @type ninfo: L{objects.Node}
1849
    @param ninfo: the node to check
1850
    @param nresult: the remote results for the node
1851
    @param instanceinfo: the dict of instances
1852
    @param drbd_helper: the configured DRBD usermode helper
1853
    @param drbd_map: the DRBD map as returned by
1854
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1855

1856
    """
1857
    node = ninfo.name
1858
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1859

    
1860
    if drbd_helper:
1861
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
1862
      test = (helper_result is None)
1863
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
1864
               "no drbd usermode helper returned")
1865
      if helper_result:
1866
        status, payload = helper_result
1867
        test = not status
1868
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
1869
                 "drbd usermode helper check unsuccessful: %s", payload)
1870
        test = status and (payload != drbd_helper)
1871
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
1872
                 "wrong drbd usermode helper: %s", payload)
1873

    
1874
    # compute the DRBD minors
1875
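    # node_drbd: DRBD minor -> (instance name, whether it must be active)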
    node_drbd = {}
1876
    for minor, instance in drbd_map[node].items():
1877
      test = instance not in instanceinfo
1878
      _ErrorIf(test, self.ECLUSTERCFG, None,
1879
               "ghost instance '%s' in temporary DRBD map", instance)
1880
      # ghost instance should not be running, but otherwise we
      # don't give double warnings (both ghost instance and
      # unallocated minor in use)
1883
      if test:
1884
        node_drbd[minor] = (instance, False)
1885
      else:
1886
        instance = instanceinfo[instance]
1887
        node_drbd[minor] = (instance.name, instance.admin_up)
1888

    
1889
    # and now check them
1890
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
1891
    test = not isinstance(used_minors, (tuple, list))
1892
    _ErrorIf(test, self.ENODEDRBD, node,
1893
             "cannot parse drbd status file: %s", str(used_minors))
1894
    if test:
1895
      # we cannot check drbd status
1896
      return
1897

    
1898
    for minor, (iname, must_exist) in node_drbd.items():
1899
      test = minor not in used_minors and must_exist
1900
      _ErrorIf(test, self.ENODEDRBD, node,
1901
               "drbd minor %d of instance %s is not active", minor, iname)
1902
    for minor in used_minors:
1903
      test = minor not in node_drbd
1904
      _ErrorIf(test, self.ENODEDRBD, node,
1905
               "unallocated drbd minor %d is in use", minor)
1906

    
1907
  def _UpdateNodeOS(self, ninfo, nresult, nimg):
1908
    """Builds the node OS structures.
1909

1910
    @type ninfo: L{objects.Node}
1911
    @param ninfo: the node to check
1912
    @param nresult: the remote results for the node
1913
    @param nimg: the node image object
1914

1915
    """
1916
    node = ninfo.name
1917
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1918

    
1919
    remote_os = nresult.get(constants.NV_OSLIST, None)
1920
    test = (not isinstance(remote_os, list) or
1921
            not compat.all(isinstance(v, list) and len(v) == 7
1922
                           for v in remote_os))
1923

    
1924
    _ErrorIf(test, self.ENODEOS, node,
1925
             "node hasn't returned valid OS data")
1926

    
1927
    nimg.os_fail = test
1928

    
1929
    if test:
1930
      return
1931

    
1932
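    # os_dict: OS name -> list of (path, status, diagnose, variants,
    # parameters, api_versions) tuples, one entry per copy found on the node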
    os_dict = {}
1933

    
1934
    for (name, os_path, status, diagnose,
1935
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1936

    
1937
      if name not in os_dict:
1938
        os_dict[name] = []
1939

    
1940
      # parameters is a list of lists instead of list of tuples due to
1941
      # JSON lacking a real tuple type, fix it:
1942
      parameters = [tuple(v) for v in parameters]
1943
      os_dict[name].append((os_path, status, diagnose,
1944
                            set(variants), set(parameters), set(api_ver)))
1945

    
1946
    nimg.oslist = os_dict
1947

    
1948
  def _VerifyNodeOS(self, ninfo, nimg, base):
1949
    """Verifies the node OS list.
1950

1951
    @type ninfo: L{objects.Node}
1952
    @param ninfo: the node to check
1953
    @param nimg: the node image object
1954
    @param base: the 'template' node we match against (e.g. from the master)
1955

1956
    """
1957
    node = ninfo.name
1958
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1959

    
1960
    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1961

    
1962
    beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
1963
    for os_name, os_data in nimg.oslist.items():
1964
      assert os_data, "Empty OS status for OS %s?!" % os_name
1965
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1966
      _ErrorIf(not f_status, self.ENODEOS, node,
1967
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1968
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1969
               "OS '%s' has multiple entries (first one shadows the rest): %s",
1970
               os_name, utils.CommaJoin([v[0] for v in os_data]))
1971
      # this will be caught in the backend too
1972
      _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1973
               and not f_var, self.ENODEOS, node,
1974
               "OS %s with API at least %d does not declare any variant",
1975
               os_name, constants.OS_API_V15)
1976
      # comparisons with the 'base' image
1977
      test = os_name not in base.oslist
1978
      _ErrorIf(test, self.ENODEOS, node,
1979
               "Extra OS %s not present on reference node (%s)",
1980
               os_name, base.name)
1981
      if test:
1982
        continue
1983
      assert base.oslist[os_name], "Base node has empty OS status?"
1984
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1985
      if not b_status:
1986
        # base OS is invalid, skipping
1987
        continue
1988
      for kind, a, b in [("API version", f_api, b_api),
1989
                         ("variants list", f_var, b_var),
1990
                         ("parameters", beautify_params(f_param),
1991
                          beautify_params(b_param))]:
1992
        _ErrorIf(a != b, self.ENODEOS, node,
1993
                 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
1994
                 kind, os_name, base.name,
1995
                 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
1996

    
1997
    # check any missing OSes
1998
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1999
    _ErrorIf(missing, self.ENODEOS, node,
2000
             "OSes present on reference node %s but missing on this node: %s",
2001
             base.name, utils.CommaJoin(missing))
2002

    
2003
  def _VerifyOob(self, ninfo, nresult):
2004
    """Verifies out of band functionality of a node.
2005

2006
    @type ninfo: L{objects.Node}
2007
    @param ninfo: the node to check
2008
    @param nresult: the remote results for the node
2009

2010
    """
2011
    node = ninfo.name
2012
    # We just have to verify the paths on master and/or master candidates
2013
    # as the oob helper is invoked on the master
2014
    if ((ninfo.master_candidate or ninfo.master_capable) and
2015
        constants.NV_OOB_PATHS in nresult):
2016
      for path_result in nresult[constants.NV_OOB_PATHS]:
2017
        self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
2018

    
2019
  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2020
    """Verifies and updates the node volume data.
2021

2022
    This function will update a L{NodeImage}'s internal structures
2023
    with data from the remote call.
2024

2025
    @type ninfo: L{objects.Node}
2026
    @param ninfo: the node to check
2027
    @param nresult: the remote results for the node
2028
    @param nimg: the node image object
2029
    @param vg_name: the configured VG name
2030

2031
    """
2032
    node = ninfo.name
2033
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2034

    
2035
    nimg.lvm_fail = True
2036
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2037
    if vg_name is None:
2038
      pass
2039
    elif isinstance(lvdata, basestring):
2040
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
2041
               utils.SafeEncode(lvdata))
2042
    elif not isinstance(lvdata, dict):
2043
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
2044
    else:
2045
      nimg.volumes = lvdata
2046
      nimg.lvm_fail = False
2047

    
2048
  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2049
    """Verifies and updates the node instance list.
2050

2051
    If the listing was successful, then updates this node's instance
2052
    list. Otherwise, it marks the RPC call as failed for the instance
2053
    list key.
2054

2055
    @type ninfo: L{objects.Node}
2056
    @param ninfo: the node to check
2057
    @param nresult: the remote results for the node
2058
    @param nimg: the node image object
2059

2060
    """
2061
    idata = nresult.get(constants.NV_INSTANCELIST, None)
2062
    test = not isinstance(idata, list)
2063
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
2064
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
2065
    if test:
2066
      nimg.hyp_fail = True
2067
    else:
2068
      nimg.instances = idata
2069

    
2070
  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2071
    """Verifies and computes a node information map
2072

2073
    @type ninfo: L{objects.Node}
2074
    @param ninfo: the node to check
2075
    @param nresult: the remote results for the node
2076
    @param nimg: the node image object
2077
    @param vg_name: the configured VG name
2078

2079
    """
2080
    node = ninfo.name
2081
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2082

    
2083
    # try to read free memory (from the hypervisor)
2084
    hv_info = nresult.get(constants.NV_HVINFO, None)
2085
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2086
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
2087
    if not test:
2088
      try:
2089
        nimg.mfree = int(hv_info["memory_free"])
2090
      except (ValueError, TypeError):
2091
        _ErrorIf(True, self.ENODERPC, node,
2092
                 "node returned invalid nodeinfo, check hypervisor")
2093

    
2094
    # FIXME: devise a free space model for file based instances as well
2095
    if vg_name is not None:
2096
      test = (constants.NV_VGLIST not in nresult or
2097
              vg_name not in nresult[constants.NV_VGLIST])
2098
      _ErrorIf(test, self.ENODELVM, node,
2099
               "node didn't return data for the volume group '%s'"
2100
               " - it is either missing or broken", vg_name)
2101
      if not test:
2102
        try:
2103
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2104
        except (ValueError, TypeError):
2105
          _ErrorIf(True, self.ENODERPC, node,
2106
                   "node returned invalid LVM info, check LVM status")
2107

    
2108
  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2109
    """Gets per-disk status information for all instances.
2110

2111
    @type nodelist: list of strings
2112
    @param nodelist: Node names
2113
    @type node_image: dict of (name, L{objects.Node})
2114
    @param node_image: Node objects
2115
    @type instanceinfo: dict of (name, L{objects.Instance})
2116
    @param instanceinfo: Instance objects
2117
    @rtype: {instance: {node: [(success, payload)]}}
2118
    @return: a dictionary of per-instance dictionaries with nodes as
2119
        keys and disk information as values; the disk information is a
2120
        list of tuples (success, payload)
2121

2122
    """
2123
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2124

    
2125
    node_disks = {}
2126
    node_disks_devonly = {}
2127
    diskless_instances = set()
2128
    diskless = constants.DT_DISKLESS
2129

    
2130
    for nname in nodelist:
2131
      node_instances = list(itertools.chain(node_image[nname].pinst,
2132
                                            node_image[nname].sinst))
2133
      diskless_instances.update(inst for inst in node_instances
2134
                                if instanceinfo[inst].disk_template == diskless)
2135
      disks = [(inst, disk)
2136
               for inst in node_instances
2137
               for disk in instanceinfo[inst].disks]
2138

    
2139
      if not disks:
2140
        # No need to collect data
2141
        continue
2142

    
2143
      node_disks[nname] = disks
2144

    
2145
      # Creating copies as SetDiskID below will modify the objects and that can
2146
      # lead to incorrect data returned from nodes
2147
      devonly = [dev.Copy() for (_, dev) in disks]
2148

    
2149
      for dev in devonly:
2150
        self.cfg.SetDiskID(dev, nname)
2151

    
2152
      node_disks_devonly[nname] = devonly
2153

    
2154
    assert len(node_disks) == len(node_disks_devonly)
2155

    
2156
    # Collect data from all nodes with disks
2157
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2158
                                                          node_disks_devonly)
2159

    
2160
    assert len(result) == len(node_disks)
2161

    
2162
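    # instdisk: instance name -> {node name -> [(success, payload), ...]}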
    instdisk = {}
2163

    
2164
    for (nname, nres) in result.items():
2165
      disks = node_disks[nname]
2166

    
2167
      if nres.offline:
2168
        # No data from this node
2169
        data = len(disks) * [(False, "node offline")]
2170
      else:
2171
        msg = nres.fail_msg
2172
        _ErrorIf(msg, self.ENODERPC, nname,
2173
                 "while getting disk information: %s", msg)
2174
        if msg:
2175
          # No data from this node
2176
          data = len(disks) * [(False, msg)]
2177
        else:
2178
          data = []
2179
          for idx, i in enumerate(nres.payload):
2180
            if isinstance(i, (tuple, list)) and len(i) == 2:
2181
              data.append(i)
2182
            else:
2183
              logging.warning("Invalid result from node %s, entry %d: %s",
2184
                              nname, idx, i)
2185
              data.append((False, "Invalid result from the remote node"))
2186

    
2187
      for ((inst, _), status) in zip(disks, data):
2188
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2189

    
2190
    # Add empty entries for diskless instances.
2191
    for inst in diskless_instances:
2192
      assert inst not in instdisk
2193
      instdisk[inst] = {}
2194

    
2195
    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2196
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
2197
                      compat.all(isinstance(s, (tuple, list)) and
2198
                                 len(s) == 2 for s in statuses)
2199
                      for inst, nnames in instdisk.items()
2200
                      for nname, statuses in nnames.items())
2201
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2202

    
2203
    return instdisk
2204

    
2205
  def _VerifyHVP(self, hvp_data):
2206
    """Verifies locally the syntax of the hypervisor parameters.
2207

2208
    """
2209
    for item, hv_name, hv_params in hvp_data:
2210
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2211
             (hv_name, item))
2212
      try:
2213
        hv_class = hypervisor.GetHypervisor(hv_name)
2214
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2215
        hv_class.CheckParameterSyntax(hv_params)
2216
      except errors.GenericError, err:
2217
        self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
2218

    
2219
  def BuildHooksEnv(self):
2220
    """Build hooks env.
2221

2222
    Cluster-Verify hooks run only in the post phase; if they fail, their
    output is logged in the verify output and the verification fails.
2224

2225
    """
2226
    env = {
2227
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2228
      }
2229

    
2230
    env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2231
               for node in self.my_node_info.values())
2232

    
2233
    return env
2234

    
2235
  def BuildHooksNodes(self):
2236
    """Build hooks nodes.
2237

2238
    """
2239
    assert self.my_node_names, ("Node list not gathered,"
2240
      " has CheckPrereq been executed?")
2241
    return ([], self.my_node_names)
2242

    
2243
  def Exec(self, feedback_fn):
2244
    """Verify integrity of cluster, performing various test on nodes.
2245

2246
    """
2247
    # This method has too many local variables. pylint: disable-msg=R0914
2248
    self.bad = False
2249
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2250
    verbose = self.op.verbose
2251
    self._feedback_fn = feedback_fn
2252
    feedback_fn("* Verifying global settings")
2253
    for msg in self.cfg.VerifyConfig():
2254
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2255

    
2256
    # Check the cluster certificates
2257
    for cert_filename in constants.ALL_CERT_FILES:
2258
      (errcode, msg) = _VerifyCertificate(cert_filename)
2259
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2260

    
2261
    vg_name = self.cfg.GetVGName()
2262
    drbd_helper = self.cfg.GetDRBDHelper()
2263
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2264
    cluster = self.cfg.GetClusterInfo()
2265
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
2266
    node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2267

    
2268
    i_non_redundant = [] # Non redundant instances
2269
    i_non_a_balanced = [] # Non auto-balanced instances
2270
    n_offline = 0 # Count of offline nodes
2271
    n_drained = 0 # Count of nodes being drained
2272
    node_vol_should = {}
2273

    
2274
    # FIXME: verify OS list
2275

    
2276
    # File verification
2277
    filemap = _ComputeAncillaryFiles(cluster, False)
2278

    
2279
    # do local checksums
2280
    master_node = self.master_node = self.cfg.GetMasterNode()
2281
    master_ip = self.cfg.GetMasterIP()
2282

    
2283
    # Compute the set of hypervisor parameters
2284
    hvp_data = []
2285
    for hv_name in hypervisors:
2286
      hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
2287
    for os_name, os_hvp in cluster.os_hvp.items():
2288
      for hv_name, hv_params in os_hvp.items():
2289
        if not hv_params:
2290
          continue
2291
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
2292
        hvp_data.append(("os %s" % os_name, hv_name, full_params))
2293
    # TODO: collapse identical parameter values in a single one
2294
    for instance in self.all_inst_info.values():
2295
      if not instance.hvparams:
2296
        continue
2297
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
2298
                       cluster.FillHV(instance)))
2299
    # and verify them locally
2300
    self._VerifyHVP(hvp_data)
2301

    
2302
    feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2303
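    # Parameters passed to the node_verify RPC; each NV_* key selects one
    # class of checks to run on the remote node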
    node_verify_param = {
2304
      constants.NV_FILELIST:
2305
        utils.UniqueSequence(filename
2306
                             for files in filemap
2307
                             for filename in files),
2308
      constants.NV_NODELIST: [node.name for node in self.all_node_info.values()
2309
                              if not node.offline],
2310
      constants.NV_HYPERVISOR: hypervisors,
2311
      constants.NV_HVPARAMS: hvp_data,
2312
      constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2313
                                 for node in node_data_list
2314
                                 if not node.offline],
2315
      constants.NV_INSTANCELIST: hypervisors,
2316
      constants.NV_VERSION: None,
2317
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2318
      constants.NV_NODESETUP: None,
2319
      constants.NV_TIME: None,
2320
      constants.NV_MASTERIP: (master_node, master_ip),
2321
      constants.NV_OSLIST: None,
2322
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2323
      }
2324

    
2325
    if vg_name is not None:
2326
      node_verify_param[constants.NV_VGLIST] = None
2327
      node_verify_param[constants.NV_LVLIST] = vg_name
2328
      node_verify_param[constants.NV_PVLIST] = [vg_name]
2329
      node_verify_param[constants.NV_DRBDLIST] = None
2330

    
2331
    if drbd_helper:
2332
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2333

    
2334
    # bridge checks
2335
    # FIXME: this needs to be changed per node-group, not cluster-wide
2336
    bridges = set()
2337
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2338
    if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2339
      bridges.add(default_nicpp[constants.NIC_LINK])
2340
    for instance in self.my_inst_info.values():
2341
      for nic in instance.nics:
2342
        full_nic = cluster.SimpleFillNIC(nic.nicparams)
2343
        if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2344
          bridges.add(full_nic[constants.NIC_LINK])
2345

    
2346
    if bridges:
2347
      node_verify_param[constants.NV_BRIDGES] = list(bridges)
2348

    
2349
    # Build our expected cluster state
2350
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
2351
                                                 name=node.name,
2352
                                                 vm_capable=node.vm_capable))
2353
                      for node in node_data_list)
2354

    
2355
    # Gather OOB paths
2356
    oob_paths = []
2357
    for node in self.all_node_info.values():
2358
      path = _SupportsOob(self.cfg, node)
2359
      if path and path not in oob_paths:
2360
        oob_paths.append(path)
2361

    
2362
    if oob_paths:
2363
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2364

    
2365
    for instance in self.my_inst_names:
2366
      inst_config = self.my_inst_info[instance]
2367

    
2368
      for nname in inst_config.all_nodes:
2369
        if nname not in node_image:
2370
          # ghost node
2371
          gnode = self.NodeImage(name=nname)
2372
          gnode.ghost = True
2373
          node_image[nname] = gnode
2374

    
2375
      inst_config.MapLVsByNode(node_vol_should)
2376

    
2377
      pnode = inst_config.primary_node
2378
      node_image[pnode].pinst.append(instance)
2379

    
2380
      for snode in inst_config.secondary_nodes:
2381
        nimg = node_image[snode]
2382
        nimg.sinst.append(instance)
2383
        if pnode not in nimg.sbp:
2384
          nimg.sbp[pnode] = []
2385
        nimg.sbp[pnode].append(instance)
2386

    
2387
    # At this point, we have the in-memory data structures complete,
2388
    # except for the runtime information, which we'll gather next
2389

    
2390
    # Due to the way our RPC system works, exact response times cannot be
2391
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2392
    # time before and after executing the request, we can at least have a time
2393
    # window.
2394
    nvinfo_starttime = time.time()
2395
    all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2396
                                           node_verify_param,
2397
                                           self.cfg.GetClusterName())
2398
    nvinfo_endtime = time.time()
2399

    
2400
    all_drbd_map = self.cfg.ComputeDRBDMap()
2401

    
2402
    feedback_fn("* Gathering disk information (%s nodes)" %
2403
                len(self.my_node_names))
2404
    instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2405
                                     self.my_inst_info)
2406

    
2407
    feedback_fn("* Verifying configuration file consistency")
2408

    
2409
    # If not all nodes are being checked, we need to make sure the master node
2410
    # and a non-checked vm_capable node are in the list.
2411
    absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2412
    if absent_nodes:
2413
      vf_nvinfo = all_nvinfo.copy()
2414
      vf_node_info = list(self.my_node_info.values())
2415
      additional_nodes = []
2416
      if master_node not in self.my_node_info:
2417
        additional_nodes.append(master_node)
2418
        vf_node_info.append(self.all_node_info[master_node])
2419
      # Add the first vm_capable node we find which is not included
2420
      for node in absent_nodes:
2421
        nodeinfo = self.all_node_info[node]
2422
        if nodeinfo.vm_capable and not nodeinfo.offline:
2423
          additional_nodes.append(node)
2424
          vf_node_info.append(self.all_node_info[node])
2425
          break
2426
      key = constants.NV_FILELIST
2427
      vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2428
                                                 {key: node_verify_param[key]},
2429
                                                 self.cfg.GetClusterName()))
2430
    else:
2431
      vf_nvinfo = all_nvinfo
2432
      vf_node_info = self.my_node_info.values()
2433

    
2434
    self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2435

    
2436
    feedback_fn("* Verifying node status")
2437

    
2438
    refos_img = None
2439

    
2440
    for node_i in node_data_list:
2441
      node = node_i.name
2442
      nimg = node_image[node]
2443

    
2444
      if node_i.offline:
2445
        if verbose:
2446
          feedback_fn("* Skipping offline node %s" % (node,))
2447
        n_offline += 1
2448
        continue
2449

    
2450
      if node == master_node:
2451
        ntype = "master"
2452
      elif node_i.master_candidate:
2453
        ntype = "master candidate"
2454
      elif node_i.drained:
2455
        ntype = "drained"
2456
        n_drained += 1
2457
      else:
2458
        ntype = "regular"
2459
      if verbose:
2460
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2461

    
2462
      msg = all_nvinfo[node].fail_msg
2463
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2464
      if msg:
2465
        nimg.rpc_fail = True
2466
        continue
2467

    
2468
      nresult = all_nvinfo[node].payload
2469

    
2470
      nimg.call_ok = self._VerifyNode(node_i, nresult)
2471
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2472
      self._VerifyNodeNetwork(node_i, nresult)
2473
      self._VerifyOob(node_i, nresult)
2474

    
2475
      if nimg.vm_capable:
2476
        self._VerifyNodeLVM(node_i, nresult, vg_name)
2477
        self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2478
                             all_drbd_map)
2479

    
2480
        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2481
        self._UpdateNodeInstances(node_i, nresult, nimg)
2482
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2483
        self._UpdateNodeOS(node_i, nresult, nimg)
2484

    
2485
        if not nimg.os_fail:
2486
          if refos_img is None:
2487
            refos_img = nimg
2488
          self._VerifyNodeOS(node_i, nimg, refos_img)
2489
        self._VerifyNodeBridges(node_i, nresult, bridges)
2490

    
2491
        # Check whether all running instances are primary for the node. (This
2492
        # can no longer be done from _VerifyInstance below, since some of the
2493
        # wrong instances could be from other node groups.)
2494
        non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2495

    
2496
        for inst in non_primary_inst:
2497
          test = inst in self.all_inst_info
2498
          _ErrorIf(test, self.EINSTANCEWRONGNODE, inst,
2499
                   "instance should not run on node %s", node_i.name)
2500
          _ErrorIf(not test, self.ENODEORPHANINSTANCE, node_i.name,
2501
                   "node is running unknown instance %s", inst)
2502

    
2503
    feedback_fn("* Verifying instance status")
2504
    for instance in self.my_inst_names:
2505
      if verbose:
2506
        feedback_fn("* Verifying instance %s" % instance)
2507
      inst_config = self.my_inst_info[instance]
2508
      self._VerifyInstance(instance, inst_config, node_image,
2509
                           instdisk[instance])
2510
      inst_nodes_offline = []
2511

    
2512
      pnode = inst_config.primary_node
2513
      pnode_img = node_image[pnode]
2514
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2515
               self.ENODERPC, pnode, "instance %s, connection to"
2516
               " primary node failed", instance)
2517

    
2518
      _ErrorIf(inst_config.admin_up and pnode_img.offline,
2519
               self.EINSTANCEBADNODE, instance,
2520
               "instance is marked as running and lives on offline node %s",
2521
               inst_config.primary_node)
2522

    
2523
      # If the instance is non-redundant we cannot survive losing its primary
2524
      # node, so we are not N+1 compliant. On the other hand we have no disk
2525
      # templates with more than one secondary so that situation is not well
2526
      # supported either.
2527
      # FIXME: does not support file-backed instances
2528
      if not inst_config.secondary_nodes:
2529
        i_non_redundant.append(instance)
2530

    
2531
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2532
               instance, "instance has multiple secondary nodes: %s",
2533
               utils.CommaJoin(inst_config.secondary_nodes),
2534
               code=self.ETYPE_WARNING)
2535

    
2536
      if inst_config.disk_template in constants.DTS_INT_MIRROR:
2537
        pnode = inst_config.primary_node
2538
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
2539
        instance_groups = {}
2540

    
2541
        for node in instance_nodes:
2542
          instance_groups.setdefault(self.all_node_info[node].group,
2543
                                     []).append(node)
2544

    
2545
        pretty_list = [
2546
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2547
          # Sort so that we always list the primary node first.
2548
          for group, nodes in sorted(instance_groups.items(),
2549
                                     key=lambda (_, nodes): pnode in nodes,
2550
                                     reverse=True)]
2551

    
2552
        self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2553
                      instance, "instance has primary and secondary nodes in"
2554
                      " different groups: %s", utils.CommaJoin(pretty_list),
2555
                      code=self.ETYPE_WARNING)
2556

    
2557
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2558
        i_non_a_balanced.append(instance)
2559

    
2560
      for snode in inst_config.secondary_nodes:
2561
        s_img = node_image[snode]
2562
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2563
                 "instance %s, connection to secondary node failed", instance)
2564

    
2565
        if s_img.offline:
2566
          inst_nodes_offline.append(snode)
2567

    
2568
      # warn that the instance lives on offline nodes
2569
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2570
               "instance has offline secondary node(s) %s",
2571
               utils.CommaJoin(inst_nodes_offline))
2572
      # ... or ghost/non-vm_capable nodes
2573
      for node in inst_config.all_nodes:
2574
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2575
                 "instance lives on ghost node %s", node)
2576
        _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2577
                 instance, "instance lives on non-vm_capable node %s", node)
2578

    
2579
    feedback_fn("* Verifying orphan volumes")
2580
    reserved = utils.FieldSet(*cluster.reserved_lvs)
2581
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2582

    
2583
    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2584
      feedback_fn("* Verifying N+1 Memory redundancy")
2585
      self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
2586

    
2587
    feedback_fn("* Other Notes")
2588
    if i_non_redundant:
2589
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2590
                  % len(i_non_redundant))
2591

    
2592
    if i_non_a_balanced:
2593
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2594
                  % len(i_non_a_balanced))
2595

    
2596
    if n_offline:
2597
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2598

    
2599
    if n_drained:
2600
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2601

    
2602
    return not self.bad
2603

    
2604
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2605
    """Analyze the post-hooks' result
2606

2607
    This method analyses the hook result, handles it, and sends some
2608
    nicely-formatted feedback back to the user.
2609

2610
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
2611
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2612
    @param hooks_results: the results of the multi-node hooks rpc call
2613
    @param feedback_fn: function used to send feedback back to the caller
2614
    @param lu_result: previous Exec result
2615
    @return: the new Exec result, based on the previous result
2616
        and hook results
2617

2618
    """
2619
    # We only really run POST phase hooks, and are only interested in
2620
    # their results
2621
    if phase == constants.HOOKS_PHASE_POST:
2622
      # Used to change hooks' output to proper indentation
2623
      feedback_fn("* Hooks Results")
2624
      assert hooks_results, "invalid result from hooks"
2625

    
2626
      for node_name in hooks_results:
2627
        res = hooks_results[node_name]
2628
        msg = res.fail_msg
2629
        test = msg and not res.offline
2630
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
2631
                      "Communication failure in hooks execution: %s", msg)
2632
        if res.offline or msg:
2633
          # No need to investigate payload if node is offline or gave an error.
2634
          # manually override lu_result here, as _ErrorIf only
2635
          # overrides self.bad
2636
          lu_result = 1
2637
          continue
2638
        for script, hkr, output in res.payload:
2639
          test = hkr == constants.HKR_FAIL
2640
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
2641
                        "Script %s failed, output:", script)
2642
          if test:
2643
            output = self._HOOKS_INDENT_RE.sub('      ', output)
2644
            feedback_fn("%s" % output)
2645
            lu_result = 0
2646

    
2647
      return lu_result
2648

    
2649

    
2650
class LUClusterVerifyDisks(NoHooksLU):
2651
  """Verifies the cluster disks status.
2652

2653
  """
2654
  REQ_BGL = False
2655

    
2656
  def ExpandNames(self):
2657
    self.needed_locks = {
2658
      locking.LEVEL_NODE: locking.ALL_SET,
2659
      locking.LEVEL_INSTANCE: locking.ALL_SET,
2660
    }
2661
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2662

    
2663
  def Exec(self, feedback_fn):
2664
    """Verify integrity of cluster disks.
2665

2666
    @rtype: tuple of three items
2667
    @return: a tuple of (dict of node-to-node_error, list of instances
2668
        which need activate-disks, dict of instance: (node, volume) for
2669
        missing volumes)
2670

2671
    """
2672
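    # the three names alias the elements of the returned tuple so they can be
    # filled in incrementally below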
    result = res_nodes, res_instances, res_missing = {}, [], {}
2673

    
2674
    nodes = utils.NiceSort(self.cfg.GetVmCapableNodeList())
2675
    instances = self.cfg.GetAllInstancesInfo().values()
2676

    
2677
    nv_dict = {}
2678
    for inst in instances:
2679
      inst_lvs = {}
2680
      if not inst.admin_up:
2681
        continue
2682
      inst.MapLVsByNode(inst_lvs)
2683
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2684
      for node, vol_list in inst_lvs.iteritems():
2685
        for vol in vol_list:
2686
          nv_dict[(node, vol)] = inst
2687

    
2688
    if not nv_dict:
2689
      return result
2690

    
2691
    node_lvs = self.rpc.call_lv_list(nodes, [])
2692
    for node, node_res in node_lvs.items():
2693
      if node_res.offline:
2694
        continue
2695
      msg = node_res.fail_msg
2696
      if msg:
2697
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2698
        res_nodes[node] = msg
2699
        continue
2700

    
2701
      lvs = node_res.payload
2702
      for lv_name, (_, _, lv_online) in lvs.items():
2703
        inst = nv_dict.pop((node, lv_name), None)
2704
        if (not lv_online and inst is not None
2705
            and inst.name not in res_instances):
2706
          res_instances.append(inst.name)
2707

    
2708
    # any leftover items in nv_dict are missing LVs, let's arrange the
2709
    # data better
2710
    for key, inst in nv_dict.iteritems():
2711
      if inst.name not in res_missing:
2712
        res_missing[inst.name] = []
2713
      res_missing[inst.name].append(key)
2714

    
2715
    return result
2716

    
2717

    
2718
class LUClusterRepairDiskSizes(NoHooksLU):
2719
  """Verifies the cluster disks sizes.
2720

2721
  """
2722
  REQ_BGL = False
2723

    
2724
  def ExpandNames(self):
2725
    if self.op.instances:
2726
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
2727
      self.needed_locks = {
2728
        locking.LEVEL_NODE: [],
2729
        locking.LEVEL_INSTANCE: self.wanted_names,
2730
        }
2731
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2732
    else:
2733
      self.wanted_names = None
2734
      self.needed_locks = {
2735
        locking.LEVEL_NODE: locking.ALL_SET,
2736
        locking.LEVEL_INSTANCE: locking.ALL_SET,
2737
        }
2738
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2739

    
2740
  def DeclareLocks(self, level):
2741
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
2742
      self._LockInstancesNodes(primary_only=True)
2743

    
2744
  def CheckPrereq(self):
2745
    """Check prerequisites.
2746

2747
    This only checks the optional instance list against the existing names.
2748

2749
    """
2750
    if self.wanted_names is None:
2751
      self.wanted_names = self.glm.list_owned(locking.LEVEL_INSTANCE)
2752

    
2753
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2754
                             in self.wanted_names]
2755

    
2756
  def _EnsureChildSizes(self, disk):
2757
    """Ensure children of the disk have the needed disk size.
2758

2759
    This is valid mainly for DRBD8 and fixes an issue where the
2760
    children have a smaller disk size than the parent.
2761

2762
    @param disk: an L{ganeti.objects.Disk} object
2763

2764
    """
2765
    if disk.dev_type == constants.LD_DRBD8:
2766
      assert disk.children, "Empty children for DRBD8?"
2767
      fchild = disk.children[0]
2768
      mismatch = fchild.size < disk.size
2769
      if mismatch:
2770
        self.LogInfo("Child disk has size %d, parent %d, fixing",
2771
                     fchild.size, disk.size)
2772
        fchild.size = disk.size
2773

    
2774
      # and we recurse on this child only, not on the metadev
2775
      return self._EnsureChildSizes(fchild) or mismatch
2776
    else:
2777
      return False
2778

    
2779
  def Exec(self, feedback_fn):
2780
    """Verify the size of cluster disks.
2781

2782
    """
2783
    # TODO: check child disks too
2784
    # TODO: check differences in size between primary/secondary nodes
2785
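    # group (instance, disk index, disk) tuples by primary node so that each
    # node is queried only once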
    per_node_disks = {}
2786
    for instance in self.wanted_instances:
2787
      pnode = instance.primary_node
2788
      if pnode not in per_node_disks:
2789
        per_node_disks[pnode] = []
2790
      for idx, disk in enumerate(instance.disks):
2791
        per_node_disks[pnode].append((instance, idx, disk))
2792

    
2793
    changed = []
2794
    for node, dskl in per_node_disks.items():
2795
      newl = [v[2].Copy() for v in dskl]
2796
      for dsk in newl:
2797
        self.cfg.SetDiskID(dsk, node)
2798
      result = self.rpc.call_blockdev_getsize(node, newl)
2799
      if result.fail_msg:
2800
        self.LogWarning("Failure in blockdev_getsize call to node"
2801
                        " %s, ignoring", node)
2802
        continue
2803
      if len(result.payload) != len(dskl):
2804
        logging.warning("Invalid result from node %s: len(dksl)=%d,"
2805
                        " result.payload=%s", node, len(dskl), result.payload)
2806
        self.LogWarning("Invalid result from node %s, ignoring node results",
2807
                        node)
2808
        continue
2809
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
2810
        if size is None:
2811
          self.LogWarning("Disk %d of instance %s did not return size"
2812
                          " information, ignoring", idx, instance.name)
2813
          continue
2814
        if not isinstance(size, (int, long)):
2815
          self.LogWarning("Disk %d of instance %s did not return valid"
2816
                          " size information, ignoring", idx, instance.name)
2817
          continue
2818
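        # the node reports sizes in bytes; convert to MiB before comparing
        # with the size recorded in the configuration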
        size = size >> 20
2819
        if size != disk.size:
2820
          self.LogInfo("Disk %d of instance %s has mismatched size,"
2821
                       " correcting: recorded %d, actual %d", idx,
2822
                       instance.name, disk.size, size)
2823
          disk.size = size
2824
          self.cfg.Update(instance, feedback_fn)
2825
          changed.append((instance.name, idx, size))
2826
        if self._EnsureChildSizes(disk):
2827
          self.cfg.Update(instance, feedback_fn)
2828
          changed.append((instance.name, idx, disk.size))
2829
    return changed
2830

    
2831

    
2832
class LUClusterRename(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    hostname = netutils.GetHostname(name=self.op.name,
                                    family=self.cfg.GetPrimaryIPFamily())

    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)
    if new_ip != old_ip:
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network" %
                                   new_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    clustername = self.op.name
    ip = self.ip

    # shutdown the master IP
    master = self.cfg.GetMasterNode()
    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetOnlineNodeList()
      try:
        node_list.remove(master)
      except ValueError:
        pass
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
    finally:
      result = self.rpc.call_node_start_master(master, False, False)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)

    return clustername


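# LUClusterRename.Exec above follows a "stop, reconfigure, always restart"
# shape: the master IP is taken down, the configuration is rewritten, and the
# finally-block restarts the master role even if the update failed.  The
# sketch below shows the same control flow with generic callables; it is an
# illustration only, not a Ganeti helper.
def _example_reconfigure_with_restart(stop_fn, update_fn, start_fn, warn_fn):
  """Run update_fn between stop_fn and start_fn, always restarting."""
  stop_fn()
  try:
    return update_fn()
  finally:
    try:
      start_fn()
    except Exception, err:  # pylint: disable-msg=W0703
      # mirror the LU: a failed restart is reported, not fatal
      warn_fn("could not restart: %s" % err)

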
class LUClusterSetParams(LogicalUnit):
  """Change the parameters of the cluster.

  """
  HPATH = "cluster-modify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  def CheckArguments(self):
    """Check parameters

    """
    if self.op.uid_pool:
      uidpool.CheckUidPool(self.op.uid_pool)

    if self.op.add_uids:
      uidpool.CheckUidPool(self.op.add_uids)

    if self.op.remove_uids:
      uidpool.CheckUidPool(self.op.remove_uids)

  def ExpandNames(self):
    # FIXME: in the future maybe other cluster params won't require checking on
    # all nodes to be modified.
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the given params don't conflict and
    if the given volume group is valid.

    """
    if self.op.vg_name is not None and not self.op.vg_name:
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
                                   " instances exist", errors.ECODE_INVAL)

    if self.op.drbd_helper is not None and not self.op.drbd_helper:
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
        raise errors.OpPrereqError("Cannot disable drbd helper while"
                                   " drbd-based instances exist",
                                   errors.ECODE_INVAL)

    node_list = self.glm.list_owned(locking.LEVEL_NODE)

    # if vg_name not None, checks given volume group on all nodes
    if self.op.vg_name:
      vglist = self.rpc.call_vg_list(node_list)
      for node in node_list:
        msg = vglist[node].fail_msg
        if msg:
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
          continue
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              self.op.vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus), errors.ECODE_ENVIRON)

    if self.op.drbd_helper:
      # checks given drbd helper on all nodes
      helpers = self.rpc.call_drbd_helper(node_list)
      for node in node_list:
        ninfo = self.cfg.GetNodeInfo(node)
        if ninfo.offline:
          self.LogInfo("Not checking drbd helper on offline node %s", node)
          continue
        msg = helpers[node].fail_msg
        if msg:
          raise errors.OpPrereqError("Error checking drbd helper on node"
                                     " '%s': %s" % (node, msg),
                                     errors.ECODE_ENVIRON)
        node_helper = helpers[node].payload
        if node_helper != self.op.drbd_helper:
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
                                     (node, node_helper), errors.ECODE_ENVIRON)

    self.cluster = cluster = self.cfg.GetClusterInfo()
    # validate params changes
    if self.op.beparams:
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)

      # TODO: we need a more general way to handle resetting
      # cluster-level parameters to default values
      if self.new_ndparams["oob_program"] == "":
        self.new_ndparams["oob_program"] = \
            constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]

    if self.op.nicparams:
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
      nic_errors = []

      # check all instances for consistency
      for instance in self.cfg.GetAllInstancesInfo().values():
        for nic_idx, nic in enumerate(instance.nics):
          params_copy = copy.deepcopy(nic.nicparams)
          params_filled = objects.FillDict(self.new_nicparams, params_copy)

          # check parameter syntax
          try:
            objects.NIC.CheckParameterSyntax(params_filled)
          except errors.ConfigurationError, err:
            nic_errors.append("Instance %s, nic/%d: %s" %
                              (instance.name, nic_idx, err))

          # if we're moving instances to routed, check that they have an ip
          target_mode = params_filled[constants.NIC_MODE]
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
                              " address" % (instance.name, nic_idx))
      if nic_errors:
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
                                   "\n".join(nic_errors))

    # hypervisor list/parameters
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
    if self.op.hvparams:
      for hv_name, hv_dict in self.op.hvparams.items():
        if hv_name not in self.new_hvparams:
          self.new_hvparams[hv_name] = hv_dict
        else:
          self.new_hvparams[hv_name].update(hv_dict)

    # os hypervisor parameters
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
    if self.op.os_hvp:
      for os_name, hvs in self.op.os_hvp.items():
        if os_name not in self.new_os_hvp:
          self.new_os_hvp[os_name] = hvs
        else:
          for hv_name, hv_dict in hvs.items():
            if hv_name not in self.new_os_hvp[os_name]:
              self.new_os_hvp[os_name][hv_name] = hv_dict
            else:
              self.new_os_hvp[os_name][hv_name].update(hv_dict)

    # os parameters
    self.new_osp = objects.FillDict(cluster.osparams, {})
    if self.op.osparams:
      for os_name, osp in self.op.osparams.items():
        if os_name not in self.new_osp:
          self.new_osp[os_name] = {}

        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
                                                  use_none=True)

        if not self.new_osp[os_name]:
          # we removed all parameters
          del self.new_osp[os_name]
        else:
          # check the parameter validity (remote check)
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
                         os_name, self.new_osp[os_name])

    # changes to the hypervisor list
    if self.op.enabled_hypervisors is not None:
      self.hv_list = self.op.enabled_hypervisors
      for hv in self.hv_list:
        # if the hypervisor doesn't already exist in the cluster
        # hvparams, we initialize it to empty, and then (in both
        # cases) we make sure to fill the defaults, as we might not
        # have a complete defaults list if the hypervisor wasn't
        # enabled before
        if hv not in new_hvp:
          new_hvp[hv] = {}
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
    else:
      self.hv_list = cluster.enabled_hypervisors

    if self.op.hvparams or self.op.enabled_hypervisors is not None:
      # either the enabled list has changed, or the parameters have, validate
      for hv_name, hv_params in self.new_hvparams.items():
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
            (self.op.enabled_hypervisors and
             hv_name in self.op.enabled_hypervisors)):
          # either this is a new hypervisor, or its parameters have changed
          hv_class = hypervisor.GetHypervisor(hv_name)
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          hv_class.CheckParameterSyntax(hv_params)
          _CheckHVParams(self, node_list, hv_name, hv_params)

    if self.op.os_hvp:
      # no need to check any newly-enabled hypervisors, since the
      # defaults have already been checked in the above code-block
      for os_name, os_hvp in self.new_os_hvp.items():
        for hv_name, hv_params in os_hvp.items():
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          # we need to fill in the new os_hvp on top of the actual hv_p
          cluster_defaults = self.new_hvparams.get(hv_name, {})
          new_osp = objects.FillDict(cluster_defaults, hv_params)
          hv_class = hypervisor.GetHypervisor(hv_name)
          hv_class.CheckParameterSyntax(new_osp)
          _CheckHVParams(self, node_list, hv_name, new_osp)

    if self.op.default_iallocator:
      alloc_script = utils.FindFile(self.op.default_iallocator,
                                    constants.IALLOCATOR_SEARCH_PATH,
                                    os.path.isfile)
      if alloc_script is None:
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
                                   " specified" % self.op.default_iallocator,
                                   errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    """
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
    if self.op.drbd_helper is not None:
      new_helper = self.op.drbd_helper
      if not new_helper:
        new_helper = None
      if new_helper != self.cfg.GetDRBDHelper():
        self.cfg.SetDRBDHelper(new_helper)
      else:
        feedback_fn("Cluster DRBD helper already in desired state,"
                    " not changing")
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
    if self.op.enabled_hypervisors is not None:
      self.cluster.hvparams = self.new_hvparams
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
    if self.op.beparams:
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
    if self.op.nicparams:
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
    if self.op.osparams:
      self.cluster.osparams = self.new_osp
    if self.op.ndparams:
      self.cluster.ndparams = self.new_ndparams

    if self.op.candidate_pool_size is not None:
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
      # we need to update the pool size here, otherwise the save will fail
      _AdjustCandidatePool(self, [])

    if self.op.maintain_node_health is not None:
      self.cluster.maintain_node_health = self.op.maintain_node_health

    if self.op.prealloc_wipe_disks is not None:
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks

    if self.op.add_uids is not None:
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)

    if self.op.remove_uids is not None:
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)

    if self.op.uid_pool is not None:
      self.cluster.uid_pool = self.op.uid_pool

    if self.op.default_iallocator is not None:
      self.cluster.default_iallocator = self.op.default_iallocator

    if self.op.reserved_lvs is not None:
      self.cluster.reserved_lvs = self.op.reserved_lvs

    def helper_os(aname, mods, desc):
      desc += " OS list"
      lst = getattr(self.cluster, aname)
      for key, val in mods:
        if key == constants.DDM_ADD:
          if val in lst:
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
          else:
            lst.append(val)
        elif key == constants.DDM_REMOVE:
          if val in lst:
            lst.remove(val)
          else:
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
        else:
          raise errors.ProgrammerError("Invalid modification '%s'" % key)

    if self.op.hidden_os:
      helper_os("hidden_os", self.op.hidden_os, "hidden")

    if self.op.blacklisted_os:
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")

    if self.op.master_netdev:
      master = self.cfg.GetMasterNode()
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
                  self.cluster.master_netdev)
      result = self.rpc.call_node_stop_master(master, False)
      result.Raise("Could not disable the master ip")
      feedback_fn("Changing master_netdev from %s to %s" %
                  (self.cluster.master_netdev, self.op.master_netdev))
      self.cluster.master_netdev = self.op.master_netdev

    self.cfg.Update(self.cluster, feedback_fn)

    if self.op.master_netdev:
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
                  self.op.master_netdev)
      result = self.rpc.call_node_start_master(master, False, False)
      if result.fail_msg:
        self.LogWarning("Could not re-enable the master ip on"
                        " the master, please restart manually: %s",
                        result.fail_msg)


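# CheckPrereq above merges the submitted hvparams and os_hvp on top of the
# values already stored in the cluster: existing hypervisor sections are
# updated key by key, new sections are added wholesale.  A compact standalone
# sketch of that two-level merge (illustrative only, not the Ganeti
# objects.FillDict API) looks like this:
def _example_merge_two_level(base, overrides):
  """Merge {section: {key: value}} overrides into a copy of base."""
  merged = dict((section, dict(values)) for section, values in base.items())
  for section, values in overrides.items():
    if section not in merged:
      merged[section] = dict(values)
    else:
      merged[section].update(values)
  return merged

# e.g. _example_merge_two_level({"kvm": {"acpi": True}},
#                               {"kvm": {"acpi": False}, "xen-pvm": {}})
# -> {"kvm": {"acpi": False}, "xen-pvm": {}}

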
def _UploadHelper(lu, nodes, fname):
  """Helper for uploading a file and showing warnings.

  """
  if os.path.exists(fname):
    result = lu.rpc.call_upload_file(nodes, fname)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        msg = ("Copy of file %s to node %s failed: %s" %
               (fname, to_node, msg))
        lu.proc.LogWarning(msg)


def _ComputeAncillaryFiles(cluster, redist):
  """Compute files external to Ganeti which need to be consistent.

  @type redist: boolean
  @param redist: Whether to include files which need to be redistributed

  """
  # Compute files for all nodes
  files_all = set([
    constants.SSH_KNOWN_HOSTS_FILE,
    constants.CONFD_HMAC_KEY,
    constants.CLUSTER_DOMAIN_SECRET_FILE,
    ])

  if not redist:
    files_all.update(constants.ALL_CERT_FILES)
    files_all.update(ssconf.SimpleStore().GetFileList())

  if cluster.modify_etc_hosts:
    files_all.add(constants.ETC_HOSTS)

  # Files which must either exist on all nodes or on none
  files_all_opt = set([
    constants.RAPI_USERS_FILE,
    ])

  # Files which should only be on master candidates
  files_mc = set()
  if not redist:
    files_mc.add(constants.CLUSTER_CONF_FILE)

  # Files which should only be on VM-capable nodes
  files_vm = set(filename
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())

  # Filenames must be unique
  assert (len(files_all | files_all_opt | files_mc | files_vm) ==
          sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
         "Found file listed in more than one file list"

  return (files_all, files_all_opt, files_mc, files_vm)


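# _ComputeAncillaryFiles above asserts that no file name appears in more than
# one of the four sets by comparing the size of the union with the sum of the
# individual sizes.  The equivalent check, written as a reusable sketch
# (illustrative only, not part of the module's API):
def _example_sets_are_disjoint(*sets):
  """Return True if no element is shared between any of the given sets."""
  union = set()
  total = 0
  for current in sets:
    union |= current
    total += len(current)
  return len(union) == total

# _example_sets_are_disjoint(set(["a"]), set(["b"])) -> True
# _example_sets_are_disjoint(set(["a"]), set(["a", "b"])) -> False

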
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  # Gather target nodes
  cluster = lu.cfg.GetClusterInfo()
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())

  online_nodes = lu.cfg.GetOnlineNodeList()
  vm_nodes = lu.cfg.GetVmCapableNodeList()

  if additional_nodes is not None:
    online_nodes.extend(additional_nodes)
    if additional_vm:
      vm_nodes.extend(additional_nodes)

  # Never distribute to master node
  for nodelist in [online_nodes, vm_nodes]:
    if master_info.name in nodelist:
      nodelist.remove(master_info.name)

  # Gather file lists
  (files_all, files_all_opt, files_mc, files_vm) = \
    _ComputeAncillaryFiles(cluster, True)

  # Never re-distribute configuration file from here
  assert not (constants.CLUSTER_CONF_FILE in files_all or
              constants.CLUSTER_CONF_FILE in files_vm)
  assert not files_mc, "Master candidates not handled in this function"

  filemap = [
    (online_nodes, files_all),
    (online_nodes, files_all_opt),
    (vm_nodes, files_vm),
    ]

  # Upload the files
  for (node_list, files) in filemap:
    for fname in files:
      _UploadHelper(lu, node_list, fname)


class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)


def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  """
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                      node, disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded


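# _WaitForSync above is a hand-rolled polling loop: it re-queries mirror
# status, tolerates up to ten consecutive RPC failures, and allows a few
# extra one-second retries when the disks report "done but degraded" so that
# a transient state is not mistaken for the final one.  The TODO suggests
# converting it to utils.Retry; a generic, self-contained sketch of the same
# bounded-retry idea (not the utils.Retry API) is:
def _example_poll_until(check_fn, max_failures=10, delay=6.0):
  """Poll check_fn() until it returns a non-None value.

  check_fn may raise to signal a transient failure; after max_failures
  consecutive failures the last exception is re-raised.

  """
  failures = 0
  while True:
    try:
      value = check_fn()
      failures = 0
    except Exception:  # pylint: disable-msg=W0703
      failures += 1
      if failures >= max_failures:
        raise
      value = None
    if value is not None:
      return value
    time.sleep(delay)

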
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result


class LUOobCommand(NoHooksLU):
  """Logical unit for OOB handling.

  """
  REQ_BGL = False
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)

  def ExpandNames(self):
    """Gather locks we need.

    """
    if self.op.node_names:
      self.op.node_names = _GetWantedNodes(self, self.op.node_names)
      lock_names = self.op.node_names
    else:
      lock_names = locking.ALL_SET

    self.needed_locks = {
      locking.LEVEL_NODE: lock_names,
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - OOB is supported

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.nodes = []
    self.master_node = self.cfg.GetMasterNode()

    assert self.op.power_delay >= 0.0

    if self.op.node_names:
      if (self.op.command in self._SKIP_MASTER and
          self.master_node in self.op.node_names):
        master_node_obj = self.cfg.GetNodeInfo(self.master_node)
        master_oob_handler = _SupportsOob(self.cfg, master_node_obj)

        if master_oob_handler:
          additional_text = ("run '%s %s %s' if you want to operate on the"
                             " master regardless") % (master_oob_handler,
                                                      self.op.command,
                                                      self.master_node)
        else:
          additional_text = "it does not support out-of-band operations"

        raise errors.OpPrereqError(("Operating on the master node %s is not"
                                    " allowed for %s; %s") %
                                   (self.master_node, self.op.command,
                                    additional_text), errors.ECODE_INVAL)
    else:
      self.op.node_names = self.cfg.GetNodeList()
      if self.op.command in self._SKIP_MASTER:
        self.op.node_names.remove(self.master_node)

    if self.op.command in self._SKIP_MASTER:
      assert self.master_node not in self.op.node_names

    for node_name in self.op.node_names:
      node = self.cfg.GetNodeInfo(node_name)

      if node is None:
        raise errors.OpPrereqError("Node %s not found" % node_name,
                                   errors.ECODE_NOENT)
      else:
        self.nodes.append(node)

      if (not self.op.ignore_status and
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
                                    " not marked offline") % node_name,
                                   errors.ECODE_STATE)

  def Exec(self, feedback_fn):
    """Execute OOB and return result if we expect any.

    """
    master_node = self.master_node
    ret = []

    for idx, node in enumerate(utils.NiceSort(self.nodes,
                                              key=lambda node: node.name)):
      node_entry = [(constants.RS_NORMAL, node.name)]
      ret.append(node_entry)

      oob_program = _SupportsOob(self.cfg, node)

      if not oob_program:
        node_entry.append((constants.RS_UNAVAIL, None))
        continue

      logging.info("Executing out-of-band command '%s' using '%s' on %s",
                   self.op.command, oob_program, node.name)
      result = self.rpc.call_run_oob(master_node, oob_program,
                                     self.op.command, node.name,
                                     self.op.timeout)

      if result.fail_msg:
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
                        node.name, result.fail_msg)
        node_entry.append((constants.RS_NODATA, None))
      else:
        try:
          self._CheckPayload(result)
        except errors.OpExecError, err:
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
                          node.name, err)
          node_entry.append((constants.RS_NODATA, None))
        else:
          if self.op.command == constants.OOB_HEALTH:
            # For health we should log important events
            for item, status in result.payload:
              if status in [constants.OOB_STATUS_WARNING,
                            constants.OOB_STATUS_CRITICAL]:
                self.LogWarning("Item '%s' on node '%s' has status '%s'",
                                item, node.name, status)

          if self.op.command == constants.OOB_POWER_ON:
            node.powered = True
          elif self.op.command == constants.OOB_POWER_OFF:
            node.powered = False
          elif self.op.command == constants.OOB_POWER_STATUS:
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
            if powered != node.powered:
              logging.warning(("Recorded power state (%s) of node '%s' does not"
                               " match actual power state (%s)"), node.powered,
                              node.name, powered)

          # For configuration changing commands we should update the node
          if self.op.command in (constants.OOB_POWER_ON,
                                 constants.OOB_POWER_OFF):
            self.cfg.Update(node, feedback_fn)

          node_entry.append((constants.RS_NORMAL, result.payload))

          if (self.op.command == constants.OOB_POWER_ON and
              idx < len(self.nodes) - 1):
            time.sleep(self.op.power_delay)

    return ret

  def _CheckPayload(self, result):
    """Checks if the payload is valid.

    @param result: RPC result
    @raises errors.OpExecError: If payload is not valid

    """
    errs = []
    if self.op.command == constants.OOB_HEALTH:
      if not isinstance(result.payload, list):
        errs.append("command 'health' is expected to return a list but got %s" %
                    type(result.payload))
      else:
        for item, status in result.payload:
          if status not in constants.OOB_STATUSES:
            errs.append("health item '%s' has invalid status '%s'" %
                        (item, status))

    if self.op.command == constants.OOB_POWER_STATUS:
      if not isinstance(result.payload, dict):
        errs.append("power-status is expected to return a dict but got %s" %
                    type(result.payload))

    if self.op.command in [
        constants.OOB_POWER_ON,
        constants.OOB_POWER_OFF,
        constants.OOB_POWER_CYCLE,
        ]:
      if result.payload is not None:
        errs.append("%s is expected to not return payload but got '%s'" %
                    (self.op.command, result.payload))

    if errs:
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
                               utils.CommaJoin(errs))


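# _CheckPayload above validates the out-of-band script output per command:
# "health" must be a list of (item, status) pairs with known statuses,
# "power-status" must be a dict, and the power-changing commands must return
# no payload at all.  A stripped-down sketch of the health check only
# (illustrative; the real status values live in constants.OOB_STATUSES):
def _example_validate_health_payload(payload, known_statuses):
  """Return a list of error strings for a health-style payload."""
  if not isinstance(payload, list):
    return ["expected a list, got %s" % type(payload)]
  errs = []
  for item, status in payload:
    if status not in known_statuses:
      errs.append("health item '%s' has invalid status '%s'" % (item, status))
  return errs

# _example_validate_health_payload([("PSU1", "OK")], frozenset(["OK"])) -> []

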
class _OsQuery(_QueryBase):
  FIELDS = query.OS_FIELDS

  def ExpandNames(self, lu):
    # Lock all nodes in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    lu.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    # The following variables interact with _QueryBase._GetNames
    if self.names:
      self.wanted = self.names
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = self.use_locking

  def DeclareLocks(self, lu, level):
    pass

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and tuples of (path, status, diagnose,
        variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "", [], [])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for (name, path, status, diagnose, variants,
           params, api_versions) in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        # convert params from [name, help] to (name, help)
        params = [tuple(v) for v in params]
        all_os[name][node_name].append((path, status, diagnose,
                                        variants, params, api_versions))
    return all_os

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    # Locking is not used
    assert not (compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) or
                self.do_locking or self.use_locking)

    valid_nodes = [node.name
                   for node in lu.cfg.GetAllNodesInfo().values()
                   if not node.offline and node.vm_capable]
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
    cluster = lu.cfg.GetClusterInfo()

    data = {}

    for (os_name, os_data) in pol.items():
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
                          hidden=(os_name in cluster.hidden_os),
                          blacklisted=(os_name in cluster.blacklisted_os))

      variants = set()
      parameters = set()
      api_versions = set()

      for idx, osl in enumerate(os_data.values()):
        info.valid = bool(info.valid and osl and osl[0][1])
        if not info.valid:
          break

        (node_variants, node_params, node_api) = osl[0][3:6]
        if idx == 0:
          # First entry
          variants.update(node_variants)
          parameters.update(node_params)
          api_versions.update(node_api)
        else:
          # Filter out inconsistent values
          variants.intersection_update(node_variants)
          parameters.intersection_update(node_params)
          api_versions.intersection_update(node_api)

      info.variants = list(variants)
      info.parameters = list(parameters)
      info.api_versions = list(api_versions)

      data[os_name] = info

    # Prepare data in requested order
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
            if name in data]


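# _DiagnoseByOS above pivots the per-node RPC results into a per-OS mapping:
# every OS name ends up with one entry per healthy node, even if that node
# did not report the OS at all (the entry stays an empty list).  The sketch
# below reproduces that pivot for plain {node: [(os_name, detail), ...]}
# input; it is illustrative only.
def _example_pivot_by_os(per_node):
  """Turn {node: [(os_name, detail), ...]} into {os_name: {node: [detail]}}."""
  result = {}
  nodes = list(per_node.keys())
  for node, entries in per_node.items():
    for os_name, detail in entries:
      if os_name not in result:
        result[os_name] = dict((name, []) for name in nodes)
      result[os_name][node].append(detail)
  return result

# _example_pivot_by_os({"node1": [("debian", "ok")], "node2": []})
# -> {"debian": {"node1": ["ok"], "node2": []}}

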
class LUOsDiagnose(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  REQ_BGL = False

  @staticmethod
  def _BuildFilter(fields, names):
    """Builds a filter for querying OSes.

    """
    name_filter = qlang.MakeSimpleFilter("name", names)

    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
    # respective field is not requested
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
                     for fname in ["hidden", "blacklisted"]
                     if fname not in fields]
    if "valid" not in fields:
      status_filter.append([qlang.OP_TRUE, "valid"])

    if status_filter:
      status_filter.insert(0, qlang.OP_AND)
    else:
      status_filter = None

    if name_filter and status_filter:
      return [qlang.OP_AND, name_filter, status_filter]
    elif name_filter:
      return name_filter
    else:
      return status_filter

  def CheckArguments(self):
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
                       self.op.output_fields, False)

  def ExpandNames(self):
    self.oq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.oq.OldStyleQuery(self)


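# _BuildFilter above composes a query filter as nested lists in the qlang
# prefix notation: the first element is an operator, the rest are operands.
# For a request that asks for neither "hidden", "blacklisted" nor "valid",
# and gives no names, the resulting status part is expected to look roughly
# like the constant below (shown for illustration only; the exact operator
# values come from the qlang module used above):
_EXAMPLE_OS_STATUS_FILTER = [
  qlang.OP_AND,
  [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
  [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
  [qlang.OP_TRUE, "valid"],
  ]

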
class LUNodeRemove(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      logging.warning("Node '%s', which is about to be removed, was not found"
                      " in the list of all nodes", self.op.node_name)
    return (all_nodes, all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    instance_list = self.cfg.GetInstanceList()

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node, failover to another"
                                 " node is required", errors.ECODE_INVAL)

    for instance_name in instance_list:
      instance = self.cfg.GetInstanceInfo(instance_name)
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first" % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    _RunPostHook(self, node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_REMOVE,
                                              node.name, None)
      result.Raise("Can't update hosts file with new host data")
      _RedistributeAncillaryFiles(self)


class _NodeQuery(_QueryBase):
  FIELDS = query.NODE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks[locking.LEVEL_NODE] = 1

    if self.names:
      self.wanted = _GetWantedNodes(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.NQ_LIVE in self.requested_data)

    if self.do_locking:
      # if we don't request only static fields, we need to lock the nodes
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    all_info = lu.cfg.GetAllNodesInfo()

    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)

    # Gather data as requested
    if query.NQ_LIVE in self.requested_data:
      # filter out non-vm_capable nodes
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]

      node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
                                        lu.cfg.GetHypervisorType())
      live_data = dict((name, nresult.payload)
                       for (name, nresult) in node_data.items()
                       if not nresult.fail_msg and nresult.payload)
    else:
      live_data = None

    if query.NQ_INST in self.requested_data:
      node_to_primary = dict([(name, set()) for name in nodenames])
      node_to_secondary = dict([(name, set()) for name in nodenames])

      inst_data = lu.cfg.GetAllInstancesInfo()

      for inst in inst_data.values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)
    else:
      node_to_primary = None
      node_to_secondary = None

    if query.NQ_OOB in self.requested_data:
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
                         for name, node in all_info.iteritems())
    else:
      oob_support = None

    if query.NQ_GROUP in self.requested_data:
      groups = lu.cfg.GetAllNodeGroupsInfo()
    else:
      groups = {}

    return query.NodeQueryData([all_info[name] for name in nodenames],
                               live_data, lu.cfg.GetMasterNode(),
                               node_to_primary, node_to_secondary, groups,
                               oob_support, lu.cfg.GetClusterInfo())


class LUNodeQuery(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
                         self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.nq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.nq.OldStyleQuery(self)


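# _NodeQuery._GetQueryData above builds two reverse maps, node -> set of
# primary instances and node -> set of secondary instances, by iterating over
# all instances once.  The same inversion, as a standalone sketch over plain
# tuples (illustrative only, not part of the module's API):
def _example_invert_primary_map(instances):
  """Turn [(instance_name, primary_node), ...] into {node: set(instances)}."""
  node_to_primary = {}
  for inst_name, pnode in instances:
    node_to_primary.setdefault(pnode, set()).add(inst_name)
  return node_to_primary

# _example_invert_primary_map([("inst1", "node1"), ("inst2", "node1")])
# -> {"node1": set(["inst1", "inst2"])}

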
class LUNodeQueryvols(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    nodenames = self.glm.list_owned(locking.LEVEL_NODE)
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = [self.cfg.GetInstanceInfo(iname) for iname
             in self.cfg.GetInstanceList()]

    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = nresult.payload[:]
      node_vols.sort(key=lambda vol: vol['dev'])

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol['dev']
          elif field == "vg":
            val = vol['vg']
          elif field == "name":
            val = vol['name']
          elif field == "size":
            val = int(float(vol['size']))
          elif field == "instance":
            for inst in ilist:
              if node not in lv_by_node[inst]:
                continue
              if vol['name'] in lv_by_node[inst][node]:
                val = inst.name
                break
            else:
              val = '-'
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output


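# The "instance" field in LUNodeQueryvols.Exec above relies on Python's
# for/else: the else-branch runs only when the loop finished without hitting
# "break", which is what maps an unowned volume to "-".  A minimal
# illustration of the same idiom (not a Ganeti helper):
def _example_find_owner(volume, owners_by_candidate):
  """Return the candidate owning the volume, or "-" if none does."""
  for candidate, volumes in owners_by_candidate:
    if volume in volumes:
      owner = candidate
      break
  else:
    owner = "-"
  return owner

# _example_find_owner("lv0", [("inst1", ["lv1"])]) -> "-"

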
class LUNodeQueryStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  REQ_BGL = False

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    self.nodes = self.glm.list_owned(locking.LEVEL_NODE)

    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result


class _InstanceQuery(_QueryBase):
4189
  FIELDS = query.INSTANCE_FIELDS
4190

    
4191
  def ExpandNames(self, lu):
4192
    lu.needed_locks = {}
4193
    lu.share_locks[locking.LEVEL_INSTANCE] = 1
4194
    lu.share_locks[locking.LEVEL_NODE] = 1
4195

    
4196
    if self.names:
4197
      self.wanted = _GetWantedInstances(lu, self.names)
4198
    else:
4199
      self.wanted = locking.ALL_SET
4200

    
4201
    self.do_locking = (self.use_locking and
4202
                       query.IQ_LIVE in self.requested_data)
4203
    if self.do_locking:
4204
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4205
      lu.needed_locks[locking.LEVEL_NODE] = []
4206
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4207

    
4208
  def DeclareLocks(self, lu, level):
4209
    if level == locking.LEVEL_NODE and self.do_locking:
4210
      lu._LockInstancesNodes() # pylint: disable-msg=W0212
4211

    
4212
  def _GetQueryData(self, lu):
4213
    """Computes the list of instances and their attributes.
4214

4215
    """
4216
    cluster = lu.cfg.GetClusterInfo()
4217
    all_info = lu.cfg.GetAllInstancesInfo()
4218

    
4219
    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4220

    
4221
    instance_list = [all_info[name] for name in instance_names]
4222
    nodes = frozenset(itertools.chain(*(inst.all_nodes
4223
                                        for inst in instance_list)))
4224
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
4225
    bad_nodes = []
4226
    offline_nodes = []
4227
    wrongnode_inst = set()
4228

    
4229
    # Gather data as requested
4230
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4231
      live_data = {}
4232
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4233
      for name in nodes:
4234
        result = node_data[name]
4235
        if result.offline:
4236
          # offline nodes will be in both lists
4237
          assert result.fail_msg
4238
          offline_nodes.append(name)
4239
        if result.fail_msg:
4240
          bad_nodes.append(name)
4241
        elif result.payload:
4242
          for inst in result.payload:
4243
            if inst in all_info:
4244
              if all_info[inst].primary_node == name:
4245
                live_data.update(result.payload)
4246
              else:
4247
                wrongnode_inst.add(inst)
4248
            else:
4249
              # orphan instance; we don't list it here as we don't
4250
              # handle this case yet in the output of instance listing
4251
              logging.warning("Orphan instance '%s' found on node %s",
4252
                              inst, name)
4253
        # else no instance is alive
4254
    else:
4255
      live_data = {}
4256

    
4257
    if query.IQ_DISKUSAGE in self.requested_data:
4258
      disk_usage = dict((inst.name,
4259
                         _ComputeDiskSize(inst.disk_template,
4260
                                          [{constants.IDISK_SIZE: disk.size}
4261
                                           for disk in inst.disks]))
4262
                        for inst in instance_list)
4263
    else:
4264
      disk_usage = None
4265

    
4266
    if query.IQ_CONSOLE in self.requested_data:
4267
      consinfo = {}
4268
      for inst in instance_list:
4269
        if inst.name in live_data:
4270
          # Instance is running
4271
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4272
        else:
4273
          consinfo[inst.name] = None
4274
      assert set(consinfo.keys()) == set(instance_names)
4275
    else:
4276
      consinfo = None
4277

    
4278
    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4279
                                   disk_usage, offline_nodes, bad_nodes,
4280
                                   live_data, wrongnode_inst, consinfo)
4281

    
4282

    
4283
class LUQuery(NoHooksLU):
4284
  """Query for resources/items of a certain kind.
4285

4286
  """
4287
  # pylint: disable-msg=W0142
4288
  REQ_BGL = False
4289

    
4290
  def CheckArguments(self):
4291
    qcls = _GetQueryImplementation(self.op.what)
4292

    
4293
    self.impl = qcls(self.op.filter, self.op.fields, False)
4294

    
4295
  def ExpandNames(self):
4296
    self.impl.ExpandNames(self)
4297

    
4298
  def DeclareLocks(self, level):
4299
    self.impl.DeclareLocks(self, level)
4300

    
4301
  def Exec(self, feedback_fn):
4302
    return self.impl.NewStyleQuery(self)
4303

    
4304

    
4305
class LUQueryFields(NoHooksLU):
4306
  """Query for resources/items of a certain kind.
4307

4308
  """
4309
  # pylint: disable-msg=W0142
4310
  REQ_BGL = False
4311

    
4312
  def CheckArguments(self):
4313
    self.qcls = _GetQueryImplementation(self.op.what)
4314

    
4315
  def ExpandNames(self):
4316
    self.needed_locks = {}
4317

    
4318
  def Exec(self, feedback_fn):
4319
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4320

    
4321

    
4322
class LUNodeModifyStorage(NoHooksLU):
4323
  """Logical unit for modifying a storage volume on a node.
4324

4325
  """
4326
  REQ_BGL = False
4327

    
4328
  def CheckArguments(self):
4329
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4330

    
4331
    storage_type = self.op.storage_type
4332

    
4333
    try:
4334
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4335
    except KeyError:
4336
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
4337
                                 " modified" % storage_type,
4338
                                 errors.ECODE_INVAL)
4339

    
4340
    diff = set(self.op.changes.keys()) - modifiable
4341
    if diff:
4342
      raise errors.OpPrereqError("The following fields can not be modified for"
4343
                                 " storage units of type '%s': %r" %
4344
                                 (storage_type, list(diff)),
4345
                                 errors.ECODE_INVAL)
4346

    
4347
  def ExpandNames(self):
4348
    self.needed_locks = {
4349
      locking.LEVEL_NODE: self.op.node_name,
4350
      }
4351

    
4352
  def Exec(self, feedback_fn):
4353
    """Computes the list of nodes and their attributes.
4354

4355
    """
4356
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4357
    result = self.rpc.call_storage_modify(self.op.node_name,
4358
                                          self.op.storage_type, st_args,
4359
                                          self.op.name, self.op.changes)
4360
    result.Raise("Failed to modify storage unit '%s' on %s" %
4361
                 (self.op.name, self.op.node_name))


class LUNodeAdd(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _NFLAGS = ["master_capable", "vm_capable"]

  def CheckArguments(self):
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
    # validate/normalize the node name
    self.hostname = netutils.GetHostname(name=self.op.node_name,
                                         family=self.primary_ip_family)
    self.op.node_name = self.hostname.name

    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("Cannot readd the master node",
                                 errors.ECODE_STATE)

    if self.op.readd and self.op.group:
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
                                 " being readded", errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # Exclude added node
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
    post_nodes = pre_nodes + [self.op.node_name, ]

    return (pre_nodes, post_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) match the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg
    hostname = self.hostname
    node = hostname.name
    primary_ip = self.op.primary_ip = hostname.ip
    if self.op.secondary_ip is None:
      if self.primary_ip_family == netutils.IP6Address.family:
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
                                   " IPv4 address must be given as secondary",
                                   errors.ECODE_INVAL)
      self.op.secondary_ip = primary_ip

    secondary_ip = self.op.secondary_ip
    if not netutils.IP4Address.IsValid(secondary_ip):
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                 " address" % secondary_ip, errors.ECODE_INVAL)

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    self.changed_primary_ip = False

    for existing_node_name in node_list:
      existing_node = cfg.GetNodeInfo(existing_node_name)

      if self.op.readd and node == existing_node_name:
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # After this 'if' block, None is no longer a valid value for the
    # _capable op attributes
    if self.op.readd:
      old_node = self.cfg.GetNodeInfo(node)
      assert old_node is not None, "Can't retrieve locked node %s" % node
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, getattr(old_node, attr))
    else:
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, True)

    if self.op.readd and not self.op.vm_capable:
      pri, sec = cfg.GetNodeInstances(node)
      if pri or sec:
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
                                   " flag set to false, but it already holds"
                                   " instances" % node,
                                   errors.ECODE_STATE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no secondary ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a secondary ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                           source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to node daemon port",
                                   errors.ECODE_ENVIRON)

    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    if self.op.master_capable:
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
    else:
      self.master_candidate = False

    if self.op.readd:
      self.new_node = old_node
    else:
      node_group = cfg.LookupNodeGroup(self.op.group)
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False,
                                   group=node_group)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    # We are adding a new node, so we assume it's powered
    new_node.powered = True

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # copy the master/vm_capable flags
    for attr in self._NFLAGS:
      setattr(new_node, attr, getattr(self.op, attr))

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    if self.op.ndparams:
      new_node.ndparams = self.op.ndparams
    else:
      new_node.ndparams = {}

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_ADD,
                                              self.hostname.name,
                                              self.hostname.ip)
      result.Raise("Can't update hosts file with new host data")

    if new_node.secondary_ip != new_node.primary_ip:
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
                               False)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: [node],
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
                                  additional_vm=self.op.vm_capable)
      self.context.AddNode(new_node, self.proc.GetECId())


class LUNodeSetParams(LogicalUnit):
  """Modifies the parameters of a node.

  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
      to the node role (as _ROLE_*)
  @cvar _R2F: a dictionary from node role to tuples of flags
  @cvar _FLAGS: a list of attribute names corresponding to the flags

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
  _FLAGS = ["master_candidate", "drained", "offline"]
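  # Illustration: flag tuples are (master_candidate, drained, offline), so
  # (True, False, False) maps to _ROLE_CANDIDATE and the all-False tuple to
  # _ROLE_REGULAR; _R2F is the inverse mapping, used when applying a newly
  # computed role back to the node flags.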

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
                self.op.master_capable, self.op.vm_capable,
                self.op.secondary_ip, self.op.ndparams]
    if all_mods.count(None) == len(all_mods):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we might be demoting from MC
    self.might_demote = (self.op.master_candidate == False or
                         self.op.offline == True or
                         self.op.drained == True or
                         self.op.master_capable == False)

    if self.op.secondary_ip:
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                   " address" % self.op.secondary_ip,
                                   errors.ECODE_INVAL)

    self.lock_all = self.op.auto_promote and self.might_demote
    self.lock_instances = self.op.secondary_ip is not None

  def ExpandNames(self):
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

    if self.lock_instances:
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET

  def DeclareLocks(self, level):
    # If we have locked all instances, before waiting to lock nodes, release
    # all the ones living on nodes unrelated to the current operation.
    if level == locking.LEVEL_NODE and self.lock_instances:
      self.affected_instances = []
      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
        instances_keep = []

        # Build list of instances to release
        for instance_name in self.glm.list_owned(locking.LEVEL_INSTANCE):
          instance = self.context.cfg.GetInstanceInfo(instance_name)
          if (instance.disk_template in constants.DTS_INT_MIRROR and
              self.op.node_name in instance.all_nodes):
            instances_keep.append(instance_name)
            self.affected_instances.append(instance)

        _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)

        assert (set(self.glm.list_owned(locking.LEVEL_INSTANCE)) ==
                set(instances_keep))

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via master-failover",
                                   errors.ECODE_INVAL)

    if self.op.master_candidate and not node.master_capable:
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
                                 " it a master candidate" % node.name,
                                 errors.ECODE_STATE)

    if self.op.vm_capable == False:
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
      if ipri or isec:
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
                                   " the vm_capable flag" % node.name,
                                   errors.ECODE_STATE)

    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto promote option to allow"
                                   " promotion", errors.ECODE_STATE)

    self.old_flags = old_flags = (node.master_candidate,
                                  node.drained, node.offline)
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
    self.old_role = old_role = self._F2R[old_flags]

    # Check for ineffective changes
    for attr in self._FLAGS:
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
        setattr(self.op, attr, None)

    # Past this point, any flag change to False means a transition
    # away from the respective state, as only real changes are kept

    # TODO: We might query the real power state if it supports OOB
    if _SupportsOob(self.cfg, node):
      if self.op.offline is False and not (node.powered or
                                           self.op.powered == True):
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
                                    " offline status can be reset") %
                                   self.op.node_name)
    elif self.op.powered is not None:
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
                                  " as it does not support out-of-band"
                                  " handling") % self.op.node_name)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.op.drained == False or self.op.offline == False or
        (self.op.master_capable and not node.master_capable)):
      if _DecideSelfPromotion(self):
        self.op.master_candidate = True
        self.LogInfo("Auto-promoting node to master candidate")

    # If we're no longer master capable, we'll demote ourselves from MC
    if self.op.master_capable == False and node.master_candidate:
      self.LogInfo("Demoting from master candidate")
      self.op.master_candidate = False

    # Compute new role
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
    if self.op.master_candidate:
      new_role = self._ROLE_CANDIDATE
    elif self.op.drained:
      new_role = self._ROLE_DRAINED
    elif self.op.offline:
      new_role = self._ROLE_OFFLINE
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # only) True flag
      new_role = self._ROLE_REGULAR
    else: # no new flags, nothing, keep old role
      new_role = old_role

    self.new_role = new_role

    if old_role == self._ROLE_OFFLINE and new_role != old_role:
      # Trying to transition out of offline status
      result = self.rpc.call_version([node.name])[node.name]
      if result.fail_msg:
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                   " to report its version: %s" %
                                   (node.name, result.fail_msg),
                                   errors.ECODE_STATE)
      else:
        self.LogWarning("Transitioning node from offline to online state"
                        " without using re-add. Please make sure the node"
                        " is healthy!")

    if self.op.secondary_ip:
      # Ok even without locking, because this can't be changed by any LU
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
      master_singlehomed = master.secondary_ip == master.primary_ip
      if master_singlehomed and self.op.secondary_ip:
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
                                   " homed cluster", errors.ECODE_INVAL)

      if node.offline:
        if self.affected_instances:
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
                                     " node has instances (%s) configured"
                                     " to use it" % self.affected_instances)
      else:
        # On online nodes, check that no instances are running, and that
        # the node has the new ip and we can reach it.
        for instance in self.affected_instances:
          _CheckInstanceDown(self, instance, "cannot change secondary ip")

        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
        if master.name != node.name:
          # check reachability from master secondary ip to new secondary ip
          if not netutils.TcpPing(self.op.secondary_ip,
                                  constants.DEFAULT_NODED_PORT,
                                  source=master.secondary_ip):
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                       " based ping to node daemon port",
                                       errors.ECODE_ENVIRON)

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node
    old_role = self.old_role
    new_role = self.new_role

    result = []

    if self.op.ndparams:
      node.ndparams = self.new_ndparams

    if self.op.powered is not None:
      node.powered = self.op.powered

    for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))

    if new_role != old_role:
      # Tell the node to demote itself, if no longer MC and not offline
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s", msg)

      new_flags = self._R2F[new_role]
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
        if of != nf:
          result.append((desc, str(nf)))
      (node.master_candidate, node.drained, node.offline) = new_flags

      # we locked all nodes, we adjust the CP before updating this node
      if self.lock_all:
        _AdjustCandidatePool(self, [node.name])

    if self.op.secondary_ip:
      node.secondary_ip = self.op.secondary_ip
      result.append(("secondary_ip", self.op.secondary_ip))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup if the mc
    # flag changed
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)

    return result


class LUNodePowercycle(NoHooksLU):
  """Powercycles a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload


class LUClusterQuery(NoHooksLU):
  """Query cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}

    # Filter just for enabled hypervisors
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = {}
      for hv_name, hv_params in hv_dict.items():
        if hv_name in cluster.enabled_hypervisors:
          os_hvp[os_name][hv_name] = hv_params

    # Convert ip_family to ip_version
    primary_ip_version = constants.IP4_VERSION
    if cluster.primary_ip_family == netutils.IP6Address.family:
      primary_ip_version = constants.IP6_VERSION

    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.enabled_hypervisors[0],
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "nicparams": cluster.nicparams,
      "ndparams": cluster.ndparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      "primary_ip_version": primary_ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
      "hidden_os": cluster.hidden_os,
      "blacklisted_os": cluster.blacklisted_os,
      }

    return result


class LUClusterConfigQuery(NoHooksLU):
  """Return configuration values.

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause", "volume_group_name")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Dump a representation of the cluster config to the standard output.

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      elif field == "volume_group_name":
        entry = self.cfg.GetVGName()
      else:
        raise errors.ParameterError(field)
      values.append(entry)
    return values


class LUInstanceActivateDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
              _AssembleInstanceDisks(self, self.instance,
                                     ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info


def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: a tuple of (disks_ok, device_info), where device_info is a
      list of (host, instance_visible_name, node_visible_name) tuples
      mapping node devices to instance devices; disks_ok is False if
      any device could not be assembled

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  # With the two-pass mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for idx, inst_disk in enumerate(disks):
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for idx, inst_disk in enumerate(disks):
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info


def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                           ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")


class LUInstanceDeactivateDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    """
    instance = self.instance
    if self.op.force:
      _ShutdownInstanceDisks(self, instance)
    else:
      _SafeShutdownInstanceDisks(self, instance)


def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks if an instance is running, before calling
  _ShutdownInstanceDisks.

  """
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)


def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on

  """
  if disks is None:
    return instance.disks
  else:
    if not set(disks).issubset(instance.disks):
      raise errors.ProgrammerError("Can only act on disks belonging to the"
                                   " target instance")
    return disks


def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If ignore_primary is true, errors on the primary node are ignored.

  """
  all_result = True
5293
  disks = _ExpandCheckDisks(instance, disks)
5294

    
5295
  for disk in disks:
5296
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
5297
      lu.cfg.SetDiskID(top_disk, node)
5298
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5299
      msg = result.fail_msg
5300
      if msg:
5301
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
5302
                      disk.iv_name, node, msg)
5303
        if ((node == instance.primary_node and not ignore_primary) or
5304
            (node != instance.primary_node and not result.offline)):
5305
          all_result = False
5306
  return all_result
5307

    
5308

    
5309
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5310
  """Checks if a node has enough free memory.
5311

5312
  This function check if a given node has the needed amount of free
5313
  memory. In case the node has less memory or we cannot get the
5314
  information from the node, this function raise an OpPrereqError
5315
  exception.
5316

5317
  @type lu: C{LogicalUnit}
5318
  @param lu: a logical unit from which we get configuration data
5319
  @type node: C{str}
5320
  @param node: the node to check
5321
  @type reason: C{str}
5322
  @param reason: string to use in the error message
5323
  @type requested: C{int}
5324
  @param requested: the amount of memory in MiB to check for
5325
  @type hypervisor_name: C{str}
5326
  @param hypervisor_name: the hypervisor to ask for memory stats
5327
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5328
      we cannot check the node
5329

5330
  """
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  free_mem = nodeinfo[node].payload.get('memory_free', None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)


def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Checks if nodes have enough free disk space in all VGs.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type req_sizes: C{dict}
  @param req_sizes: the hash of vg and corresponding amount of disk in
      MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  for vg, req_size in req_sizes.items():
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)


def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
  """Checks if nodes have enough free disk space in the specified VG.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    vg_free = info.payload.get("vg_free", None)
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node"
                                 " %s for vg %s, result was '%s'" %
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s"
                                 " vg %s: required %d MiB, available %d MiB" %
                                 (node, vg, requested, vg_free),
                                 errors.ECODE_NORES)


class LUInstanceStartup(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    # extra beparams
    if self.op.beparams:
      # fill the beparams dict
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra hvparams
    if self.op.hvparams:
      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = cluster.FillHV(instance)
      filled_hvp.update(self.op.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
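      # The merged hypervisor parameters are checked both locally (syntax)
      # and, via _CheckHVParams, on all of the instance's nodes before any
      # state is changed.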

    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")

      if self.op.hvparams or self.op.beparams:
        self.proc.LogWarning("Overridden parameters are ignored")
    else:
      _CheckNodeOnline(self, instance.primary_node)

      bep = self.cfg.GetClusterInfo().FillBE(instance)

      # check bridges existence
      _CheckInstanceBridgesExist(self, instance)

      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
      remote_info.Raise("Error checking node %s" % instance.primary_node,
                        prereq=True, ecode=errors.ECODE_ENVIRON)
      if not remote_info.payload: # not running already
        _CheckNodeFreeMemory(self, instance.primary_node,
                             "starting instance %s" % instance.name,
                             bep[constants.BE_MEMORY], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    """
    instance = self.instance
    force = self.op.force

    if not self.op.no_remember:
      self.cfg.MarkInstanceUp(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as started")
    else:
      node_current = instance.primary_node

      _StartInstanceDisks(self, instance, force)

      result = self.rpc.call_instance_start(node_current, instance,
                                            self.op.hvparams, self.op.beparams)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance: %s" % msg)


class LUInstanceReboot(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type
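    # Soft and hard reboots are delegated to the hypervisor on the primary
    # node; any other reboot type (or a stopped instance) falls back to the
    # full stop/start cycle below.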

    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node)
    instance_running = bool(remote_info.payload)

    node_current = instance.primary_node

    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                                            constants.INSTANCE_REBOOT_HARD]:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, node_current)
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             reboot_type,
                                             self.op.shutdown_timeout)
      result.Raise("Could not reboot instance")
    else:
      if instance_running:
        result = self.rpc.call_instance_shutdown(node_current, instance,
                                                 self.op.shutdown_timeout)
        result.Raise("Could not shutdown instance for full reboot")
        _ShutdownInstanceDisks(self, instance)
      else:
        self.LogInfo("Instance %s was already stopped, starting now",
                     instance.name)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(node_current, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)


class LUInstanceShutdown(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.op.timeout
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    self.primary_offline = \
      self.cfg.GetNodeInfo(self.instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")
    else:
      _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    node_current = instance.primary_node
    timeout = self.op.timeout

    if not self.op.no_remember:
      self.cfg.MarkInstanceDown(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
      msg = result.fail_msg
      if msg:
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)

      _ShutdownInstanceDisks(self, instance)


class LUInstanceReinstall(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
                     " offline, cannot reinstall")
    for node in instance.secondary_nodes:
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
                       " cannot reinstall")

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot reinstall")
5724

    
5725
    if self.op.os_type is not None:
5726
      # OS verification
5727
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
5728
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
5729
      instance_os = self.op.os_type
5730
    else:
5731
      instance_os = instance.os
5732

    
5733
    nodelist = list(instance.all_nodes)
5734

    
5735
    if self.op.osparams:
5736
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
5737
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
5738
      self.os_inst = i_osdict # the new dict (without defaults)
5739
    else:
5740
      self.os_inst = None
5741

    
5742
    self.instance = instance
5743

    
5744
  def Exec(self, feedback_fn):
5745
    """Reinstall the instance.
5746

5747
    """
5748
    inst = self.instance
5749

    
5750
    if self.op.os_type is not None:
5751
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
5752
      inst.os = self.op.os_type
5753
      # Write to configuration
5754
      self.cfg.Update(inst, feedback_fn)
5755

    
5756
    _StartInstanceDisks(self, inst, None)
5757
    try:
5758
      feedback_fn("Running the instance OS create scripts...")
5759
      # FIXME: pass debug option from opcode to backend
5760
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
5761
                                             self.op.debug_level,
5762
                                             osparams=self.os_inst)
5763
      result.Raise("Could not install OS for instance %s on node %s" %
5764
                   (inst.name, inst.primary_node))
5765
    finally:
5766
      _ShutdownInstanceDisks(self, inst)
5767

    
5768

    
5769
class LUInstanceRecreateDisks(LogicalUnit):
5770
  """Recreate an instance's missing disks.
5771

5772
  """
5773
  HPATH = "instance-recreate-disks"
5774
  HTYPE = constants.HTYPE_INSTANCE
5775
  REQ_BGL = False
5776

    
5777
  def CheckArguments(self):
5778
    # normalise the disk list
5779
    self.op.disks = sorted(frozenset(self.op.disks))
5780

    
5781
  def ExpandNames(self):
5782
    self._ExpandAndLockInstance()
5783
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5784
    if self.op.nodes:
5785
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
5786
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
5787
    else:
5788
      self.needed_locks[locking.LEVEL_NODE] = []
5789

    
5790
  def DeclareLocks(self, level):
5791
    if level == locking.LEVEL_NODE:
5792
      # if we replace the nodes, we only need to lock the old primary,
5793
      # otherwise we need to lock all nodes for disk re-creation
5794
      primary_only = bool(self.op.nodes)
5795
      self._LockInstancesNodes(primary_only=primary_only)
5796

    
5797
  def BuildHooksEnv(self):
5798
    """Build hooks env.
5799

5800
    This runs on master, primary and secondary nodes of the instance.
5801

5802
    """
5803
    return _BuildInstanceHookEnvByObject(self, self.instance)
5804

    
5805
  def BuildHooksNodes(self):
5806
    """Build hooks nodes.
5807

5808
    """
5809
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5810
    return (nl, nl)
5811

    
5812
  def CheckPrereq(self):
5813
    """Check prerequisites.
5814

5815
    This checks that the instance is in the cluster and is not running.
5816

5817
    """
5818
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5819
    assert instance is not None, \
5820
      "Cannot retrieve locked instance %s" % self.op.instance_name
5821
    if self.op.nodes:
5822
      if len(self.op.nodes) != len(instance.all_nodes):
5823
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
5824
                                   " %d replacement nodes were specified" %
5825
                                   (instance.name, len(instance.all_nodes),
5826
                                    len(self.op.nodes)),
5827
                                   errors.ECODE_INVAL)
5828
      assert instance.disk_template != constants.DT_DRBD8 or \
5829
          len(self.op.nodes) == 2
5830
      assert instance.disk_template != constants.DT_PLAIN or \
5831
          len(self.op.nodes) == 1
5832
      primary_node = self.op.nodes[0]
5833
    else:
5834
      primary_node = instance.primary_node
5835
    _CheckNodeOnline(self, primary_node)
5836

    
5837
    if instance.disk_template == constants.DT_DISKLESS:
5838
      raise errors.OpPrereqError("Instance '%s' has no disks" %
5839
                                 self.op.instance_name, errors.ECODE_INVAL)
5840
    # if we replace nodes *and* the old primary is offline, we don't
5841
    # check
5842
    assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
5843
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
5844
    if not (self.op.nodes and old_pnode.offline):
5845
      _CheckInstanceDown(self, instance, "cannot recreate disks")
5846

    
5847
    if not self.op.disks:
5848
      self.op.disks = range(len(instance.disks))
5849
    else:
5850
      for idx in self.op.disks:
5851
        if idx >= len(instance.disks):
5852
          raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
5853
                                     errors.ECODE_INVAL)
5854
    if self.op.disks != range(len(instance.disks)) and self.op.nodes:
5855
      raise errors.OpPrereqError("Can't recreate disks partially and"
5856
                                 " change the nodes at the same time",
5857
                                 errors.ECODE_INVAL)
5858
    self.instance = instance
5859

    
5860
  def Exec(self, feedback_fn):
5861
    """Recreate the disks.
5862

5863
    """
5864
    # change primary node, if needed
5865
    if self.op.nodes:
5866
      self.instance.primary_node = self.op.nodes[0]
5867
      self.LogWarning("Changing the instance's nodes, you will have to"
5868
                      " remove any disks left on the older nodes manually")
5869

    
5870
    to_skip = []
5871
    for idx, disk in enumerate(self.instance.disks):
5872
      if idx not in self.op.disks: # disk idx has not been passed in
5873
        to_skip.append(idx)
5874
        continue
5875
      # update secondaries for disks, if needed
5876
      if self.op.nodes:
5877
        if disk.dev_type == constants.LD_DRBD8:
5878
          # need to update the nodes
5879
          assert len(self.op.nodes) == 2
5880
          logical_id = list(disk.logical_id)
5881
          logical_id[0] = self.op.nodes[0]
5882
          logical_id[1] = self.op.nodes[1]
5883
          disk.logical_id = tuple(logical_id)
5884

    
5885
    if self.op.nodes:
5886
      self.cfg.Update(self.instance, feedback_fn)
5887

    
5888
    _CreateDisks(self, self.instance, to_skip=to_skip)
5889

    
5890

    
5891
class LUInstanceRename(LogicalUnit):
5892
  """Rename an instance.
5893

5894
  """
5895
  HPATH = "instance-rename"
5896
  HTYPE = constants.HTYPE_INSTANCE
5897

    
5898
  def CheckArguments(self):
5899
    """Check arguments.
5900

5901
    """
5902
    if self.op.ip_check and not self.op.name_check:
5903
      # TODO: make the ip check more flexible and not depend on the name check
5904
      raise errors.OpPrereqError("IP address check requires a name check",
5905
                                 errors.ECODE_INVAL)
5906

    
5907
  def BuildHooksEnv(self):
5908
    """Build hooks env.
5909

5910
    This runs on master, primary and secondary nodes of the instance.
5911

5912
    """
5913
    env = _BuildInstanceHookEnvByObject(self, self.instance)
5914
    env["INSTANCE_NEW_NAME"] = self.op.new_name
5915
    return env
5916

    
5917
  def BuildHooksNodes(self):
5918
    """Build hooks nodes.
5919

5920
    """
5921
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5922
    return (nl, nl)
5923

    
5924
  def CheckPrereq(self):
5925
    """Check prerequisites.
5926

5927
    This checks that the instance is in the cluster and is not running.
5928

5929
    """
5930
    self.op.instance_name = _ExpandInstanceName(self.cfg,
5931
                                                self.op.instance_name)
5932
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5933
    assert instance is not None
5934
    _CheckNodeOnline(self, instance.primary_node)
5935
    _CheckInstanceDown(self, instance, "cannot rename")
5936
    self.instance = instance
5937

    
5938
    new_name = self.op.new_name
5939
    if self.op.name_check:
5940
      hostname = netutils.GetHostname(name=new_name)
5941
      if hostname != new_name:
5942
        self.LogInfo("Resolved given name '%s' to '%s'", new_name,
5943
                     hostname.name)
5944
      if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
5945
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
5946
                                    " same as given hostname '%s'") %
5947
                                    (hostname.name, self.op.new_name),
5948
                                    errors.ECODE_INVAL)
5949
      new_name = self.op.new_name = hostname.name
5950
      if (self.op.ip_check and
5951
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
5952
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
5953
                                   (hostname.ip, new_name),
5954
                                   errors.ECODE_NOTUNIQUE)
5955

    
5956
    instance_list = self.cfg.GetInstanceList()
5957
    if new_name in instance_list and new_name != instance.name:
5958
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
5959
                                 new_name, errors.ECODE_EXISTS)
5960

    
5961
  def Exec(self, feedback_fn):
5962
    """Rename the instance.
5963

5964
    """
5965
    inst = self.instance
5966
    old_name = inst.name
5967

    
5968
    rename_file_storage = False
5969
    if (inst.disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE) and
5970
        self.op.new_name != inst.name):
5971
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5972
      rename_file_storage = True
5973

    
5974
    self.cfg.RenameInstance(inst.name, self.op.new_name)
5975
    # Change the instance lock. This is definitely safe while we hold the BGL.
5976
    # Otherwise the new lock would have to be added in acquired mode.
5977
    assert self.REQ_BGL
5978
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
5979
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
5980

    
5981
    # re-read the instance from the configuration after rename
5982
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
5983

    
5984
    if rename_file_storage:
5985
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5986
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
5987
                                                     old_file_storage_dir,
5988
                                                     new_file_storage_dir)
5989
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
5990
                   " (but the instance has been renamed in Ganeti)" %
5991
                   (inst.primary_node, old_file_storage_dir,
5992
                    new_file_storage_dir))
5993

    
5994
    _StartInstanceDisks(self, inst, None)
5995
    try:
5996
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
5997
                                                 old_name, self.op.debug_level)
5998
      msg = result.fail_msg
5999
      if msg:
6000
        msg = ("Could not run OS rename script for instance %s on node %s"
6001
               " (but the instance has been renamed in Ganeti): %s" %
6002
               (inst.name, inst.primary_node, msg))
6003
        self.proc.LogWarning(msg)
6004
    finally:
6005
      _ShutdownInstanceDisks(self, inst)
6006

    
6007
    return inst.name
6008

    
6009

    
6010
class LUInstanceRemove(LogicalUnit):
6011
  """Remove an instance.
6012

6013
  """
6014
  HPATH = "instance-remove"
6015
  HTYPE = constants.HTYPE_INSTANCE
6016
  REQ_BGL = False
6017

    
6018
  def ExpandNames(self):
6019
    self._ExpandAndLockInstance()
6020
    self.needed_locks[locking.LEVEL_NODE] = []
6021
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6022

    
6023
  def DeclareLocks(self, level):
6024
    if level == locking.LEVEL_NODE:
6025
      self._LockInstancesNodes()
6026

    
6027
  def BuildHooksEnv(self):
6028
    """Build hooks env.
6029

6030
    This runs on master, primary and secondary nodes of the instance.
6031

6032
    """
6033
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6034
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6035
    return env
6036

    
6037
  def BuildHooksNodes(self):
6038
    """Build hooks nodes.
6039

6040
    """
6041
    nl = [self.cfg.GetMasterNode()]
6042
    nl_post = list(self.instance.all_nodes) + nl
6043
    return (nl, nl_post)
6044

    
6045
  def CheckPrereq(self):
6046
    """Check prerequisites.
6047

6048
    This checks that the instance is in the cluster.
6049

6050
    """
6051
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6052
    assert self.instance is not None, \
6053
      "Cannot retrieve locked instance %s" % self.op.instance_name
6054

    
6055
  def Exec(self, feedback_fn):
6056
    """Remove the instance.
6057

6058
    """
6059
    instance = self.instance
6060
    logging.info("Shutting down instance %s on node %s",
6061
                 instance.name, instance.primary_node)
6062

    
6063
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6064
                                             self.op.shutdown_timeout)
6065
    msg = result.fail_msg
6066
    if msg:
6067
      if self.op.ignore_failures:
6068
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
6069
      else:
6070
        raise errors.OpExecError("Could not shutdown instance %s on"
6071
                                 " node %s: %s" %
6072
                                 (instance.name, instance.primary_node, msg))
6073

    
6074
    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6075

    
6076

    
6077
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6078
  """Utility function to remove an instance.
6079

6080
  """
6081
  logging.info("Removing block devices for instance %s", instance.name)
6082

    
6083
  if not _RemoveDisks(lu, instance):
6084
    if not ignore_failures:
6085
      raise errors.OpExecError("Can't remove instance's disks")
6086
    feedback_fn("Warning: can't remove instance's disks")
6087

    
6088
  logging.info("Removing instance %s out of cluster config", instance.name)
6089

    
6090
  lu.cfg.RemoveInstance(instance.name)
6091

    
6092
  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6093
    "Instance lock removal conflict"
6094

    
6095
  # Remove lock for the instance
6096
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6097

    
6098

    
6099
class LUInstanceQuery(NoHooksLU):
6100
  """Logical unit for querying instances.
6101

6102
  """
6103
  # pylint: disable-msg=W0142
6104
  REQ_BGL = False
6105

    
6106
  def CheckArguments(self):
6107
    self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6108
                             self.op.output_fields, self.op.use_locking)
6109

    
6110
  def ExpandNames(self):
6111
    self.iq.ExpandNames(self)
6112

    
6113
  def DeclareLocks(self, level):
6114
    self.iq.DeclareLocks(self, level)
6115

    
6116
  def Exec(self, feedback_fn):
6117
    return self.iq.OldStyleQuery(self)
6118

    
6119

    
6120
class LUInstanceFailover(LogicalUnit):
6121
  """Failover an instance.
6122

6123
  """
6124
  HPATH = "instance-failover"
6125
  HTYPE = constants.HTYPE_INSTANCE
6126
  REQ_BGL = False
6127

    
6128
  def CheckArguments(self):
6129
    """Check the arguments.
6130

6131
    """
6132
    self.iallocator = getattr(self.op, "iallocator", None)
6133
    self.target_node = getattr(self.op, "target_node", None)
6134

    
6135
  def ExpandNames(self):
6136
    self._ExpandAndLockInstance()
6137

    
6138
    if self.op.target_node is not None:
6139
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6140

    
6141
    self.needed_locks[locking.LEVEL_NODE] = []
6142
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6143

    
6144
    ignore_consistency = self.op.ignore_consistency
6145
    shutdown_timeout = self.op.shutdown_timeout
6146
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6147
                                       cleanup=False,
6148
                                       failover=True,
6149
                                       ignore_consistency=ignore_consistency,
6150
                                       shutdown_timeout=shutdown_timeout)
6151
    self.tasklets = [self._migrater]
6152

    
6153
  def DeclareLocks(self, level):
6154
    if level == locking.LEVEL_NODE:
6155
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6156
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6157
        if self.op.target_node is None:
6158
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6159
        else:
6160
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6161
                                                   self.op.target_node]
6162
        del self.recalculate_locks[locking.LEVEL_NODE]
6163
      else:
6164
        self._LockInstancesNodes()
6165

    
6166
  def BuildHooksEnv(self):
6167
    """Build hooks env.
6168

6169
    This runs on master, primary and secondary nodes of the instance.
6170

6171
    """
6172
    instance = self._migrater.instance
6173
    source_node = instance.primary_node
6174
    target_node = self.op.target_node
6175
    env = {
6176
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6177
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6178
      "OLD_PRIMARY": source_node,
6179
      "NEW_PRIMARY": target_node,
6180
      }
6181

    
6182
    if instance.disk_template in constants.DTS_INT_MIRROR:
6183
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6184
      env["NEW_SECONDARY"] = source_node
6185
    else:
6186
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6187

    
6188
    env.update(_BuildInstanceHookEnvByObject(self, instance))
6189

    
6190
    return env
6191

    
6192
  def BuildHooksNodes(self):
6193
    """Build hooks nodes.
6194

6195
    """
6196
    instance = self._migrater.instance
6197
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6198
    return (nl, nl + [instance.primary_node])
6199

    
6200

    
6201
class LUInstanceMigrate(LogicalUnit):
6202
  """Migrate an instance.
6203

6204
  This is migration without shutting down, compared to the failover,
6205
  which is done with shutdown.
6206

6207
  """
6208
  HPATH = "instance-migrate"
6209
  HTYPE = constants.HTYPE_INSTANCE
6210
  REQ_BGL = False
6211

    
6212
  def ExpandNames(self):
6213
    self._ExpandAndLockInstance()
6214

    
6215
    if self.op.target_node is not None:
6216
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6217

    
6218
    self.needed_locks[locking.LEVEL_NODE] = []
6219
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6220

    
6221
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6222
                                       cleanup=self.op.cleanup,
6223
                                       failover=False,
6224
                                       fallback=self.op.allow_failover)
6225
    self.tasklets = [self._migrater]
6226

    
6227
  def DeclareLocks(self, level):
6228
    if level == locking.LEVEL_NODE:
6229
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6230
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6231
        if self.op.target_node is None:
6232
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6233
        else:
6234
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6235
                                                   self.op.target_node]
6236
        del self.recalculate_locks[locking.LEVEL_NODE]
6237
      else:
6238
        self._LockInstancesNodes()
6239

    
6240
  def BuildHooksEnv(self):
6241
    """Build hooks env.
6242

6243
    This runs on master, primary and secondary nodes of the instance.
6244

6245
    """
6246
    instance = self._migrater.instance
6247
    source_node = instance.primary_node
6248
    target_node = self.op.target_node
6249
    env = _BuildInstanceHookEnvByObject(self, instance)
6250
    env.update({
6251
      "MIGRATE_LIVE": self._migrater.live,
6252
      "MIGRATE_CLEANUP": self.op.cleanup,
6253
      "OLD_PRIMARY": source_node,
6254
      "NEW_PRIMARY": target_node,
6255
      })
6256

    
6257
    if instance.disk_template in constants.DTS_INT_MIRROR:
6258
      env["OLD_SECONDARY"] = target_node
6259
      env["NEW_SECONDARY"] = source_node
6260
    else:
6261
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6262

    
6263
    return env
6264

    
6265
  def BuildHooksNodes(self):
6266
    """Build hooks nodes.
6267

6268
    """
6269
    instance = self._migrater.instance
6270
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6271
    return (nl, nl + [instance.primary_node])
6272

    
6273

    
6274
class LUInstanceMove(LogicalUnit):
6275
  """Move an instance by data-copying.
6276

6277
  """
6278
  HPATH = "instance-move"
6279
  HTYPE = constants.HTYPE_INSTANCE
6280
  REQ_BGL = False
6281

    
6282
  def ExpandNames(self):
6283
    self._ExpandAndLockInstance()
6284
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6285
    self.op.target_node = target_node
6286
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
6287
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6288

    
6289
  def DeclareLocks(self, level):
6290
    if level == locking.LEVEL_NODE:
6291
      self._LockInstancesNodes(primary_only=True)
6292

    
6293
  def BuildHooksEnv(self):
6294
    """Build hooks env.
6295

6296
    This runs on master, primary and secondary nodes of the instance.
6297

6298
    """
6299
    env = {
6300
      "TARGET_NODE": self.op.target_node,
6301
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6302
      }
6303
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6304
    return env
6305

    
6306
  def BuildHooksNodes(self):
6307
    """Build hooks nodes.
6308

6309
    """
6310
    nl = [
6311
      self.cfg.GetMasterNode(),
6312
      self.instance.primary_node,
6313
      self.op.target_node,
6314
      ]
6315
    return (nl, nl)
6316

    
6317
  def CheckPrereq(self):
6318
    """Check prerequisites.
6319

6320
    This checks that the instance is in the cluster.
6321

6322
    """
6323
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6324
    assert self.instance is not None, \
6325
      "Cannot retrieve locked instance %s" % self.op.instance_name
6326

    
6327
    node = self.cfg.GetNodeInfo(self.op.target_node)
6328
    assert node is not None, \
6329
      "Cannot retrieve locked node %s" % self.op.target_node
6330

    
6331
    self.target_node = target_node = node.name
6332

    
6333
    if target_node == instance.primary_node:
6334
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
6335
                                 (instance.name, target_node),
6336
                                 errors.ECODE_STATE)
6337

    
6338
    bep = self.cfg.GetClusterInfo().FillBE(instance)
6339

    
6340
    for idx, dsk in enumerate(instance.disks):
6341
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6342
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6343
                                   " cannot copy" % idx, errors.ECODE_STATE)
6344

    
6345
    _CheckNodeOnline(self, target_node)
6346
    _CheckNodeNotDrained(self, target_node)
6347
    _CheckNodeVmCapable(self, target_node)
6348

    
6349
    if instance.admin_up:
6350
      # check memory requirements on the secondary node
6351
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6352
                           instance.name, bep[constants.BE_MEMORY],
6353
                           instance.hypervisor)
6354
    else:
6355
      self.LogInfo("Not checking memory on the secondary node as"
6356
                   " instance will not be started")
6357

    
6358
    # check bridge existance
6359
    _CheckInstanceBridgesExist(self, instance, node=target_node)
6360

    
6361
  def Exec(self, feedback_fn):
6362
    """Move an instance.
6363

6364
    The move is done by shutting it down on its present node, copying
6365
    the data over (slow) and starting it on the new node.
6366

6367
    """
6368
    instance = self.instance
6369

    
6370
    source_node = instance.primary_node
6371
    target_node = self.target_node
6372

    
6373
    self.LogInfo("Shutting down instance %s on source node %s",
6374
                 instance.name, source_node)
6375

    
6376
    result = self.rpc.call_instance_shutdown(source_node, instance,
6377
                                             self.op.shutdown_timeout)
6378
    msg = result.fail_msg
6379
    if msg:
6380
      if self.op.ignore_consistency:
6381
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
6382
                             " Proceeding anyway. Please make sure node"
6383
                             " %s is down. Error details: %s",
6384
                             instance.name, source_node, source_node, msg)
6385
      else:
6386
        raise errors.OpExecError("Could not shutdown instance %s on"
6387
                                 " node %s: %s" %
6388
                                 (instance.name, source_node, msg))
6389

    
6390
    # create the target disks
6391
    try:
6392
      _CreateDisks(self, instance, target_node=target_node)
6393
    except errors.OpExecError:
6394
      self.LogWarning("Device creation failed, reverting...")
6395
      try:
6396
        _RemoveDisks(self, instance, target_node=target_node)
6397
      finally:
6398
        self.cfg.ReleaseDRBDMinors(instance.name)
6399
        raise
6400

    
6401
    cluster_name = self.cfg.GetClusterInfo().cluster_name
6402

    
6403
    errs = []
6404
    # activate, get path, copy the data over
6405
    for idx, disk in enumerate(instance.disks):
6406
      self.LogInfo("Copying data for disk %d", idx)
6407
      result = self.rpc.call_blockdev_assemble(target_node, disk,
6408
                                               instance.name, True, idx)
6409
      if result.fail_msg:
6410
        self.LogWarning("Can't assemble newly created disk %d: %s",
6411
                        idx, result.fail_msg)
6412
        errs.append(result.fail_msg)
6413
        break
6414
      dev_path = result.payload
6415
      result = self.rpc.call_blockdev_export(source_node, disk,
6416
                                             target_node, dev_path,
6417
                                             cluster_name)
6418
      if result.fail_msg:
6419
        self.LogWarning("Can't copy data over for disk %d: %s",
6420
                        idx, result.fail_msg)
6421
        errs.append(result.fail_msg)
6422
        break
6423

    
6424
    if errs:
6425
      self.LogWarning("Some disks failed to copy, aborting")
6426
      try:
6427
        _RemoveDisks(self, instance, target_node=target_node)
6428
      finally:
6429
        self.cfg.ReleaseDRBDMinors(instance.name)
6430
        raise errors.OpExecError("Errors during disk copy: %s" %
6431
                                 (",".join(errs),))
6432

    
6433
    instance.primary_node = target_node
6434
    self.cfg.Update(instance, feedback_fn)
6435

    
6436
    self.LogInfo("Removing the disks on the original node")
6437
    _RemoveDisks(self, instance, target_node=source_node)
6438

    
6439
    # Only start the instance if it's marked as up
6440
    if instance.admin_up:
6441
      self.LogInfo("Starting instance %s on node %s",
6442
                   instance.name, target_node)
6443

    
6444
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
6445
                                           ignore_secondaries=True)
6446
      if not disks_ok:
6447
        _ShutdownInstanceDisks(self, instance)
6448
        raise errors.OpExecError("Can't activate the instance's disks")
6449

    
6450
      result = self.rpc.call_instance_start(target_node, instance, None, None)
6451
      msg = result.fail_msg
6452
      if msg:
6453
        _ShutdownInstanceDisks(self, instance)
6454
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6455
                                 (instance.name, target_node, msg))
6456

    
6457

    
6458
class LUNodeMigrate(LogicalUnit):
6459
  """Migrate all instances from a node.
6460

6461
  """
6462
  HPATH = "node-migrate"
6463
  HTYPE = constants.HTYPE_NODE
6464
  REQ_BGL = False
6465

    
6466
  def CheckArguments(self):
6467
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
6468

    
6469
  def ExpandNames(self):
6470
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6471

    
6472
    self.needed_locks = {}
6473

    
6474
    # Create tasklets for migrating instances for all instances on this node
6475
    names = []
6476
    tasklets = []
6477

    
6478
    self.lock_all_nodes = False
6479

    
6480
    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
6481
      logging.debug("Migrating instance %s", inst.name)
6482
      names.append(inst.name)
6483

    
6484
      tasklets.append(TLMigrateInstance(self, inst.name, cleanup=False))
6485

    
6486
      if inst.disk_template in constants.DTS_EXT_MIRROR:
6487
        # We need to lock all nodes, as the iallocator will choose the
6488
        # destination nodes afterwards
6489
        self.lock_all_nodes = True
6490

    
6491
    self.tasklets = tasklets
6492

    
6493
    # Declare node locks
6494
    if self.lock_all_nodes:
6495
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6496
    else:
6497
      self.needed_locks[locking.LEVEL_NODE] = [self.op.node_name]
6498
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6499

    
6500
    # Declare instance locks
6501
    self.needed_locks[locking.LEVEL_INSTANCE] = names
6502

    
6503
  def DeclareLocks(self, level):
6504
    if level == locking.LEVEL_NODE and not self.lock_all_nodes:
6505
      self._LockInstancesNodes()
6506

    
6507
  def BuildHooksEnv(self):
6508
    """Build hooks env.
6509

6510
    This runs on the master, the primary and all the secondaries.
6511

6512
    """
6513
    return {
6514
      "NODE_NAME": self.op.node_name,
6515
      }
6516

    
6517
  def BuildHooksNodes(self):
6518
    """Build hooks nodes.
6519

6520
    """
6521
    nl = [self.cfg.GetMasterNode()]
6522
    return (nl, nl)
6523

    
6524

    
6525
class TLMigrateInstance(Tasklet):
6526
  """Tasklet class for instance migration.
6527

6528
  @type live: boolean
6529
  @ivar live: whether the migration will be done live or non-live;
6530
      this variable is initalized only after CheckPrereq has run
6531
  @type cleanup: boolean
6532
  @ivar cleanup: Wheater we cleanup from a failed migration
6533
  @type iallocator: string
6534
  @ivar iallocator: The iallocator used to determine target_node
6535
  @type target_node: string
6536
  @ivar target_node: If given, the target_node to reallocate the instance to
6537
  @type failover: boolean
6538
  @ivar failover: Whether operation results in failover or migration
6539
  @type fallback: boolean
6540
  @ivar fallback: Whether fallback to failover is allowed if migration not
6541
                  possible
6542
  @type ignore_consistency: boolean
6543
  @ivar ignore_consistency: Wheter we should ignore consistency between source
6544
                            and target node
6545
  @type shutdown_timeout: int
6546
  @ivar shutdown_timeout: In case of failover timeout of the shutdown
6547

6548
  """
6549
  def __init__(self, lu, instance_name, cleanup=False,
6550
               failover=False, fallback=False,
6551
               ignore_consistency=False,
6552
               shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
6553
    """Initializes this class.
6554

6555
    """
6556
    Tasklet.__init__(self, lu)
6557

    
6558
    # Parameters
6559
    self.instance_name = instance_name
6560
    self.cleanup = cleanup
6561
    self.live = False # will be overridden later
6562
    self.failover = failover
6563
    self.fallback = fallback
6564
    self.ignore_consistency = ignore_consistency
6565
    self.shutdown_timeout = shutdown_timeout
6566

    
6567
  def CheckPrereq(self):
6568
    """Check prerequisites.
6569

6570
    This checks that the instance is in the cluster.
6571

6572
    """
6573
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6574
    instance = self.cfg.GetInstanceInfo(instance_name)
6575
    assert instance is not None
6576
    self.instance = instance
6577

    
6578
    if (not self.cleanup and not instance.admin_up and not self.failover and
6579
        self.fallback):
6580
      self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
6581
                      " to failover")
6582
      self.failover = True
6583

    
6584
    if instance.disk_template not in constants.DTS_MIRRORED:
6585
      if self.failover:
6586
        text = "failovers"
6587
      else:
6588
        text = "migrations"
6589
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
6590
                                 " %s" % (instance.disk_template, text),
6591
                                 errors.ECODE_STATE)
6592

    
6593
    if instance.disk_template in constants.DTS_EXT_MIRROR:
6594
      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
6595

    
6596
      if self.lu.op.iallocator:
6597
        self._RunAllocator()
6598
      else:
6599
        # We set set self.target_node as it is required by
6600
        # BuildHooksEnv
6601
        self.target_node = self.lu.op.target_node
6602

    
6603
      # self.target_node is already populated, either directly or by the
6604
      # iallocator run
6605
      target_node = self.target_node
6606
      if self.target_node == instance.primary_node:
6607
        raise errors.OpPrereqError("Cannot migrate instance %s"
6608
                                   " to its primary (%s)" %
6609
                                   (instance.name, instance.primary_node))
6610

    
6611
      if len(self.lu.tasklets) == 1:
6612
        # It is safe to release locks only when we're the only tasklet
6613
        # in the LU
6614
        _ReleaseLocks(self.lu, locking.LEVEL_NODE,
6615
                      keep=[instance.primary_node, self.target_node])
6616

    
6617
    else:
6618
      secondary_nodes = instance.secondary_nodes
6619
      if not secondary_nodes:
6620
        raise errors.ConfigurationError("No secondary node but using"
6621
                                        " %s disk template" %
6622
                                        instance.disk_template)
6623
      target_node = secondary_nodes[0]
6624
      if self.lu.op.iallocator or (self.lu.op.target_node and
6625
                                   self.lu.op.target_node != target_node):
6626
        if self.failover:
6627
          text = "failed over"
6628
        else:
6629
          text = "migrated"
6630
        raise errors.OpPrereqError("Instances with disk template %s cannot"
6631
                                   " be %s to arbitrary nodes"
6632
                                   " (neither an iallocator nor a target"
6633
                                   " node can be passed)" %
6634
                                   (instance.disk_template, text),
6635
                                   errors.ECODE_INVAL)
6636

    
6637
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
6638

    
6639
    # check memory requirements on the secondary node
6640
    if not self.failover or instance.admin_up:
6641
      _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
6642
                           instance.name, i_be[constants.BE_MEMORY],
6643
                           instance.hypervisor)
6644
    else:
6645
      self.lu.LogInfo("Not checking memory on the secondary node as"
6646
                      " instance will not be started")
6647

    
6648
    # check bridge existance
6649
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
6650

    
6651
    if not self.cleanup:
6652
      _CheckNodeNotDrained(self.lu, target_node)
6653
      if not self.failover:
6654
        result = self.rpc.call_instance_migratable(instance.primary_node,
6655
                                                   instance)
6656
        if result.fail_msg and self.fallback:
6657
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
6658
                          " failover")
6659
          self.failover = True
6660
        else:
6661
          result.Raise("Can't migrate, please use failover",
6662
                       prereq=True, ecode=errors.ECODE_STATE)
6663

    
6664
    assert not (self.failover and self.cleanup)
6665

    
6666
    if not self.failover:
6667
      if self.lu.op.live is not None and self.lu.op.mode is not None:
6668
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
6669
                                   " parameters are accepted",
6670
                                   errors.ECODE_INVAL)
6671
      if self.lu.op.live is not None:
6672
        if self.lu.op.live:
6673
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
6674
        else:
6675
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
6676
        # reset the 'live' parameter to None so that repeated
6677
        # invocations of CheckPrereq do not raise an exception
6678
        self.lu.op.live = None
6679
      elif self.lu.op.mode is None:
6680
        # read the default value from the hypervisor
6681
        i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
6682
                                                skip_globals=False)
6683
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
6684

    
6685
      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
6686
    else:
6687
      # Failover is never live
6688
      self.live = False
6689

    
6690
  def _RunAllocator(self):
6691
    """Run the allocator based on input opcode.
6692

6693
    """
6694
    ial = IAllocator(self.cfg, self.rpc,
6695
                     mode=constants.IALLOCATOR_MODE_RELOC,
6696
                     name=self.instance_name,
6697
                     # TODO See why hail breaks with a single node below
6698
                     relocate_from=[self.instance.primary_node,
6699
                                    self.instance.primary_node],
6700
                     )
6701

    
6702
    ial.Run(self.lu.op.iallocator)
6703

    
6704
    if not ial.success:
6705
      raise errors.OpPrereqError("Can't compute nodes using"
6706
                                 " iallocator '%s': %s" %
6707
                                 (self.lu.op.iallocator, ial.info),
6708
                                 errors.ECODE_NORES)
6709
    if len(ial.result) != ial.required_nodes:
6710
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6711
                                 " of nodes (%s), required %s" %
6712
                                 (self.lu.op.iallocator, len(ial.result),
6713
                                  ial.required_nodes), errors.ECODE_FAULT)
6714
    self.target_node = ial.result[0]
6715
    self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6716
                 self.instance_name, self.lu.op.iallocator,
6717
                 utils.CommaJoin(ial.result))
6718

    
6719
  def _WaitUntilSync(self):
6720
    """Poll with custom rpc for disk sync.
6721

6722
    This uses our own step-based rpc call.
6723

6724
    """
6725
    self.feedback_fn("* wait until resync is done")
6726
    all_done = False
6727
    while not all_done:
6728
      all_done = True
6729
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
6730
                                            self.nodes_ip,
6731
                                            self.instance.disks)
6732
      min_percent = 100
6733
      for node, nres in result.items():
6734
        nres.Raise("Cannot resync disks on node %s" % node)
6735
        node_done, node_percent = nres.payload
6736
        all_done = all_done and node_done
6737
        if node_percent is not None:
6738
          min_percent = min(min_percent, node_percent)
6739
      if not all_done:
6740
        if min_percent < 100:
6741
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
6742
        time.sleep(2)
6743

    
6744
  def _EnsureSecondary(self, node):
6745
    """Demote a node to secondary.
6746

6747
    """
6748
    self.feedback_fn("* switching node %s to secondary mode" % node)
6749

    
6750
    for dev in self.instance.disks:
6751
      self.cfg.SetDiskID(dev, node)
6752

    
6753
    result = self.rpc.call_blockdev_close(node, self.instance.name,
6754
                                          self.instance.disks)
6755
    result.Raise("Cannot change disk to secondary on node %s" % node)
6756

    
6757
  def _GoStandalone(self):
6758
    """Disconnect from the network.
6759

6760
    """
6761
    self.feedback_fn("* changing into standalone mode")
6762
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
6763
                                               self.instance.disks)
6764
    for node, nres in result.items():
6765
      nres.Raise("Cannot disconnect disks node %s" % node)
6766

    
6767
  def _GoReconnect(self, multimaster):
6768
    """Reconnect to the network.
6769

6770
    """
6771
    if multimaster:
6772
      msg = "dual-master"
6773
    else:
6774
      msg = "single-master"
6775
    self.feedback_fn("* changing disks into %s mode" % msg)
6776
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
6777
                                           self.instance.disks,
6778
                                           self.instance.name, multimaster)
6779
    for node, nres in result.items():
6780
      nres.Raise("Cannot change disks config on node %s" % node)
6781

    
6782
  def _ExecCleanup(self):
6783
    """Try to cleanup after a failed migration.
6784

6785
    The cleanup is done by:
6786
      - check that the instance is running only on one node
6787
        (and update the config if needed)
6788
      - change disks on its secondary node to secondary
6789
      - wait until disks are fully synchronized
6790
      - disconnect from the network
6791
      - change disks into single-master mode
6792
      - wait again until disks are fully synchronized
6793

6794
    """
6795
    instance = self.instance
6796
    target_node = self.target_node
6797
    source_node = self.source_node
6798

    
6799
    # check running on only one node
6800
    self.feedback_fn("* checking where the instance actually runs"
6801
                     " (if this hangs, the hypervisor might be in"
6802
                     " a bad state)")
6803
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6804
    for node, result in ins_l.items():
6805
      result.Raise("Can't contact node %s" % node)
6806

    
6807
    runningon_source = instance.name in ins_l[source_node].payload
6808
    runningon_target = instance.name in ins_l[target_node].payload
6809

    
6810
    if runningon_source and runningon_target:
6811
      raise errors.OpExecError("Instance seems to be running on two nodes,"
6812
                               " or the hypervisor is confused; you will have"
6813
                               " to ensure manually that it runs only on one"
6814
                               " and restart this operation")
6815

    
6816
    if not (runningon_source or runningon_target):
6817
      raise errors.OpExecError("Instance does not seem to be running at all;"
6818
                               " in this case it's safer to repair by"
6819
                               " running 'gnt-instance stop' to ensure disk"
6820
                               " shutdown, and then restarting it")
6821

    
6822
    if runningon_target:
6823
      # the migration has actually succeeded, we need to update the config
6824
      self.feedback_fn("* instance running on secondary node (%s),"
6825
                       " updating config" % target_node)
6826
      instance.primary_node = target_node
6827
      self.cfg.Update(instance, self.feedback_fn)
6828
      demoted_node = source_node
6829
    else:
6830
      self.feedback_fn("* instance confirmed to be running on its"
6831
                       " primary node (%s)" % source_node)
6832
      demoted_node = target_node
6833

    
6834
    if instance.disk_template in constants.DTS_INT_MIRROR:
6835
      self._EnsureSecondary(demoted_node)
6836
      try:
6837
        self._WaitUntilSync()
6838
      except errors.OpExecError:
6839
        # we ignore here errors, since if the device is standalone, it
6840
        # won't be able to sync
6841
        pass
6842
      self._GoStandalone()
6843
      self._GoReconnect(False)
6844
      self._WaitUntilSync()
6845

    
6846
    self.feedback_fn("* done")
6847

    
6848
  def _RevertDiskStatus(self):
6849
    """Try to revert the disk status after a failed migration.
6850

6851
    """
6852
    target_node = self.target_node
6853
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
6854
      return
6855

    
6856
    try:
6857
      self._EnsureSecondary(target_node)
6858
      self._GoStandalone()
6859
      self._GoReconnect(False)
6860
      self._WaitUntilSync()
6861
    except errors.OpExecError, err:
6862
      self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
6863
                         " please try to recover the instance manually;"
6864
                         " error '%s'" % str(err))
6865

    
6866
  def _AbortMigration(self):
6867
    """Call the hypervisor code to abort a started migration.
6868

6869
    """
6870
    instance = self.instance
6871
    target_node = self.target_node
6872
    migration_info = self.migration_info
6873

    
6874
    abort_result = self.rpc.call_finalize_migration(target_node,
6875
                                                    instance,
6876
                                                    migration_info,
6877
                                                    False)
6878
    abort_msg = abort_result.fail_msg
6879
    if abort_msg:
6880
      logging.error("Aborting migration failed on target node %s: %s",
6881
                    target_node, abort_msg)
6882
      # Don't raise an exception here, as we stil have to try to revert the
6883
      # disk status, even if this step failed.
6884

    
6885
  def _ExecMigration(self):
6886
    """Migrate an instance.
6887

6888
    The migrate is done by:
6889
      - change the disks into dual-master mode
6890
      - wait until disks are fully synchronized again
6891
      - migrate the instance
6892
      - change disks on the new secondary node (the old primary) to secondary
6893
      - wait until disks are fully synchronized
6894
      - change disks into single-master mode
6895

6896
    """
6897
    instance = self.instance
6898
    target_node = self.target_node
6899
    source_node = self.source_node
6900

    
6901
    self.feedback_fn("* checking disk consistency between source and target")
6902
    for dev in instance.disks:
6903
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6904
        raise errors.OpExecError("Disk %s is degraded or not fully"
6905
                                 " synchronized on target node,"
6906
                                 " aborting migration" % dev.iv_name)
6907

    
6908
    # First get the migration information from the remote node
6909
    result = self.rpc.call_migration_info(source_node, instance)
6910
    msg = result.fail_msg
6911
    if msg:
6912
      log_err = ("Failed fetching source migration information from %s: %s" %
6913
                 (source_node, msg))
6914
      logging.error(log_err)
6915
      raise errors.OpExecError(log_err)
6916

    
6917
    self.migration_info = migration_info = result.payload
6918

    
6919
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
6920
      # Then switch the disks to master/master mode
6921
      self._EnsureSecondary(target_node)
6922
      self._GoStandalone()
6923
      self._GoReconnect(True)
6924
      self._WaitUntilSync()
6925

    
6926
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
6927
    result = self.rpc.call_accept_instance(target_node,
6928
                                           instance,
6929
                                           migration_info,
6930
                                           self.nodes_ip[target_node])
6931

    
6932
    msg = result.fail_msg
6933
    if msg:
6934
      logging.error("Instance pre-migration failed, trying to revert"
6935
                    " disk status: %s", msg)
6936
      self.feedback_fn("Pre-migration failed, aborting")
6937
      self._AbortMigration()
6938
      self._RevertDiskStatus()
6939
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6940
                               (instance.name, msg))
6941

    
6942
    self.feedback_fn("* migrating instance to %s" % target_node)
6943
    result = self.rpc.call_instance_migrate(source_node, instance,
6944
                                            self.nodes_ip[target_node],
6945
                                            self.live)
6946
    msg = result.fail_msg
6947
    if msg:
6948
      logging.error("Instance migration failed, trying to revert"
6949
                    " disk status: %s", msg)
6950
      self.feedback_fn("Migration failed, aborting")
6951
      self._AbortMigration()
6952
      self._RevertDiskStatus()
6953
      raise errors.OpExecError("Could not migrate instance %s: %s" %
6954
                               (instance.name, msg))
6955

    
6956
    instance.primary_node = target_node
6957
    # distribute new instance config to the other nodes
6958
    self.cfg.Update(instance, self.feedback_fn)
6959

    
6960
    result = self.rpc.call_finalize_migration(target_node,
6961
                                              instance,
6962
                                              migration_info,
6963
                                              True)
6964
    msg = result.fail_msg
6965
    if msg:
6966
      logging.error("Instance migration succeeded, but finalization failed:"
6967
                    " %s", msg)
6968
      raise errors.OpExecError("Could not finalize instance migration: %s" %
6969
                               msg)
6970

    
6971
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
6972
      self._EnsureSecondary(source_node)
6973
      self._WaitUntilSync()
6974
      self._GoStandalone()
6975
      self._GoReconnect(False)
6976
      self._WaitUntilSync()
6977

    
6978
    self.feedback_fn("* done")
6979

    
6980
  def _ExecFailover(self):
6981
    """Failover an instance.
6982

6983
    The failover is done by shutting it down on its present node and
6984
    starting it on the secondary.
6985

6986
    """
6987
    instance = self.instance
6988
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)
6989

    
6990
    source_node = instance.primary_node
6991
    target_node = self.target_node
6992

    
6993
    if instance.admin_up:
6994
      self.feedback_fn("* checking disk consistency between source and target")
6995
      for dev in instance.disks:
6996
        # for drbd, these are drbd over lvm
6997
        if not _CheckDiskConsistency(self, dev, target_node, False):
6998
          if not self.ignore_consistency:
6999
            raise errors.OpExecError("Disk %s is degraded on target node,"
7000
                                     " aborting failover" % dev.iv_name)
7001
    else:
7002
      self.feedback_fn("* not checking disk consistency as instance is not"
7003
                       " running")
7004

    
7005
    self.feedback_fn("* shutting down instance on source node")
7006
    logging.info("Shutting down instance %s on node %s",
7007
                 instance.name, source_node)
7008

    
7009
    result = self.rpc.call_instance_shutdown(source_node, instance,
7010
                                             self.shutdown_timeout)
7011
    msg = result.fail_msg
7012
    if msg:
7013
      if self.ignore_consistency or primary_node.offline:
7014
        self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7015
                           " proceeding anyway; please make sure node"
7016
                           " %s is down; error details: %s",
7017
                           instance.name, source_node, source_node, msg)
7018
      else:
7019
        raise errors.OpExecError("Could not shutdown instance %s on"
7020
                                 " node %s: %s" %
7021
                                 (instance.name, source_node, msg))
7022

    
7023
    self.feedback_fn("* deactivating the instance's disks on source node")
7024
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
7025
      raise errors.OpExecError("Can't shut down the instance's disks.")
7026

    
7027
    instance.primary_node = target_node
7028
    # distribute new instance config to the other nodes
7029
    self.cfg.Update(instance, self.feedback_fn)
7030

    
7031
    # Only start the instance if it's marked as up
7032
    if instance.admin_up:
7033
      self.feedback_fn("* activating the instance's disks on target node")
7034
      logging.info("Starting instance %s on node %s",
7035
                   instance.name, target_node)
7036

    
7037
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
7038
                                           ignore_secondaries=True)
7039
      if not disks_ok:
7040
        _ShutdownInstanceDisks(self, instance)
7041
        raise errors.OpExecError("Can't activate the instance's disks")
7042

    
7043
      self.feedback_fn("* starting the instance on the target node")
7044
      result = self.rpc.call_instance_start(target_node, instance, None, None)
7045
      msg = result.fail_msg
7046
      if msg:
7047
        _ShutdownInstanceDisks(self, instance)
7048
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7049
                                 (instance.name, target_node, msg))
7050

    
7051
  def Exec(self, feedback_fn):
7052
    """Perform the migration.
7053

7054
    """
7055
    self.feedback_fn = feedback_fn
7056
    self.source_node = self.instance.primary_node
7057

    
7058
    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7059
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
7060
      self.target_node = self.instance.secondary_nodes[0]
7061
      # Otherwise self.target_node has been populated either
7062
      # directly, or through an iallocator.
7063

    
7064
    self.all_nodes = [self.source_node, self.target_node]
7065
    self.nodes_ip = {
7066
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
7067
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
7068
      }
7069

    
7070
    if self.failover:
7071
      feedback_fn("Failover instance %s" % self.instance.name)
7072
      self._ExecFailover()
7073
    else:
7074
      feedback_fn("Migrating instance %s" % self.instance.name)
7075

    
7076
      if self.cleanup:
7077
        return self._ExecCleanup()
7078
      else:
7079
        return self._ExecMigration()
7080

    
7081

    
7082
def _CreateBlockDev(lu, node, instance, device, force_create,
7083
                    info, force_open):
7084
  """Create a tree of block devices on a given node.
7085

7086
  If this device type has to be created on secondaries, create it and
7087
  all its children.
7088

7089
  If not, just recurse to children keeping the same 'force' value.
7090

7091
  @param lu: the lu on whose behalf we execute
7092
  @param node: the node on which to create the device
7093
  @type instance: L{objects.Instance}
7094
  @param instance: the instance which owns the device
7095
  @type device: L{objects.Disk}
7096
  @param device: the device to create
7097
  @type force_create: boolean
7098
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has
      CreateOnSecondary() attribute
7101
  @param info: the extra 'metadata' we should attach to the device
7102
      (this will be represented as a LVM tag)
7103
  @type force_open: boolean
7104
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution
7108

7109
  """
7110
  if device.CreateOnSecondary():
7111
    force_create = True
7112

    
7113
  if device.children:
7114
    for child in device.children:
7115
      _CreateBlockDev(lu, node, instance, child, force_create,
7116
                      info, force_open)
7117

    
7118
  if not force_create:
7119
    return
7120

    
7121
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
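# Illustrative note (not part of the original code): callers such as
# _CreateDisks below drive this function once per node, forcing creation
# only on the primary unless a device in the tree reports
# CreateOnSecondary(), roughly:
#
#   for node in all_nodes:
#     f_create = (node == pnode)
#     _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)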
7122

    
7123

    
7124
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7125
  """Create a single block device on a given node.
7126

7127
  This will not recurse over children of the device, so they must be
7128
  created in advance.
7129

7130
  @param lu: the lu on whose behalf we execute
7131
  @param node: the node on which to create the device
7132
  @type instance: L{objects.Instance}
7133
  @param instance: the instance which owns the device
7134
  @type device: L{objects.Disk}
7135
  @param device: the device to create
7136
  @param info: the extra 'metadata' we should attach to the device
7137
      (this will be represented as a LVM tag)
7138
  @type force_open: boolean
7139
  @param force_open: this parameter will be passes to the
7140
      L{backend.BlockdevCreate} function where it specifies
7141
      whether we run on primary or not, and it affects both
7142
      the child assembly and the device own Open() execution
7143

7144
  """
7145
  lu.cfg.SetDiskID(device, node)
7146
  result = lu.rpc.call_blockdev_create(node, device, device.size,
7147
                                       instance.name, force_open, info)
7148
  result.Raise("Can't create block device %s on"
7149
               " node %s for instance %s" % (device, node, instance.name))
7150
  if device.physical_id is None:
7151
    device.physical_id = result.payload
7152

    
7153

    
7154
def _GenerateUniqueNames(lu, exts):
7155
  """Generate a suitable LV name.
7156

7157
  This will generate a logical volume name for the given instance.
7158

7159
  """
7160
  results = []
7161
  for val in exts:
7162
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
7163
    results.append("%s%s" % (new_id, val))
7164
  return results
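# Illustrative example (not part of the original code): with exts set to
# [".disk0", ".disk1"] this returns names such as
# ["<uuid>.disk0", "<uuid>.disk1"], where <uuid> stands for the unique ID
# obtained from cfg.GenerateUniqueID() for the current execution context.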
7165

    
7166

    
7167
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
7168
                         iv_name, p_minor, s_minor):
7169
  """Generate a drbd8 device complete with its children.
7170

7171
  """
7172
  assert len(vgnames) == len(names) == 2
7173
  port = lu.cfg.AllocatePort()
7174
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
7175
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7176
                          logical_id=(vgnames[0], names[0]))
7177
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7178
                          logical_id=(vgnames[1], names[1]))
7179
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
7180
                          logical_id=(primary, secondary, port,
7181
                                      p_minor, s_minor,
7182
                                      shared_secret),
7183
                          children=[dev_data, dev_meta],
7184
                          iv_name=iv_name)
7185
  return drbd_dev
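# Illustrative sketch of the tree built above (not part of the original
# code): a DRBD8 device of the requested size whose children are the data
# LV and a 128 MB metadata LV, e.g.:
#
#   drbd8(primary, secondary, port, p_minor, s_minor, shared_secret)
#     +- lv(vgnames[0], names[0])   data, <size> MB
#     +- lv(vgnames[1], names[1])   metadata, 128 MB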
7186

    
7187

    
7188
def _GenerateDiskTemplate(lu, template_name,
7189
                          instance_name, primary_node,
7190
                          secondary_nodes, disk_info,
7191
                          file_storage_dir, file_driver,
7192
                          base_index, feedback_fn):
7193
  """Generate the entire disk layout for a given template type.
7194

7195
  """
7196
  #TODO: compute space requirements
7197

    
7198
  vgname = lu.cfg.GetVGName()
7199
  disk_count = len(disk_info)
7200
  disks = []
7201
  if template_name == constants.DT_DISKLESS:
7202
    pass
7203
  elif template_name == constants.DT_PLAIN:
7204
    if len(secondary_nodes) != 0:
7205
      raise errors.ProgrammerError("Wrong template configuration")
7206

    
7207
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7208
                                      for i in range(disk_count)])
7209
    for idx, disk in enumerate(disk_info):
7210
      disk_index = idx + base_index
7211
      vg = disk.get(constants.IDISK_VG, vgname)
7212
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7213
      disk_dev = objects.Disk(dev_type=constants.LD_LV,
7214
                              size=disk[constants.IDISK_SIZE],
7215
                              logical_id=(vg, names[idx]),
7216
                              iv_name="disk/%d" % disk_index,
7217
                              mode=disk[constants.IDISK_MODE])
7218
      disks.append(disk_dev)
7219
  elif template_name == constants.DT_DRBD8:
7220
    if len(secondary_nodes) != 1:
7221
      raise errors.ProgrammerError("Wrong template configuration")
7222
    remote_node = secondary_nodes[0]
7223
    minors = lu.cfg.AllocateDRBDMinor(
7224
      [primary_node, remote_node] * len(disk_info), instance_name)
7225

    
7226
    names = []
7227
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7228
                                               for i in range(disk_count)]):
7229
      names.append(lv_prefix + "_data")
7230
      names.append(lv_prefix + "_meta")
7231
    for idx, disk in enumerate(disk_info):
7232
      disk_index = idx + base_index
7233
      data_vg = disk.get(constants.IDISK_VG, vgname)
7234
      meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7235
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7236
                                      disk[constants.IDISK_SIZE],
7237
                                      [data_vg, meta_vg],
7238
                                      names[idx * 2:idx * 2 + 2],
7239
                                      "disk/%d" % disk_index,
7240
                                      minors[idx * 2], minors[idx * 2 + 1])
7241
      disk_dev.mode = disk[constants.IDISK_MODE]
7242
      disks.append(disk_dev)
7243
  elif template_name == constants.DT_FILE:
7244
    if len(secondary_nodes) != 0:
7245
      raise errors.ProgrammerError("Wrong template configuration")
7246

    
7247
    opcodes.RequireFileStorage()
7248

    
7249
    for idx, disk in enumerate(disk_info):
7250
      disk_index = idx + base_index
7251
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7252
                              size=disk[constants.IDISK_SIZE],
7253
                              iv_name="disk/%d" % disk_index,
7254
                              logical_id=(file_driver,
7255
                                          "%s/disk%d" % (file_storage_dir,
7256
                                                         disk_index)),
7257
                              mode=disk[constants.IDISK_MODE])
7258
      disks.append(disk_dev)
7259
  elif template_name == constants.DT_SHARED_FILE:
7260
    if len(secondary_nodes) != 0:
7261
      raise errors.ProgrammerError("Wrong template configuration")
7262

    
7263
    opcodes.RequireSharedFileStorage()
7264

    
7265
    for idx, disk in enumerate(disk_info):
7266
      disk_index = idx + base_index
7267
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7268
                              size=disk[constants.IDISK_SIZE],
7269
                              iv_name="disk/%d" % disk_index,
7270
                              logical_id=(file_driver,
7271
                                          "%s/disk%d" % (file_storage_dir,
7272
                                                         disk_index)),
7273
                              mode=disk[constants.IDISK_MODE])
7274
      disks.append(disk_dev)
7275
  elif template_name == constants.DT_BLOCK:
7276
    if len(secondary_nodes) != 0:
7277
      raise errors.ProgrammerError("Wrong template configuration")
7278

    
7279
    for idx, disk in enumerate(disk_info):
7280
      disk_index = idx + base_index
7281
      disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7282
                              size=disk[constants.IDISK_SIZE],
7283
                              logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7284
                                          disk[constants.IDISK_ADOPT]),
7285
                              iv_name="disk/%d" % disk_index,
7286
                              mode=disk[constants.IDISK_MODE])
7287
      disks.append(disk_dev)
7288

    
7289
  else:
7290
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
7291
  return disks
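# Illustrative example (not part of the original code): for
# constants.DT_PLAIN with two disks and base_index == 0, the result is two
# LD_LV Disk objects with logical_ids such as (<vg>, "<uuid>.disk0") and
# (<vg>, "<uuid>.disk1") and iv_names "disk/0" and "disk/1"; for
# constants.DT_DRBD8 each entry is instead the DRBD8 tree returned by
# _GenerateDRBD8Branch above.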
7292

    
7293

    
7294
def _GetInstanceInfoText(instance):
7295
  """Compute that text that should be added to the disk's metadata.
7296

7297
  """
7298
  return "originstname+%s" % instance.name
7299

    
7300

    
7301
def _CalcEta(time_taken, written, total_size):
7302
  """Calculates the ETA based on size written and total size.
7303

7304
  @param time_taken: The time taken so far
7305
  @param written: amount written so far
7306
  @param total_size: The total size of data to be written
7307
  @return: The remaining time in seconds
7308

7309
  """
7310
  avg_time = time_taken / float(written)
7311
  return (total_size - written) * avg_time
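# Worked example (not part of the original code): if 30 GB out of a 100 GB
# wipe completed in 600 seconds, the average is 600 / 30 = 20 s per GB and
# the ETA is (100 - 30) * 20 = 1400 seconds.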
7312

    
7313

    
7314
def _WipeDisks(lu, instance):
7315
  """Wipes instance disks.
7316

7317
  @type lu: L{LogicalUnit}
7318
  @param lu: the logical unit on whose behalf we execute
7319
  @type instance: L{objects.Instance}
7320
  @param instance: the instance whose disks we should create
7321
  @return: the success of the wipe
7322

7323
  """
7324
  node = instance.primary_node
7325

    
7326
  for device in instance.disks:
7327
    lu.cfg.SetDiskID(device, node)
7328

    
7329
  logging.info("Pause sync of instance %s disks", instance.name)
7330
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7331

    
7332
  for idx, success in enumerate(result.payload):
7333
    if not success:
7334
      logging.warn("pause-sync of instance %s for disks %d failed",
7335
                   instance.name, idx)
7336

    
7337
  try:
7338
    for idx, device in enumerate(instance.disks):
7339
      # The wipe chunk size is MIN_WIPE_CHUNK_PERCENT % of the disk size,
      # but at most MAX_WIPE_CHUNK
7341
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
7342
                            constants.MIN_WIPE_CHUNK_PERCENT)
7343
      # we _must_ make this an int, otherwise rounding errors will
7344
      # occur
7345
      wipe_chunk_size = int(wipe_chunk_size)
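      # Worked example (constant values assumed here for illustration
      # only): with MIN_WIPE_CHUNK_PERCENT = 10 and MAX_WIPE_CHUNK = 2048,
      # a 10240 MB disk is wiped in min(2048, 1024) = 1024 MB chunks,
      # while a 102400 MB disk is capped at 2048 MB per chunk.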
7346

    
7347
      lu.LogInfo("* Wiping disk %d", idx)
7348
      logging.info("Wiping disk %d for instance %s, node %s using"
7349
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)
7350

    
7351
      offset = 0
7352
      size = device.size
7353
      last_output = 0
7354
      start_time = time.time()
7355

    
7356
      while offset < size:
7357
        wipe_size = min(wipe_chunk_size, size - offset)
7358
        logging.debug("Wiping disk %d, offset %s, chunk %s",
7359
                      idx, offset, wipe_size)
7360
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
7361
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
7362
                     (idx, offset, wipe_size))
7363
        now = time.time()
7364
        offset += wipe_size
7365
        if now - last_output >= 60:
7366
          eta = _CalcEta(now - start_time, offset, size)
7367
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
7368
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
7369
          last_output = now
7370
  finally:
7371
    logging.info("Resume sync of instance %s disks", instance.name)
7372

    
7373
    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
7374

    
7375
    for idx, success in enumerate(result.payload):
7376
      if not success:
7377
        lu.LogWarning("Resume sync of disk %d failed, please have a"
7378
                      " look at the status and troubleshoot the issue", idx)
7379
        logging.warn("resume-sync of instance %s for disks %d failed",
7380
                     instance.name, idx)
7381

    
7382

    
7383
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
7384
  """Create all disks for an instance.
7385

7386
  This abstracts away some work from AddInstance.
7387

7388
  @type lu: L{LogicalUnit}
7389
  @param lu: the logical unit on whose behalf we execute
7390
  @type instance: L{objects.Instance}
7391
  @param instance: the instance whose disks we should create
7392
  @type to_skip: list
7393
  @param to_skip: list of indices to skip
7394
  @type target_node: string
7395
  @param target_node: if passed, overrides the target node for creation
7396
  @rtype: boolean
7397
  @return: the success of the creation
7398

7399
  """
7400
  info = _GetInstanceInfoText(instance)
7401
  if target_node is None:
7402
    pnode = instance.primary_node
7403
    all_nodes = instance.all_nodes
7404
  else:
7405
    pnode = target_node
7406
    all_nodes = [pnode]
7407

    
7408
  if instance.disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE):
7409
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7410
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
7411

    
7412
    result.Raise("Failed to create directory '%s' on"
7413
                 " node %s" % (file_storage_dir, pnode))
7414

    
7415
  # Note: this needs to be kept in sync with adding of disks in
7416
  # LUInstanceSetParams
7417
  for idx, device in enumerate(instance.disks):
7418
    if to_skip and idx in to_skip:
7419
      continue
7420
    logging.info("Creating volume %s for instance %s",
7421
                 device.iv_name, instance.name)
7422
    #HARDCODE
7423
    for node in all_nodes:
7424
      f_create = node == pnode
7425
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
7426

    
7427

    
7428
def _RemoveDisks(lu, instance, target_node=None):
7429
  """Remove all disks for an instance.
7430

7431
  This abstracts away some work from `AddInstance()` and
7432
  `RemoveInstance()`. Note that in case some of the devices couldn't
7433
  be removed, the removal will continue with the other ones (compare
7434
  with `_CreateDisks()`).
7435

7436
  @type lu: L{LogicalUnit}
7437
  @param lu: the logical unit on whose behalf we execute
7438
  @type instance: L{objects.Instance}
7439
  @param instance: the instance whose disks we should remove
7440
  @type target_node: string
7441
  @param target_node: used to override the node on which to remove the disks
7442
  @rtype: boolean
7443
  @return: the success of the removal
7444

7445
  """
7446
  logging.info("Removing block devices for instance %s", instance.name)
7447

    
7448
  all_result = True
7449
  for device in instance.disks:
7450
    if target_node:
7451
      edata = [(target_node, device)]
7452
    else:
7453
      edata = device.ComputeNodeTree(instance.primary_node)
7454
    for node, disk in edata:
7455
      lu.cfg.SetDiskID(disk, node)
7456
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
7457
      if msg:
7458
        lu.LogWarning("Could not remove block device %s on node %s,"
7459
                      " continuing anyway: %s", device.iv_name, node, msg)
7460
        all_result = False
7461

    
7462
  if instance.disk_template == constants.DT_FILE:
7463
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7464
    if target_node:
7465
      tgt = target_node
7466
    else:
7467
      tgt = instance.primary_node
7468
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
7469
    if result.fail_msg:
7470
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
7471
                    file_storage_dir, instance.primary_node, result.fail_msg)
7472
      all_result = False
7473

    
7474
  return all_result
7475

    
7476

    
7477
def _ComputeDiskSizePerVG(disk_template, disks):
7478
  """Compute disk size requirements in the volume group
7479

7480
  """
7481
  def _compute(disks, payload):
7482
    """Universal algorithm.
7483

7484
    """
7485
    vgs = {}
7486
    for disk in disks:
7487
      vgs[disk[constants.IDISK_VG]] = \
7488
        vgs.get(constants.IDISK_VG, 0) + disk[constants.IDISK_SIZE] + payload
7489

    
7490
    return vgs
7491

    
7492
  # Required free disk space as a function of disk and swap space
7493
  req_size_dict = {
7494
    constants.DT_DISKLESS: {},
7495
    constants.DT_PLAIN: _compute(disks, 0),
7496
    # 128 MB are added for drbd metadata for each disk
7497
    constants.DT_DRBD8: _compute(disks, 128),
7498
    constants.DT_FILE: {},
7499
    constants.DT_SHARED_FILE: {},
7500
  }
7501

    
7502
  if disk_template not in req_size_dict:
7503
    raise errors.ProgrammerError("Disk template '%s' size requirement"
7504
                                 " is unknown" %  disk_template)
7505

    
7506
  return req_size_dict[disk_template]
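# Worked example (not part of the original code): two 1024 MB disks, both
# in volume group "xenvg", need {"xenvg": 2048} MB of free space for
# constants.DT_PLAIN and {"xenvg": (1024 + 128) * 2} == {"xenvg": 2304} MB
# for constants.DT_DRBD8, due to the per-disk DRBD metadata overhead.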
7507

    
7508

    
7509
def _ComputeDiskSize(disk_template, disks):
7510
  """Compute disk size requirements in the volume group
7511

7512
  """
7513
  # Required free disk space as a function of disk and swap space
7514
  req_size_dict = {
7515
    constants.DT_DISKLESS: None,
7516
    constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
7517
    # 128 MB are added for drbd metadata for each disk
7518
    constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
7519
    constants.DT_FILE: None,
7520
    constants.DT_SHARED_FILE: 0,
7521
    constants.DT_BLOCK: 0,
7522
  }
7523

    
7524
  if disk_template not in req_size_dict:
7525
    raise errors.ProgrammerError("Disk template '%s' size requirement"
7526
                                 " is unknown" %  disk_template)
7527

    
7528
  return req_size_dict[disk_template]
7529

    
7530

    
7531
def _FilterVmNodes(lu, nodenames):
7532
  """Filters out non-vm_capable nodes from a list.
7533

7534
  @type lu: L{LogicalUnit}
7535
  @param lu: the logical unit for which we check
7536
  @type nodenames: list
7537
  @param nodenames: the list of nodes on which we should check
7538
  @rtype: list
7539
  @return: the list of vm-capable nodes
7540

7541
  """
7542
  non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
  return [name for name in nodenames if name not in non_vm_nodes]
7544

    
7545

    
7546
def _CheckHVParams(lu, nodenames, hvname, hvparams):
7547
  """Hypervisor parameter validation.
7548

7549
  This function abstracts the hypervisor parameter validation to be
7550
  used in both instance create and instance modify.
7551

7552
  @type lu: L{LogicalUnit}
7553
  @param lu: the logical unit for which we check
7554
  @type nodenames: list
7555
  @param nodenames: the list of nodes on which we should check
7556
  @type hvname: string
7557
  @param hvname: the name of the hypervisor we should use
7558
  @type hvparams: dict
7559
  @param hvparams: the parameters which we need to check
7560
  @raise errors.OpPrereqError: if the parameters are not valid
7561

7562
  """
7563
  nodenames = _FilterVmNodes(lu, nodenames)
7564
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
7565
                                                  hvname,
7566
                                                  hvparams)
7567
  for node in nodenames:
7568
    info = hvinfo[node]
7569
    if info.offline:
7570
      continue
7571
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
7572

    
7573

    
7574
def _CheckOSParams(lu, required, nodenames, osname, osparams):
7575
  """OS parameters validation.
7576

7577
  @type lu: L{LogicalUnit}
7578
  @param lu: the logical unit for which we check
7579
  @type required: boolean
7580
  @param required: whether the validation should fail if the OS is not
7581
      found
7582
  @type nodenames: list
7583
  @param nodenames: the list of nodes on which we should check
7584
  @type osname: string
7585
  @param osname: the name of the OS we should use
7586
  @type osparams: dict
7587
  @param osparams: the parameters which we need to check
7588
  @raise errors.OpPrereqError: if the parameters are not valid
7589

7590
  """
7591
  nodenames = _FilterVmNodes(lu, nodenames)
7592
  result = lu.rpc.call_os_validate(required, nodenames, osname,
7593
                                   [constants.OS_VALIDATE_PARAMETERS],
7594
                                   osparams)
7595
  for node, nres in result.items():
7596
    # we don't check for offline cases since this should be run only
7597
    # against the master node and/or an instance's nodes
7598
    nres.Raise("OS Parameters validation failed on node %s" % node)
7599
    if not nres.payload:
7600
      lu.LogInfo("OS %s not found on node %s, validation skipped",
7601
                 osname, node)
7602

    
7603

    
7604
class LUInstanceCreate(LogicalUnit):
7605
  """Create an instance.
7606

7607
  """
7608
  HPATH = "instance-add"
7609
  HTYPE = constants.HTYPE_INSTANCE
7610
  REQ_BGL = False
7611

    
7612
  def CheckArguments(self):
7613
    """Check arguments.
7614

7615
    """
7616
    # do not require name_check to ease forward/backward compatibility
7617
    # for tools
7618
    if self.op.no_install and self.op.start:
7619
      self.LogInfo("No-installation mode selected, disabling startup")
7620
      self.op.start = False
7621
    # validate/normalize the instance name
7622
    self.op.instance_name = \
7623
      netutils.Hostname.GetNormalizedName(self.op.instance_name)
7624

    
7625
    if self.op.ip_check and not self.op.name_check:
7626
      # TODO: make the ip check more flexible and not depend on the name check
7627
      raise errors.OpPrereqError("Cannot do IP address check without a name"
7628
                                 " check", errors.ECODE_INVAL)
7629

    
7630
    # check nics' parameter names
7631
    for nic in self.op.nics:
7632
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
7633

    
7634
    # check disks. parameter names and consistent adopt/no-adopt strategy
7635
    has_adopt = has_no_adopt = False
7636
    for disk in self.op.disks:
7637
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
7638
      if constants.IDISK_ADOPT in disk:
7639
        has_adopt = True
7640
      else:
7641
        has_no_adopt = True
7642
    if has_adopt and has_no_adopt:
7643
      raise errors.OpPrereqError("Either all disks are adopted or none is",
7644
                                 errors.ECODE_INVAL)
7645
    if has_adopt:
7646
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
7647
        raise errors.OpPrereqError("Disk adoption is not supported for the"
7648
                                   " '%s' disk template" %
7649
                                   self.op.disk_template,
7650
                                   errors.ECODE_INVAL)
7651
      if self.op.iallocator is not None:
7652
        raise errors.OpPrereqError("Disk adoption not allowed with an"
7653
                                   " iallocator script", errors.ECODE_INVAL)
7654
      if self.op.mode == constants.INSTANCE_IMPORT:
7655
        raise errors.OpPrereqError("Disk adoption not allowed for"
7656
                                   " instance import", errors.ECODE_INVAL)
7657
    else:
7658
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
7659
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
7660
                                   " but no 'adopt' parameter given" %
7661
                                   self.op.disk_template,
7662
                                   errors.ECODE_INVAL)
7663

    
7664
    self.adopt_disks = has_adopt
7665

    
7666
    # instance name verification
7667
    if self.op.name_check:
7668
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
7669
      self.op.instance_name = self.hostname1.name
7670
      # used in CheckPrereq for ip ping check
7671
      self.check_ip = self.hostname1.ip
7672
    else:
7673
      self.check_ip = None
7674

    
7675
    # file storage checks
7676
    if (self.op.file_driver and
        self.op.file_driver not in constants.FILE_DRIVER):
7678
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
7679
                                 self.op.file_driver, errors.ECODE_INVAL)
7680

    
7681
    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
7682
      raise errors.OpPrereqError("File storage directory path not absolute",
7683
                                 errors.ECODE_INVAL)
7684

    
7685
    ### Node/iallocator related checks
7686
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")
7687

    
7688
    if self.op.pnode is not None:
7689
      if self.op.disk_template in constants.DTS_INT_MIRROR:
7690
        if self.op.snode is None:
7691
          raise errors.OpPrereqError("The networked disk templates need"
7692
                                     " a mirror node", errors.ECODE_INVAL)
7693
      elif self.op.snode:
7694
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
7695
                        " template")
7696
        self.op.snode = None
7697

    
7698
    self._cds = _GetClusterDomainSecret()
7699

    
7700
    if self.op.mode == constants.INSTANCE_IMPORT:
7701
      # On import force_variant must be True, because if we forced it at
7702
      # initial install, our only chance when importing it back is that it
7703
      # works again!
7704
      self.op.force_variant = True
7705

    
7706
      if self.op.no_install:
7707
        self.LogInfo("No-installation mode has no effect during import")
7708

    
7709
    elif self.op.mode == constants.INSTANCE_CREATE:
7710
      if self.op.os_type is None:
7711
        raise errors.OpPrereqError("No guest OS specified",
7712
                                   errors.ECODE_INVAL)
7713
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
7714
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
7715
                                   " installation" % self.op.os_type,
7716
                                   errors.ECODE_STATE)
7717
      if self.op.disk_template is None:
7718
        raise errors.OpPrereqError("No disk template specified",
7719
                                   errors.ECODE_INVAL)
7720

    
7721
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7722
      # Check handshake to ensure both clusters have the same domain secret
7723
      src_handshake = self.op.source_handshake
7724
      if not src_handshake:
7725
        raise errors.OpPrereqError("Missing source handshake",
7726
                                   errors.ECODE_INVAL)
7727

    
7728
      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
7729
                                                           src_handshake)
7730
      if errmsg:
7731
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
7732
                                   errors.ECODE_INVAL)
7733

    
7734
      # Load and check source CA
7735
      self.source_x509_ca_pem = self.op.source_x509_ca
7736
      if not self.source_x509_ca_pem:
7737
        raise errors.OpPrereqError("Missing source X509 CA",
7738
                                   errors.ECODE_INVAL)
7739

    
7740
      try:
7741
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
7742
                                                    self._cds)
7743
      except OpenSSL.crypto.Error, err:
7744
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
7745
                                   (err, ), errors.ECODE_INVAL)
7746

    
7747
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
7748
      if errcode is not None:
7749
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
7750
                                   errors.ECODE_INVAL)
7751

    
7752
      self.source_x509_ca = cert
7753

    
7754
      src_instance_name = self.op.source_instance_name
7755
      if not src_instance_name:
7756
        raise errors.OpPrereqError("Missing source instance name",
7757
                                   errors.ECODE_INVAL)
7758

    
7759
      self.source_instance_name = \
7760
          netutils.GetHostname(name=src_instance_name).name
7761

    
7762
    else:
7763
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
7764
                                 self.op.mode, errors.ECODE_INVAL)
7765

    
7766
  def ExpandNames(self):
7767
    """ExpandNames for CreateInstance.
7768

7769
    Figure out the right locks for instance creation.
7770

7771
    """
7772
    self.needed_locks = {}
7773

    
7774
    instance_name = self.op.instance_name
7775
    # this is just a preventive check, but someone might still add this
7776
    # instance in the meantime, and creation will fail at lock-add time
7777
    if instance_name in self.cfg.GetInstanceList():
7778
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7779
                                 instance_name, errors.ECODE_EXISTS)
7780

    
7781
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
7782

    
7783
    if self.op.iallocator:
7784
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7785
    else:
7786
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
7787
      nodelist = [self.op.pnode]
7788
      if self.op.snode is not None:
7789
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
7790
        nodelist.append(self.op.snode)
7791
      self.needed_locks[locking.LEVEL_NODE] = nodelist
7792

    
7793
    # in case of import lock the source node too
7794
    if self.op.mode == constants.INSTANCE_IMPORT:
7795
      src_node = self.op.src_node
7796
      src_path = self.op.src_path
7797

    
7798
      if src_path is None:
7799
        self.op.src_path = src_path = self.op.instance_name
7800

    
7801
      if src_node is None:
7802
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7803
        self.op.src_node = None
7804
        if os.path.isabs(src_path):
7805
          raise errors.OpPrereqError("Importing an instance from an absolute"
7806
                                     " path requires a source node option",
7807
                                     errors.ECODE_INVAL)
7808
      else:
7809
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
7810
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
7811
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
7812
        if not os.path.isabs(src_path):
7813
          self.op.src_path = src_path = \
7814
            utils.PathJoin(constants.EXPORT_DIR, src_path)
7815

    
7816
  def _RunAllocator(self):
7817
    """Run the allocator based on input opcode.
7818

7819
    """
7820
    nics = [n.ToDict() for n in self.nics]
7821
    ial = IAllocator(self.cfg, self.rpc,
7822
                     mode=constants.IALLOCATOR_MODE_ALLOC,
7823
                     name=self.op.instance_name,
7824
                     disk_template=self.op.disk_template,
7825
                     tags=[],
7826
                     os=self.op.os_type,
7827
                     vcpus=self.be_full[constants.BE_VCPUS],
7828
                     mem_size=self.be_full[constants.BE_MEMORY],
7829
                     disks=self.disks,
7830
                     nics=nics,
7831
                     hypervisor=self.op.hypervisor,
7832
                     )
7833

    
7834
    ial.Run(self.op.iallocator)
7835

    
7836
    if not ial.success:
7837
      raise errors.OpPrereqError("Can't compute nodes using"
7838
                                 " iallocator '%s': %s" %
7839
                                 (self.op.iallocator, ial.info),
7840
                                 errors.ECODE_NORES)
7841
    if len(ial.result) != ial.required_nodes:
7842
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7843
                                 " of nodes (%s), required %s" %
7844
                                 (self.op.iallocator, len(ial.result),
7845
                                  ial.required_nodes), errors.ECODE_FAULT)
7846
    self.op.pnode = ial.result[0]
7847
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7848
                 self.op.instance_name, self.op.iallocator,
7849
                 utils.CommaJoin(ial.result))
7850
    if ial.required_nodes == 2:
7851
      self.op.snode = ial.result[1]
7852

    
7853
  def BuildHooksEnv(self):
7854
    """Build hooks env.
7855

7856
    This runs on master, primary and secondary nodes of the instance.
7857

7858
    """
7859
    env = {
7860
      "ADD_MODE": self.op.mode,
7861
      }
7862
    if self.op.mode == constants.INSTANCE_IMPORT:
7863
      env["SRC_NODE"] = self.op.src_node
7864
      env["SRC_PATH"] = self.op.src_path
7865
      env["SRC_IMAGES"] = self.src_images
7866

    
7867
    env.update(_BuildInstanceHookEnv(
7868
      name=self.op.instance_name,
7869
      primary_node=self.op.pnode,
7870
      secondary_nodes=self.secondaries,
7871
      status=self.op.start,
7872
      os_type=self.op.os_type,
7873
      memory=self.be_full[constants.BE_MEMORY],
7874
      vcpus=self.be_full[constants.BE_VCPUS],
7875
      nics=_NICListToTuple(self, self.nics),
7876
      disk_template=self.op.disk_template,
7877
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
7878
             for d in self.disks],
7879
      bep=self.be_full,
7880
      hvp=self.hv_full,
7881
      hypervisor_name=self.op.hypervisor,
7882
    ))
7883

    
7884
    return env
7885

    
7886
  def BuildHooksNodes(self):
7887
    """Build hooks nodes.
7888

7889
    """
7890
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
7891
    return nl, nl
7892

    
7893
  def _ReadExportInfo(self):
7894
    """Reads the export information from disk.
7895

7896
    It will override the opcode source node and path with the actual
7897
    information, if these two were not specified before.
7898

7899
    @return: the export information
7900

7901
    """
7902
    assert self.op.mode == constants.INSTANCE_IMPORT
7903

    
7904
    src_node = self.op.src_node
7905
    src_path = self.op.src_path
7906

    
7907
    if src_node is None:
7908
      locked_nodes = self.glm.list_owned(locking.LEVEL_NODE)
7909
      exp_list = self.rpc.call_export_list(locked_nodes)
7910
      found = False
7911
      for node in exp_list:
7912
        if exp_list[node].fail_msg:
7913
          continue
7914
        if src_path in exp_list[node].payload:
7915
          found = True
7916
          self.op.src_node = src_node = node
7917
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
7918
                                                       src_path)
7919
          break
7920
      if not found:
7921
        raise errors.OpPrereqError("No export found for relative path %s" %
7922
                                    src_path, errors.ECODE_INVAL)
7923

    
7924
    _CheckNodeOnline(self, src_node)
7925
    result = self.rpc.call_export_info(src_node, src_path)
7926
    result.Raise("No export or invalid export found in dir %s" % src_path)
7927

    
7928
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
7929
    if not export_info.has_section(constants.INISECT_EXP):
7930
      raise errors.ProgrammerError("Corrupted export config",
7931
                                   errors.ECODE_ENVIRON)
7932

    
7933
    ei_version = export_info.get(constants.INISECT_EXP, "version")
7934
    if (int(ei_version) != constants.EXPORT_VERSION):
7935
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
7936
                                 (ei_version, constants.EXPORT_VERSION),
7937
                                 errors.ECODE_ENVIRON)
7938
    return export_info
7939

    
7940
  def _ReadExportParams(self, einfo):
7941
    """Use export parameters as defaults.
7942

7943
    In case the opcode doesn't specify (as in override) some instance
7944
    parameters, then try to use them from the export information, if
7945
    that declares them.
7946

7947
    """
7948
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
7949

    
7950
    if self.op.disk_template is None:
7951
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
7952
        self.op.disk_template = einfo.get(constants.INISECT_INS,
7953
                                          "disk_template")
7954
      else:
7955
        raise errors.OpPrereqError("No disk template specified and the export"
7956
                                   " is missing the disk_template information",
7957
                                   errors.ECODE_INVAL)
7958

    
7959
    if not self.op.disks:
7960
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
7961
        disks = []
7962
        # TODO: import the disk iv_name too
7963
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
7964
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
7965
          disks.append({constants.IDISK_SIZE: disk_sz})
7966
        self.op.disks = disks
7967
      else:
7968
        raise errors.OpPrereqError("No disk info specified and the export"
7969
                                   " is missing the disk information",
7970
                                   errors.ECODE_INVAL)
7971

    
7972
    if (not self.op.nics and
7973
        einfo.has_option(constants.INISECT_INS, "nic_count")):
7974
      nics = []
7975
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
7976
        ndict = {}
7977
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
7978
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
7979
          ndict[name] = v
7980
        nics.append(ndict)
7981
      self.op.nics = nics
7982

    
7983
    if (self.op.hypervisor is None and
7984
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
7985
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
7986
    if einfo.has_section(constants.INISECT_HYP):
7987
      # use the export parameters but do not override the ones
7988
      # specified by the user
7989
      for name, value in einfo.items(constants.INISECT_HYP):
7990
        if name not in self.op.hvparams:
7991
          self.op.hvparams[name] = value
7992

    
7993
    if einfo.has_section(constants.INISECT_BEP):
7994
      # use the parameters, without overriding
7995
      for name, value in einfo.items(constants.INISECT_BEP):
7996
        if name not in self.op.beparams:
7997
          self.op.beparams[name] = value
7998
    else:
7999
      # try to read the parameters old style, from the main section
8000
      for name in constants.BES_PARAMETERS:
8001
        if (name not in self.op.beparams and
8002
            einfo.has_option(constants.INISECT_INS, name)):
8003
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8004

    
8005
    if einfo.has_section(constants.INISECT_OSP):
8006
      # use the parameters, without overriding
8007
      for name, value in einfo.items(constants.INISECT_OSP):
8008
        if name not in self.op.osparams:
8009
          self.op.osparams[name] = value
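  # Illustrative sketch (not part of the original code; values invented,
  # sections named after the constants used above): the export config read
  # here roughly looks like
  #
  #   [INISECT_EXP]
  #   os = debootstrap
  #
  #   [INISECT_INS]
  #   disk_template = drbd
  #   disk_count = 1
  #   disk0_size = 10240
  #   nic_count = 1
  #   nic0_mac = aa:00:00:12:34:56
  #   nic0_ip = None
  #   nic0_mode = bridged
  #   nic0_link = br0
  #   hypervisor = xen-pvm
  #
  # plus optional INISECT_HYP, INISECT_BEP and INISECT_OSP sections whose
  # entries are used only as defaults for options the opcode leaves unset.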
8010

    
8011
  def _RevertToDefaults(self, cluster):
8012
    """Revert the instance parameters to the default values.
8013

8014
    """
8015
    # hvparams
8016
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
8017
    for name in self.op.hvparams.keys():
8018
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
8019
        del self.op.hvparams[name]
8020
    # beparams
8021
    be_defs = cluster.SimpleFillBE({})
8022
    for name in self.op.beparams.keys():
8023
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
8024
        del self.op.beparams[name]
8025
    # nic params
8026
    nic_defs = cluster.SimpleFillNIC({})
8027
    for nic in self.op.nics:
8028
      for name in constants.NICS_PARAMETERS:
8029
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
8030
          del nic[name]
8031
    # osparams
8032
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
8033
    for name in self.op.osparams.keys():
8034
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
8035
        del self.op.osparams[name]
8036

    
8037
  def CheckPrereq(self):
8038
    """Check prerequisites.
8039

8040
    """
8041
    if self.op.mode == constants.INSTANCE_IMPORT:
8042
      export_info = self._ReadExportInfo()
8043
      self._ReadExportParams(export_info)
8044

    
8045
    if (not self.cfg.GetVGName() and
8046
        self.op.disk_template not in constants.DTS_NOT_LVM):
8047
      raise errors.OpPrereqError("Cluster does not support lvm-based"
8048
                                 " instances", errors.ECODE_STATE)
8049

    
8050
    if self.op.hypervisor is None:
8051
      self.op.hypervisor = self.cfg.GetHypervisorType()
8052

    
8053
    cluster = self.cfg.GetClusterInfo()
8054
    enabled_hvs = cluster.enabled_hypervisors
8055
    if self.op.hypervisor not in enabled_hvs:
8056
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8057
                                 " cluster (%s)" % (self.op.hypervisor,
8058
                                  ",".join(enabled_hvs)),
8059
                                 errors.ECODE_STATE)
8060

    
8061
    # check hypervisor parameter syntax (locally)
8062
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8063
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8064
                                      self.op.hvparams)
8065
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8066
    hv_type.CheckParameterSyntax(filled_hvp)
8067
    self.hv_full = filled_hvp
8068
    # check that we don't specify global parameters on an instance
8069
    _CheckGlobalHvParams(self.op.hvparams)
8070

    
8071
    # fill and remember the beparams dict
8072
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
8073
    self.be_full = cluster.SimpleFillBE(self.op.beparams)
8074

    
8075
    # build os parameters
8076
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
8077

    
8078
    # now that hvp/bep are in final format, let's reset to defaults,
8079
    # if told to do so
8080
    if self.op.identify_defaults:
8081
      self._RevertToDefaults(cluster)
8082

    
8083
    # NIC buildup
8084
    self.nics = []
8085
    for idx, nic in enumerate(self.op.nics):
8086
      nic_mode_req = nic.get(constants.INIC_MODE, None)
8087
      nic_mode = nic_mode_req
8088
      if nic_mode is None:
8089
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
8090

    
8091
      # in routed mode, for the first nic, the default ip is 'auto'
8092
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
8093
        default_ip_mode = constants.VALUE_AUTO
8094
      else:
8095
        default_ip_mode = constants.VALUE_NONE
8096

    
8097
      # ip validity checks
8098
      ip = nic.get(constants.INIC_IP, default_ip_mode)
8099
      if ip is None or ip.lower() == constants.VALUE_NONE:
8100
        nic_ip = None
8101
      elif ip.lower() == constants.VALUE_AUTO:
8102
        if not self.op.name_check:
8103
          raise errors.OpPrereqError("IP address set to auto but name checks"
8104
                                     " have been skipped",
8105
                                     errors.ECODE_INVAL)
8106
        nic_ip = self.hostname1.ip
8107
      else:
8108
        if not netutils.IPAddress.IsValid(ip):
8109
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
8110
                                     errors.ECODE_INVAL)
8111
        nic_ip = ip
8112

    
8113
      # TODO: check the ip address for uniqueness
8114
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
8115
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
8116
                                   errors.ECODE_INVAL)
8117

    
8118
      # MAC address verification
8119
      mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
8120
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8121
        mac = utils.NormalizeAndValidateMac(mac)
8122

    
8123
        try:
8124
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
8125
        except errors.ReservationError:
8126
          raise errors.OpPrereqError("MAC address %s already in use"
8127
                                     " in cluster" % mac,
8128
                                     errors.ECODE_NOTUNIQUE)
8129

    
8130
      #  Build nic parameters
8131
      link = nic.get(constants.INIC_LINK, None)
8132
      nicparams = {}
8133
      if nic_mode_req:
8134
        nicparams[constants.NIC_MODE] = nic_mode_req
8135
      if link:
8136
        nicparams[constants.NIC_LINK] = link
8137

    
8138
      check_params = cluster.SimpleFillNIC(nicparams)
8139
      objects.NIC.CheckParameterSyntax(check_params)
8140
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
8141

    
8142
    # disk checks/pre-build
8143
    default_vg = self.cfg.GetVGName()
8144
    self.disks = []
8145
    for disk in self.op.disks:
8146
      mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
8147
      if mode not in constants.DISK_ACCESS_SET:
8148
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
8149
                                   mode, errors.ECODE_INVAL)
8150
      size = disk.get(constants.IDISK_SIZE, None)
8151
      if size is None:
8152
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
8153
      try:
8154
        size = int(size)
8155
      except (TypeError, ValueError):
8156
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
8157
                                   errors.ECODE_INVAL)
8158

    
8159
      data_vg = disk.get(constants.IDISK_VG, default_vg)
8160
      new_disk = {
8161
        constants.IDISK_SIZE: size,
8162
        constants.IDISK_MODE: mode,
8163
        constants.IDISK_VG: data_vg,
8164
        constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
8165
        }
8166
      if constants.IDISK_ADOPT in disk:
8167
        new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
8168
      self.disks.append(new_disk)
8169

    
8170
    if self.op.mode == constants.INSTANCE_IMPORT:
8171

    
8172
      # Check that the new instance doesn't have less disks than the export
8173
      instance_disks = len(self.disks)
8174
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
8175
      if instance_disks < export_disks:
8176
        raise errors.OpPrereqError("Not enough disks to import."
8177
                                   " (instance: %d, export: %d)" %
8178
                                   (instance_disks, export_disks),
8179
                                   errors.ECODE_INVAL)
8180

    
8181
      disk_images = []
8182
      for idx in range(export_disks):
8183
        option = 'disk%d_dump' % idx
8184
        if export_info.has_option(constants.INISECT_INS, option):
8185
          # FIXME: are the old os-es, disk sizes, etc. useful?
8186
          export_name = export_info.get(constants.INISECT_INS, option)
8187
          image = utils.PathJoin(self.op.src_path, export_name)
8188
          disk_images.append(image)
8189
        else:
8190
          disk_images.append(False)
8191

    
8192
      self.src_images = disk_images
8193

    
8194
      old_name = export_info.get(constants.INISECT_INS, 'name')
8195
      try:
8196
        exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
8197
      except (TypeError, ValueError), err:
8198
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
8199
                                   " an integer: %s" % str(err),
8200
                                   errors.ECODE_STATE)
8201
      if self.op.instance_name == old_name:
8202
        for idx, nic in enumerate(self.nics):
8203
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
8204
            nic_mac_ini = 'nic%d_mac' % idx
8205
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8206

    
8207
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8208

    
8209
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
8210
    if self.op.ip_check:
8211
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8212
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
8213
                                   (self.check_ip, self.op.instance_name),
8214
                                   errors.ECODE_NOTUNIQUE)
8215

    
8216
    #### mac address generation
8217
    # By generating here the mac address both the allocator and the hooks get
8218
    # the real final mac address rather than the 'auto' or 'generate' value.
8219
    # There is a race condition between the generation and the instance object
8220
    # creation, which means that we know the mac is valid now, but we're not
8221
    # sure it will be when we actually add the instance. If things go bad
8222
    # adding the instance will abort because of a duplicate mac, and the
8223
    # creation job will fail.
8224
    for nic in self.nics:
8225
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8226
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8227

    
8228
    #### allocator run
8229

    
8230
    if self.op.iallocator is not None:
8231
      self._RunAllocator()
8232

    
8233
    #### node related checks
8234

    
8235
    # check primary node
8236
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8237
    assert self.pnode is not None, \
8238
      "Cannot retrieve locked node %s" % self.op.pnode
8239
    if pnode.offline:
8240
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8241
                                 pnode.name, errors.ECODE_STATE)
8242
    if pnode.drained:
8243
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8244
                                 pnode.name, errors.ECODE_STATE)
8245
    if not pnode.vm_capable:
8246
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8247
                                 " '%s'" % pnode.name, errors.ECODE_STATE)
8248

    
8249
    self.secondaries = []
8250

    
8251
    # mirror node verification
8252
    if self.op.disk_template in constants.DTS_INT_MIRROR:
8253
      if self.op.snode == pnode.name:
8254
        raise errors.OpPrereqError("The secondary node cannot be the"
8255
                                   " primary node", errors.ECODE_INVAL)
8256
      _CheckNodeOnline(self, self.op.snode)
8257
      _CheckNodeNotDrained(self, self.op.snode)
8258
      _CheckNodeVmCapable(self, self.op.snode)
8259
      self.secondaries.append(self.op.snode)
8260

    
8261
    nodenames = [pnode.name] + self.secondaries
8262

    
8263
    if not self.adopt_disks:
8264
      # Check lv size requirements, if not adopting
8265
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8266
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)

    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
                                disk[constants.IDISK_ADOPT])
                     for disk in self.disks])
      if len(all_lvs) != len(self.disks):
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
                                   errors.ECODE_INVAL)
      for lv_name in all_lvs:
        try:
          # FIXME: lv_name here is "vg/lv"; we need to ensure that other
          # calls to ReserveLV use the same syntax
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("LV named %s used by another instance" %
                                     lv_name, errors.ECODE_NOTUNIQUE)

      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)

      node_lvs = self.rpc.call_lv_list([pnode.name],
                                       vg_names.payload.keys())[pnode.name]
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
      node_lvs = node_lvs.payload

      delta = all_lvs.difference(node_lvs.keys())
      if delta:
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
      if online_lvs:
        raise errors.OpPrereqError("Online logical volumes found, cannot"
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
                                   errors.ECODE_STATE)
      # update the size of disk based on what is found
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
                                        dsk[constants.IDISK_ADOPT])][0]))

    elif self.op.disk_template == constants.DT_BLOCK:
      # Normalize and de-duplicate device paths
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
                       for disk in self.disks])
      if len(all_disks) != len(self.disks):
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
                                   errors.ECODE_INVAL)
      baddisks = [d for d in all_disks
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
      if baddisks:
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
                                   " cannot be adopted" %
                                   (", ".join(baddisks),
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
                                   errors.ECODE_INVAL)

      node_disks = self.rpc.call_bdev_sizes([pnode.name],
                                            list(all_disks))[pnode.name]
      node_disks.Raise("Cannot get block device information from node %s" %
                       pnode.name)
      node_disks = node_disks.payload
      delta = all_disks.difference(node_disks.keys())
      if delta:
        raise errors.OpPrereqError("Missing block device(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
    # check OS parameters (remotely)
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    # memory check on primary node
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MEMORY],
                           self.op.hypervisor)

    self.dry_run_result = list(nodenames)

  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
      # this is needed because os.path.join does not accept None arguments
      if self.op.file_storage_dir is None:
        string_file_storage_dir = ""
      else:
        string_file_storage_dir = self.op.file_storage_dir

      # build the full file storage dir path
      if self.op.disk_template == constants.DT_SHARED_FILE:
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
      else:
        get_fsd_fn = self.cfg.GetFileStorageDir

      file_storage_dir = utils.PathJoin(get_fsd_fn(),
                                        string_file_storage_dir, instance)
    else:
      file_storage_dir = ""

    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  file_storage_dir,
                                  self.op.file_driver,
                                  0,
                                  feedback_fn)

    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_up=False,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            osparams=self.op.osparams,
                            )

    if self.adopt_disks:
      if self.op.disk_template == constants.DT_PLAIN:
        # rename LVs to the newly-generated names; we need to construct
        # 'fake' LV disks with the old data, plus the new unique_id
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
        rename_to = []
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
          rename_to.append(t_dsk.logical_id)
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
          self.cfg.SetDiskID(t_dsk, pnode_name)
        result = self.rpc.call_blockdev_rename(pnode_name,
                                               zip(tmp_disks, rename_to))
        result.Raise("Failed to rename adopted LVs")
    else:
      feedback_fn("* creating instance disks...")
      try:
        _CreateDisks(self, iobj)
      except errors.OpExecError:
        self.LogWarning("Device creation failed, reverting...")
        try:
          _RemoveDisks(self, iobj)
        finally:
          self.cfg.ReleaseDRBDMinors(instance)
          raise

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj, self.proc.GetECId())

    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]

    if self.op.mode == constants.INSTANCE_IMPORT:
      # Release unused nodes
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
    else:
      # Release all nodes
      _ReleaseLocks(self, locking.LEVEL_NODE)

    disk_abort = False
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
      feedback_fn("* wiping instance disks...")
      try:
        _WipeDisks(self, iobj)
      except errors.OpExecError, err:
        logging.exception("Wiping disks failed")
        self.LogWarning("Wiping instance disks failed (%s)", err)
        disk_abort = True

    if disk_abort:
      # Something is already wrong with the disks, don't do anything else
      pass
    elif self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      time.sleep(15)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
      if self.op.mode == constants.INSTANCE_CREATE:
        if not self.op.no_install:
          feedback_fn("* running the instance OS create scripts...")
          # FIXME: pass debug option from opcode to backend
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
                                                 self.op.debug_level)
          result.Raise("Could not add os for instance %s"
                       " on node %s" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")

        transfers = []

        for idx, image in enumerate(self.src_images):
          if not image:
            continue

          # FIXME: pass debug option from opcode to backend
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
                                             constants.IEIO_FILE, (image, ),
                                             constants.IEIO_SCRIPT,
                                             (iobj.disks[idx], idx),
                                             None)
          transfers.append(dt)

        import_result = \
          masterd.instance.TransferInstanceData(self, feedback_fn,
                                                self.op.src_node, pnode_name,
                                                self.pnode.secondary_ip,
                                                iobj, transfers)
        if not compat.all(import_result):
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
        feedback_fn("* preparing remote import...")
        # The source cluster will stop the instance before attempting to make a
        # connection. In some cases stopping an instance can take a long time,
        # hence the shutdown timeout is added to the connection timeout.
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
                           self.op.source_shutdown_timeout)
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

        assert iobj.primary_node == self.pnode.name
        disk_results = \
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
                                        self.source_x509_ca,
                                        self._cds, timeouts)
        if not compat.all(disk_results):
          # TODO: Should the instance still be started, even if some disks
          # failed to import (valid for local imports, too)?
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

        # Run rename script on newly imported instance
        assert iobj.name == instance
        feedback_fn("Running rename script for %s" % instance)
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
                                                   self.source_instance_name,
                                                   self.op.debug_level)
        if result.fail_msg:
          self.LogWarning("Failed to run rename script for %s on node"
                          " %s: %s" % (instance, pnode_name, result.fail_msg))

      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    if self.op.start:
      iobj.admin_up = True
      self.cfg.Update(iobj, feedback_fn)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)


class LUInstanceConsole(NoHooksLU):
8562
  """Connect to an instance's console.
8563

8564
  This is somewhat special in that it returns the command line that
8565
  you need to run on the master node in order to connect to the
8566
  console.
8567

8568
  """
8569
  REQ_BGL = False
8570

    
8571
  def ExpandNames(self):
8572
    self._ExpandAndLockInstance()
8573

    
8574
  def CheckPrereq(self):
8575
    """Check prerequisites.
8576

8577
    This checks that the instance is in the cluster.
8578

8579
    """
8580
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8581
    assert self.instance is not None, \
8582
      "Cannot retrieve locked instance %s" % self.op.instance_name
8583
    _CheckNodeOnline(self, self.instance.primary_node)
8584

    
8585
  def Exec(self, feedback_fn):
8586
    """Connect to the console of an instance
8587

8588
    """
8589
    instance = self.instance
8590
    node = instance.primary_node
8591

    
8592
    node_insts = self.rpc.call_instance_list([node],
8593
                                             [instance.hypervisor])[node]
8594
    node_insts.Raise("Can't get node information from %s" % node)
8595

    
8596
    if instance.name not in node_insts.payload:
8597
      if instance.admin_up:
8598
        state = constants.INSTST_ERRORDOWN
8599
      else:
8600
        state = constants.INSTST_ADMINDOWN
8601
      raise errors.OpExecError("Instance %s is not running (state %s)" %
8602
                               (instance.name, state))
8603

    
8604
    logging.debug("Connecting to console of %s on %s", instance.name, node)
8605

    
8606
    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
8607

    
8608

    
8609
def _GetInstanceConsole(cluster, instance):
8610
  """Returns console information for an instance.
8611

8612
  @type cluster: L{objects.Cluster}
8613
  @type instance: L{objects.Instance}
8614
  @rtype: dict
8615

8616
  """
8617
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
8618
  # beparams and hvparams are passed separately, to avoid editing the
8619
  # instance and then saving the defaults in the instance itself.
8620
  hvparams = cluster.FillHV(instance)
8621
  beparams = cluster.FillBE(instance)
8622
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)
8623

    
8624
  assert console.instance == instance.name
8625
  assert console.Validate()
8626

    
8627
  return console.ToDict()
8628

    
8629

    
8630
class LUInstanceReplaceDisks(LogicalUnit):
8631
  """Replace the disks of an instance.
8632

8633
  """
8634
  HPATH = "mirrors-replace"
8635
  HTYPE = constants.HTYPE_INSTANCE
8636
  REQ_BGL = False
8637

    
8638
  def CheckArguments(self):
8639
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
8640
                                  self.op.iallocator)
8641

    
8642
  def ExpandNames(self):
8643
    self._ExpandAndLockInstance()
8644

    
8645
    assert locking.LEVEL_NODE not in self.needed_locks
8646
    assert locking.LEVEL_NODEGROUP not in self.needed_locks
8647

    
8648
    assert self.op.iallocator is None or self.op.remote_node is None, \
8649
      "Conflicting options"
8650

    
8651
    if self.op.remote_node is not None:
8652
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8653

    
8654
      # Warning: do not remove the locking of the new secondary here
8655
      # unless DRBD8.AddChildren is changed to work in parallel;
8656
      # currently it doesn't since parallel invocations of
8657
      # FindUnusedMinor will conflict
8658
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
8659
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
8660
    else:
8661
      self.needed_locks[locking.LEVEL_NODE] = []
8662
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8663

    
8664
      if self.op.iallocator is not None:
8665
        # iallocator will select a new node in the same group
8666
        self.needed_locks[locking.LEVEL_NODEGROUP] = []
8667

    
8668
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
8669
                                   self.op.iallocator, self.op.remote_node,
8670
                                   self.op.disks, False, self.op.early_release)
8671

    
8672
    self.tasklets = [self.replacer]
8673

    
8674
  def DeclareLocks(self, level):
8675
    if level == locking.LEVEL_NODEGROUP:
8676
      assert self.op.remote_node is None
8677
      assert self.op.iallocator is not None
8678
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
8679

    
8680
      self.share_locks[locking.LEVEL_NODEGROUP] = 1
8681
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
8682
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)
8683

    
8684
    elif level == locking.LEVEL_NODE:
8685
      if self.op.iallocator is not None:
8686
        assert self.op.remote_node is None
8687
        assert not self.needed_locks[locking.LEVEL_NODE]
8688

    
8689
        # Lock member nodes of all locked groups
8690
        self.needed_locks[locking.LEVEL_NODE] = [node_name
8691
          for group_uuid in self.glm.list_owned(locking.LEVEL_NODEGROUP)
8692
          for node_name in self.cfg.GetNodeGroup(group_uuid).members]
8693
      else:
8694
        self._LockInstancesNodes()
8695

    
8696
  def BuildHooksEnv(self):
8697
    """Build hooks env.
8698

8699
    This runs on the master, the primary and all the secondaries.
8700

8701
    """
8702
    instance = self.replacer.instance
8703
    env = {
8704
      "MODE": self.op.mode,
8705
      "NEW_SECONDARY": self.op.remote_node,
8706
      "OLD_SECONDARY": instance.secondary_nodes[0],
8707
      }
8708
    env.update(_BuildInstanceHookEnvByObject(self, instance))
8709
    return env
8710

    
8711
  def BuildHooksNodes(self):
8712
    """Build hooks nodes.
8713

8714
    """
8715
    instance = self.replacer.instance
8716
    nl = [
8717
      self.cfg.GetMasterNode(),
8718
      instance.primary_node,
8719
      ]
8720
    if self.op.remote_node is not None:
8721
      nl.append(self.op.remote_node)
8722
    return nl, nl
8723

    
8724
  def CheckPrereq(self):
8725
    """Check prerequisites.
8726

8727
    """
8728
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
8729
            self.op.iallocator is None)
8730

    
8731
    owned_groups = self.glm.list_owned(locking.LEVEL_NODEGROUP)
8732
    if owned_groups:
8733
      groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
8734
      if owned_groups != groups:
8735
        raise errors.OpExecError("Node groups used by instance '%s' changed"
8736
                                 " since lock was acquired, current list is %r,"
8737
                                 " used to be '%s'" %
8738
                                 (self.op.instance_name,
8739
                                  utils.CommaJoin(groups),
8740
                                  utils.CommaJoin(owned_groups)))
8741

    
8742
    return LogicalUnit.CheckPrereq(self)
8743

    
8744

    
8745
class TLReplaceDisks(Tasklet):
8746
  """Replaces disks for an instance.
8747

8748
  Note: Locking is not within the scope of this class.
8749

8750
  """
8751
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
8752
               disks, delay_iallocator, early_release):
8753
    """Initializes this class.
8754

8755
    """
8756
    Tasklet.__init__(self, lu)
8757

    
8758
    # Parameters
8759
    self.instance_name = instance_name
8760
    self.mode = mode
8761
    self.iallocator_name = iallocator_name
8762
    self.remote_node = remote_node
8763
    self.disks = disks
8764
    self.delay_iallocator = delay_iallocator
8765
    self.early_release = early_release
8766

    
8767
    # Runtime data
8768
    self.instance = None
8769
    self.new_node = None
8770
    self.target_node = None
8771
    self.other_node = None
8772
    self.remote_node_info = None
8773
    self.node_secondary_ip = None
8774

    
8775
  @staticmethod
8776
  def CheckArguments(mode, remote_node, iallocator):
8777
    """Helper function for users of this class.
8778

8779
    """
8780
    # check for valid parameter combination
8781
    if mode == constants.REPLACE_DISK_CHG:
8782
      if remote_node is None and iallocator is None:
8783
        raise errors.OpPrereqError("When changing the secondary either an"
8784
                                   " iallocator script must be used or the"
8785
                                   " new node given", errors.ECODE_INVAL)
8786

    
8787
      if remote_node is not None and iallocator is not None:
8788
        raise errors.OpPrereqError("Give either the iallocator or the new"
8789
                                   " secondary, not both", errors.ECODE_INVAL)
8790

    
8791
    elif remote_node is not None or iallocator is not None:
8792
      # Not replacing the secondary
8793
      raise errors.OpPrereqError("The iallocator and new node options can"
8794
                                 " only be used when changing the"
8795
                                 " secondary node", errors.ECODE_INVAL)
8796

    
8797
  @staticmethod
8798
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
8799
    """Compute a new secondary node using an IAllocator.
8800

8801
    """
8802
    ial = IAllocator(lu.cfg, lu.rpc,
8803
                     mode=constants.IALLOCATOR_MODE_RELOC,
8804
                     name=instance_name,
8805
                     relocate_from=relocate_from)
8806

    
8807
    ial.Run(iallocator_name)
8808

    
8809
    if not ial.success:
8810
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
8811
                                 " %s" % (iallocator_name, ial.info),
8812
                                 errors.ECODE_NORES)
8813

    
8814
    if len(ial.result) != ial.required_nodes:
8815
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8816
                                 " of nodes (%s), required %s" %
8817
                                 (iallocator_name,
8818
                                  len(ial.result), ial.required_nodes),
8819
                                 errors.ECODE_FAULT)
8820

    
8821
    remote_node_name = ial.result[0]
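    # in relocation mode ial.result is a list of node names; the single
    # entry selected here becomes the new secondary node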
8822

    
8823
    lu.LogInfo("Selected new secondary for instance '%s': %s",
8824
               instance_name, remote_node_name)
8825

    
8826
    return remote_node_name
8827

    
8828
  def _FindFaultyDisks(self, node_name):
8829
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
8830
                                    node_name, True)
8831

    
8832
  def _CheckDisksActivated(self, instance):
8833
    """Checks if the instance disks are activated.
8834

8835
    @param instance: The instance to check disks
8836
    @return: True if they are activated, False otherwise
8837

8838
    """
8839
    nodes = instance.all_nodes
8840

    
8841
    for idx, dev in enumerate(instance.disks):
8842
      for node in nodes:
8843
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
8844
        self.cfg.SetDiskID(dev, node)
8845

    
8846
        result = self.rpc.call_blockdev_find(node, dev)
8847

    
8848
        if result.offline:
8849
          continue
8850
        elif result.fail_msg or not result.payload:
8851
          return False
8852

    
8853
    return True
8854

    
8855
  def CheckPrereq(self):
8856
    """Check prerequisites.
8857

8858
    This checks that the instance is in the cluster.
8859

8860
    """
8861
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
8862
    assert instance is not None, \
8863
      "Cannot retrieve locked instance %s" % self.instance_name
8864

    
8865
    if instance.disk_template != constants.DT_DRBD8:
8866
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
8867
                                 " instances", errors.ECODE_INVAL)
8868

    
8869
    if len(instance.secondary_nodes) != 1:
8870
      raise errors.OpPrereqError("The instance has a strange layout,"
8871
                                 " expected one secondary but found %d" %
8872
                                 len(instance.secondary_nodes),
8873
                                 errors.ECODE_FAULT)
8874

    
8875
    if not self.delay_iallocator:
8876
      self._CheckPrereq2()
8877

    
8878
  def _CheckPrereq2(self):
8879
    """Check prerequisites, second part.
8880

8881
    This function should always be part of CheckPrereq. It was separated and is
8882
    now called from Exec because during node evacuation iallocator was only
8883
    called with an unmodified cluster model, not taking planned changes into
8884
    account.
8885

8886
    """
8887
    instance = self.instance
8888
    secondary_node = instance.secondary_nodes[0]
8889

    
8890
    if self.iallocator_name is None:
8891
      remote_node = self.remote_node
8892
    else:
8893
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
8894
                                       instance.name, instance.secondary_nodes)
8895

    
8896
    if remote_node is None:
8897
      self.remote_node_info = None
8898
    else:
8899
      assert remote_node in self.lu.glm.list_owned(locking.LEVEL_NODE), \
8900
             "Remote node '%s' is not locked" % remote_node
8901

    
8902
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
8903
      assert self.remote_node_info is not None, \
8904
        "Cannot retrieve locked node %s" % remote_node
8905

    
8906
    if remote_node == self.instance.primary_node:
8907
      raise errors.OpPrereqError("The specified node is the primary node of"
8908
                                 " the instance", errors.ECODE_INVAL)
8909

    
8910
    if remote_node == secondary_node:
8911
      raise errors.OpPrereqError("The specified node is already the"
8912
                                 " secondary node of the instance",
8913
                                 errors.ECODE_INVAL)
8914

    
8915
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
8916
                                    constants.REPLACE_DISK_CHG):
8917
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
8918
                                 errors.ECODE_INVAL)
8919

    
8920
    if self.mode == constants.REPLACE_DISK_AUTO:
8921
      if not self._CheckDisksActivated(instance):
8922
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
8923
                                   " first" % self.instance_name,
8924
                                   errors.ECODE_STATE)
8925
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
8926
      faulty_secondary = self._FindFaultyDisks(secondary_node)
8927

    
8928
      if faulty_primary and faulty_secondary:
8929
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
8930
                                   " one node and can not be repaired"
8931
                                   " automatically" % self.instance_name,
8932
                                   errors.ECODE_STATE)
8933

    
8934
      if faulty_primary:
8935
        self.disks = faulty_primary
8936
        self.target_node = instance.primary_node
8937
        self.other_node = secondary_node
8938
        check_nodes = [self.target_node, self.other_node]
8939
      elif faulty_secondary:
8940
        self.disks = faulty_secondary
8941
        self.target_node = secondary_node
8942
        self.other_node = instance.primary_node
8943
        check_nodes = [self.target_node, self.other_node]
8944
      else:
8945
        self.disks = []
8946
        check_nodes = []
8947

    
8948
    else:
8949
      # Non-automatic modes
8950
      if self.mode == constants.REPLACE_DISK_PRI:
8951
        self.target_node = instance.primary_node
8952
        self.other_node = secondary_node
8953
        check_nodes = [self.target_node, self.other_node]
8954

    
8955
      elif self.mode == constants.REPLACE_DISK_SEC:
8956
        self.target_node = secondary_node
8957
        self.other_node = instance.primary_node
8958
        check_nodes = [self.target_node, self.other_node]
8959

    
8960
      elif self.mode == constants.REPLACE_DISK_CHG:
8961
        self.new_node = remote_node
8962
        self.other_node = instance.primary_node
8963
        self.target_node = secondary_node
8964
        check_nodes = [self.new_node, self.other_node]
8965

    
8966
        _CheckNodeNotDrained(self.lu, remote_node)
8967
        _CheckNodeVmCapable(self.lu, remote_node)
8968

    
8969
        old_node_info = self.cfg.GetNodeInfo(secondary_node)
8970
        assert old_node_info is not None
8971
        if old_node_info.offline and not self.early_release:
8972
          # doesn't make sense to delay the release
8973
          self.early_release = True
8974
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
8975
                          " early-release mode", secondary_node)
8976

    
8977
      else:
8978
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
8979
                                     self.mode)
8980

    
8981
      # If not specified all disks should be replaced
8982
      if not self.disks:
8983
        self.disks = range(len(self.instance.disks))
8984

    
8985
    for node in check_nodes:
8986
      _CheckNodeOnline(self.lu, node)
8987

    
8988
    touched_nodes = frozenset(node_name for node_name in [self.new_node,
8989
                                                          self.other_node,
8990
                                                          self.target_node]
8991
                              if node_name is not None)
8992

    
8993
    # Release unneeded node locks
8994
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
8995

    
8996
    # Release any owned node group
8997
    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
8998
      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
8999

    
9000
    # Check whether disks are valid
9001
    for disk_idx in self.disks:
9002
      instance.FindDisk(disk_idx)
9003

    
9004
    # Get secondary node IP addresses
9005
    self.node_secondary_ip = \
9006
      dict((node_name, self.cfg.GetNodeInfo(node_name).secondary_ip)
9007
           for node_name in touched_nodes)
9008

    
9009
  def Exec(self, feedback_fn):
9010
    """Execute disk replacement.
9011

9012
    This dispatches the disk replacement to the appropriate handler.
9013

9014
    """
9015
    if self.delay_iallocator:
9016
      self._CheckPrereq2()
9017

    
9018
    if __debug__:
9019
      # Verify owned locks before starting operation
9020
      owned_locks = self.lu.glm.list_owned(locking.LEVEL_NODE)
9021
      assert set(owned_locks) == set(self.node_secondary_ip), \
9022
          ("Incorrect node locks, owning %s, expected %s" %
9023
           (owned_locks, self.node_secondary_ip.keys()))
9024

    
9025
      owned_locks = self.lu.glm.list_owned(locking.LEVEL_INSTANCE)
9026
      assert list(owned_locks) == [self.instance_name], \
9027
          "Instance '%s' not locked" % self.instance_name
9028

    
9029
      assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
9030
          "Should not own any node group lock at this point"
9031

    
9032
    if not self.disks:
9033
      feedback_fn("No disks need replacement")
9034
      return
9035

    
9036
    feedback_fn("Replacing disk(s) %s for %s" %
9037
                (utils.CommaJoin(self.disks), self.instance.name))
9038

    
9039
    activate_disks = (not self.instance.admin_up)
9040

    
9041
    # Activate the instance disks if we're replacing them on a down instance
9042
    if activate_disks:
9043
      _StartInstanceDisks(self.lu, self.instance, True)
9044

    
9045
    try:
9046
      # Should we replace the secondary node?
9047
      if self.new_node is not None:
9048
        fn = self._ExecDrbd8Secondary
9049
      else:
9050
        fn = self._ExecDrbd8DiskOnly
9051

    
9052
      result = fn(feedback_fn)
9053
    finally:
9054
      # Deactivate the instance disks if we're replacing them on a
9055
      # down instance
9056
      if activate_disks:
9057
        _SafeShutdownInstanceDisks(self.lu, self.instance)
9058

    
9059
    if __debug__:
9060
      # Verify owned locks
9061
      owned_locks = self.lu.glm.list_owned(locking.LEVEL_NODE)
9062
      nodes = frozenset(self.node_secondary_ip)
9063
      assert ((self.early_release and not owned_locks) or
9064
              (not self.early_release and not (set(owned_locks) - nodes))), \
9065
        ("Not owning the correct locks, early_release=%s, owned=%r,"
9066
         " nodes=%r" % (self.early_release, owned_locks, nodes))
9067

    
9068
    return result
9069

    
9070
  def _CheckVolumeGroup(self, nodes):
9071
    self.lu.LogInfo("Checking volume groups")
9072

    
9073
    vgname = self.cfg.GetVGName()
9074

    
9075
    # Make sure volume group exists on all involved nodes
9076
    results = self.rpc.call_vg_list(nodes)
9077
    if not results:
9078
      raise errors.OpExecError("Can't list volume groups on the nodes")
9079

    
9080
    for node in nodes:
9081
      res = results[node]
9082
      res.Raise("Error checking node %s" % node)
9083
      if vgname not in res.payload:
9084
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
9085
                                 (vgname, node))
9086

    
9087
  def _CheckDisksExistence(self, nodes):
9088
    # Check disk existence
9089
    for idx, dev in enumerate(self.instance.disks):
9090
      if idx not in self.disks:
9091
        continue
9092

    
9093
      for node in nodes:
9094
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
9095
        self.cfg.SetDiskID(dev, node)
9096

    
9097
        result = self.rpc.call_blockdev_find(node, dev)
9098

    
9099
        msg = result.fail_msg
9100
        if msg or not result.payload:
9101
          if not msg:
9102
            msg = "disk not found"
9103
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
9104
                                   (idx, node, msg))
9105

    
9106
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
9107
    for idx, dev in enumerate(self.instance.disks):
9108
      if idx not in self.disks:
9109
        continue
9110

    
9111
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
9112
                      (idx, node_name))
9113

    
9114
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
9115
                                   ldisk=ldisk):
9116
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
9117
                                 " replace disks for instance %s" %
9118
                                 (node_name, self.instance.name))
9119

    
9120
  def _CreateNewStorage(self, node_name):
9121
    iv_names = {}
9122

    
9123
    for idx, dev in enumerate(self.instance.disks):
9124
      if idx not in self.disks:
9125
        continue
9126

    
9127
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
9128

    
9129
      self.cfg.SetDiskID(dev, node_name)
9130

    
9131
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
9132
      names = _GenerateUniqueNames(self.lu, lv_names)
9133

    
9134
      vg_data = dev.children[0].logical_id[0]
9135
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
9136
                             logical_id=(vg_data, names[0]))
9137
      vg_meta = dev.children[1].logical_id[0]
9138
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
9139
                             logical_id=(vg_meta, names[1]))
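      # (the fixed 128 above is the size, in MiB, of the DRBD metadata LV)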
9140

    
9141
      new_lvs = [lv_data, lv_meta]
9142
      old_lvs = dev.children
9143
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
9144

    
9145
      # we pass force_create=True to force the LVM creation
9146
      for new_lv in new_lvs:
9147
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
9148
                        _GetInstanceInfoText(self.instance), False)
9149

    
9150
    return iv_names
9151

    
9152
  def _CheckDevices(self, node_name, iv_names):
9153
    for name, (dev, _, _) in iv_names.iteritems():
9154
      self.cfg.SetDiskID(dev, node_name)
9155

    
9156
      result = self.rpc.call_blockdev_find(node_name, dev)
9157

    
9158
      msg = result.fail_msg
9159
      if msg or not result.payload:
9160
        if not msg:
9161
          msg = "disk not found"
9162
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
9163
                                 (name, msg))
9164

    
9165
      if result.payload.is_degraded:
9166
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
9167

    
9168
  def _RemoveOldStorage(self, node_name, iv_names):
9169
    for name, (_, old_lvs, _) in iv_names.iteritems():
9170
      self.lu.LogInfo("Remove logical volumes for %s" % name)
9171

    
9172
      for lv in old_lvs:
9173
        self.cfg.SetDiskID(lv, node_name)
9174

    
9175
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
9176
        if msg:
9177
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
9178
                             hint="remove unused LVs manually")
9179

    
9180
  def _ExecDrbd8DiskOnly(self, feedback_fn):
9181
    """Replace a disk on the primary or secondary for DRBD 8.
9182

9183
    The algorithm for replace is quite complicated:
9184

9185
      1. for each disk to be replaced:
9186

9187
        1. create new LVs on the target node with unique names
9188
        1. detach old LVs from the drbd device
9189
        1. rename old LVs to name_replaced.<time_t>
9190
        1. rename new LVs to old LVs
9191
        1. attach the new LVs (with the old names now) to the drbd device
9192

9193
      1. wait for sync across all devices
9194

9195
      1. for each modified disk:
9196

9197
        1. remove old LVs (which have the name name_replaced.<time_t>)
9198

9199
    Failures are not very well handled.
9200

9201
    """
9202
    steps_total = 6
9203

    
9204
    # Step: check device activation
9205
    self.lu.LogStep(1, steps_total, "Check device existence")
9206
    self._CheckDisksExistence([self.other_node, self.target_node])
9207
    self._CheckVolumeGroup([self.target_node, self.other_node])
9208

    
9209
    # Step: check other node consistency
9210
    self.lu.LogStep(2, steps_total, "Check peer consistency")
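    # _CheckDisksConsistency(node, on_primary, ldisk): check the other node,
    # passing whether it is the primary and ldisk=False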
9211
    self._CheckDisksConsistency(self.other_node,
9212
                                self.other_node == self.instance.primary_node,
9213
                                False)
9214

    
9215
    # Step: create new storage
9216
    self.lu.LogStep(3, steps_total, "Allocate new storage")
9217
    iv_names = self._CreateNewStorage(self.target_node)
9218

    
9219
    # Step: for each lv, detach+rename*2+attach
9220
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9221
    for dev, old_lvs, new_lvs in iv_names.itervalues():
9222
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
9223

    
9224
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
9225
                                                     old_lvs)
9226
      result.Raise("Can't detach drbd from local storage on node"
9227
                   " %s for device %s" % (self.target_node, dev.iv_name))
9228
      #dev.children = []
9229
      #cfg.Update(instance)
9230

    
9231
      # ok, we created the new LVs, so now we know we have the needed
9232
      # storage; as such, we proceed on the target node to rename
9233
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
9234
      # using the assumption that logical_id == physical_id (which in
9235
      # turn is the unique_id on that node)
9236

    
9237
      # FIXME(iustin): use a better name for the replaced LVs
9238
      temp_suffix = int(time.time())
9239
      ren_fn = lambda d, suff: (d.physical_id[0],
9240
                                d.physical_id[1] + "_replaced-%s" % suff)
9241

    
9242
      # Build the rename list based on what LVs exist on the node
9243
      rename_old_to_new = []
9244
      for to_ren in old_lvs:
9245
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
9246
        if not result.fail_msg and result.payload:
9247
          # device exists
9248
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
9249

    
9250
      self.lu.LogInfo("Renaming the old LVs on the target node")
9251
      result = self.rpc.call_blockdev_rename(self.target_node,
9252
                                             rename_old_to_new)
9253
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
9254

    
9255
      # Now we rename the new LVs to the old LVs
9256
      self.lu.LogInfo("Renaming the new LVs on the target node")
9257
      rename_new_to_old = [(new, old.physical_id)
9258
                           for old, new in zip(old_lvs, new_lvs)]
9259
      result = self.rpc.call_blockdev_rename(self.target_node,
9260
                                             rename_new_to_old)
9261
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
9262

    
9263
      for old, new in zip(old_lvs, new_lvs):
9264
        new.logical_id = old.logical_id
9265
        self.cfg.SetDiskID(new, self.target_node)
9266

    
9267
      for disk in old_lvs:
9268
        disk.logical_id = ren_fn(disk, temp_suffix)
9269
        self.cfg.SetDiskID(disk, self.target_node)
9270

    
9271
      # Now that the new lvs have the old name, we can add them to the device
9272
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9273
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9274
                                                  new_lvs)
9275
      msg = result.fail_msg
9276
      if msg:
9277
        for new_lv in new_lvs:
9278
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
9279
                                               new_lv).fail_msg
9280
          if msg2:
9281
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
9282
                               hint=("cleanup manually the unused logical"
9283
                                     "volumes"))
9284
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9285

    
9286
      dev.children = new_lvs
9287

    
9288
      self.cfg.Update(self.instance, feedback_fn)
9289

    
9290
    cstep = 5
9291
    if self.early_release:
9292
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9293
      cstep += 1
9294
      self._RemoveOldStorage(self.target_node, iv_names)
9295
      # WARNING: we release both node locks here, do not do other RPCs
9296
      # than WaitForSync to the primary node
9297
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9298
                    names=[self.target_node, self.other_node])
9299

    
9300
    # Wait for sync
9301
    # This can fail as the old devices are degraded and _WaitForSync
9302
    # does a combined result over all disks, so we don't check its return value
9303
    self.lu.LogStep(cstep, steps_total, "Sync devices")
9304
    cstep += 1
9305
    _WaitForSync(self.lu, self.instance)
9306

    
9307
    # Check all devices manually
9308
    self._CheckDevices(self.instance.primary_node, iv_names)
9309

    
9310
    # Step: remove old storage
9311
    if not self.early_release:
9312
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9313
      cstep += 1
9314
      self._RemoveOldStorage(self.target_node, iv_names)
9315

    
9316
  def _ExecDrbd8Secondary(self, feedback_fn):
9317
    """Replace the secondary node for DRBD 8.
9318

9319
    The algorithm for replace is quite complicated:
9320
      - for all disks of the instance:
9321
        - create new LVs on the new node with same names
9322
        - shutdown the drbd device on the old secondary
9323
        - disconnect the drbd network on the primary
9324
        - create the drbd device on the new secondary
9325
        - network attach the drbd on the primary, using an artifice:
9326
          the drbd code for Attach() will connect to the network if it
9327
          finds a device which is connected to the good local disks but
9328
          not network enabled
9329
      - wait for sync across all devices
9330
      - remove all disks from the old secondary
9331

9332
    Failures are not very well handled.
9333

9334
    """
9335
    steps_total = 6
9336

    
9337
    # Step: check device activation
9338
    self.lu.LogStep(1, steps_total, "Check device existence")
9339
    self._CheckDisksExistence([self.instance.primary_node])
9340
    self._CheckVolumeGroup([self.instance.primary_node])
9341

    
9342
    # Step: check other node consistency
9343
    self.lu.LogStep(2, steps_total, "Check peer consistency")
9344
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
9345

    
9346
    # Step: create new storage
9347
    self.lu.LogStep(3, steps_total, "Allocate new storage")
9348
    for idx, dev in enumerate(self.instance.disks):
9349
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
9350
                      (self.new_node, idx))
9351
      # we pass force_create=True to force LVM creation
9352
      for new_lv in dev.children:
9353
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9354
                        _GetInstanceInfoText(self.instance), False)
9355

    
9356
    # Step 4: drbd minors and drbd setup changes
9357
    # after this, we must manually remove the drbd minors on both the
9358
    # error and the success paths
9359
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9360
    minors = self.cfg.AllocateDRBDMinor([self.new_node
9361
                                         for dev in self.instance.disks],
9362
                                        self.instance.name)
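    # one new minor is requested on the new node for each disk of the instance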
9363
    logging.debug("Allocated minors %r", minors)
9364

    
9365
    iv_names = {}
9366
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
9367
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
9368
                      (self.new_node, idx))
9369
      # create new devices on new_node; note that we create two IDs:
9370
      # one without port, so the drbd will be activated without
9371
      # networking information on the new node at this stage, and one
9372
      # with network, for the latter activation in step 4
9373
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
9374
      if self.instance.primary_node == o_node1:
9375
        p_minor = o_minor1
9376
      else:
9377
        assert self.instance.primary_node == o_node2, "Three-node instance?"
9378
        p_minor = o_minor2
9379

    
9380
      new_alone_id = (self.instance.primary_node, self.new_node, None,
9381
                      p_minor, new_minor, o_secret)
9382
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
9383
                    p_minor, new_minor, o_secret)
9384

    
9385
      iv_names[idx] = (dev, dev.children, new_net_id)
9386
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
9387
                    new_net_id)
9388
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
9389
                              logical_id=new_alone_id,
9390
                              children=dev.children,
9391
                              size=dev.size)
9392
      try:
9393
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
9394
                              _GetInstanceInfoText(self.instance), False)
9395
      except errors.GenericError:
9396
        self.cfg.ReleaseDRBDMinors(self.instance.name)
9397
        raise
9398

    
9399
    # We have new devices, shutdown the drbd on the old secondary
9400
    for idx, dev in enumerate(self.instance.disks):
9401
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
9402
      self.cfg.SetDiskID(dev, self.target_node)
9403
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
9404
      if msg:
9405
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
9406
                           "node: %s" % (idx, msg),
9407
                           hint=("Please cleanup this device manually as"
9408
                                 " soon as possible"))
9409

    
9410
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
9411
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
9412
                                               self.node_secondary_ip,
9413
                                               self.instance.disks)\
9414
                                              [self.instance.primary_node]
9415

    
9416
    msg = result.fail_msg
9417
    if msg:
9418
      # detaches didn't succeed (unlikely)
9419
      self.cfg.ReleaseDRBDMinors(self.instance.name)
9420
      raise errors.OpExecError("Can't detach the disks from the network on"
9421
                               " old node: %s" % (msg,))
9422

    
9423
    # if we managed to detach at least one, we update all the disks of
9424
    # the instance to point to the new secondary
9425
    self.lu.LogInfo("Updating instance configuration")
9426
    for dev, _, new_logical_id in iv_names.itervalues():
9427
      dev.logical_id = new_logical_id
9428
      self.cfg.SetDiskID(dev, self.instance.primary_node)
9429

    
9430
    self.cfg.Update(self.instance, feedback_fn)
9431

    
9432
    # and now perform the drbd attach
9433
    self.lu.LogInfo("Attaching primary drbds to new secondary"
9434
                    " (standalone => connected)")
9435
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
9436
                                            self.new_node],
9437
                                           self.node_secondary_ip,
9438
                                           self.instance.disks,
9439
                                           self.instance.name,
9440
                                           False)
9441
    for to_node, to_result in result.items():
9442
      msg = to_result.fail_msg
9443
      if msg:
9444
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
9445
                           to_node, msg,
9446
                           hint=("please do a gnt-instance info to see the"
9447
                                 " status of disks"))
9448
    cstep = 5
9449
    if self.early_release:
9450
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9451
      cstep += 1
9452
      self._RemoveOldStorage(self.target_node, iv_names)
9453
      # WARNING: we release all node locks here, do not do other RPCs
9454
      # than WaitForSync to the primary node
9455
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9456
                    names=[self.instance.primary_node,
9457
                           self.target_node,
9458
                           self.new_node])
9459

    
9460
    # Wait for sync
9461
    # This can fail as the old devices are degraded and _WaitForSync
9462
    # does a combined result over all disks, so we don't check its return value
9463
    self.lu.LogStep(cstep, steps_total, "Sync devices")
9464
    cstep += 1
9465
    _WaitForSync(self.lu, self.instance)
9466

    
9467
    # Check all devices manually
9468
    self._CheckDevices(self.instance.primary_node, iv_names)
9469

    
9470
    # Step: remove old storage
9471
    if not self.early_release:
9472
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9473
      self._RemoveOldStorage(self.target_node, iv_names)
9474

    
9475

    
9476
class LURepairNodeStorage(NoHooksLU):
9477
  """Repairs the volume group on a node.
9478

9479
  """
9480
  REQ_BGL = False
9481

    
9482
  def CheckArguments(self):
9483
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
9484

    
9485
    storage_type = self.op.storage_type
9486

    
9487
    if (constants.SO_FIX_CONSISTENCY not in
9488
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
9489
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
9490
                                 " repaired" % storage_type,
9491
                                 errors.ECODE_INVAL)
9492

    
9493
  def ExpandNames(self):
9494
    self.needed_locks = {
9495
      locking.LEVEL_NODE: [self.op.node_name],
9496
      }
9497

    
9498
  def _CheckFaultyDisks(self, instance, node_name):
9499
    """Ensure faulty disks abort the opcode or at least warn."""
9500
    try:
9501
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
9502
                                  node_name, True):
9503
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
9504
                                   " node '%s'" % (instance.name, node_name),
9505
                                   errors.ECODE_STATE)
9506
    except errors.OpPrereqError, err:
9507
      if self.op.ignore_consistency:
9508
        self.proc.LogWarning(str(err.args[0]))
9509
      else:
9510
        raise
9511

    
9512
  def CheckPrereq(self):
9513
    """Check prerequisites.
9514

9515
    """
9516
    # Check whether any instance on this node has faulty disks
9517
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
9518
      if not inst.admin_up:
9519
        continue
9520
      check_nodes = set(inst.all_nodes)
9521
      check_nodes.discard(self.op.node_name)
9522
      for inst_node_name in check_nodes:
9523
        self._CheckFaultyDisks(inst, inst_node_name)
9524

    
9525
  def Exec(self, feedback_fn):
9526
    feedback_fn("Repairing storage unit '%s' on %s ..." %
9527
                (self.op.name, self.op.node_name))
9528

    
9529
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
9530
    result = self.rpc.call_storage_execute(self.op.node_name,
9531
                                           self.op.storage_type, st_args,
9532
                                           self.op.name,
9533
                                           constants.SO_FIX_CONSISTENCY)
9534
    result.Raise("Failed to repair storage unit '%s' on %s" %
9535
                 (self.op.name, self.op.node_name))
9536

    
9537

    
9538
class LUNodeEvacStrategy(NoHooksLU):
9539
  """Computes the node evacuation strategy.
9540

9541
  """
9542
  REQ_BGL = False
9543

    
9544
  def CheckArguments(self):
9545
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
9546

    
9547
  def ExpandNames(self):
9548
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
9549
    self.needed_locks = locks = {}
9550
    if self.op.remote_node is None:
9551
      locks[locking.LEVEL_NODE] = locking.ALL_SET
9552
    else:
9553
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9554
      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
9555

    
9556
  def Exec(self, feedback_fn):
9557
    if self.op.remote_node is not None:
9558
      instances = []
9559
      for node in self.op.nodes:
9560
        instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
9561
      result = []
9562
      for i in instances:
9563
        if i.primary_node == self.op.remote_node:
9564
          raise errors.OpPrereqError("Node %s is the primary node of"
9565
                                     " instance %s, cannot use it as"
9566
                                     " secondary" %
9567
                                     (self.op.remote_node, i.name),
9568
                                     errors.ECODE_INVAL)
9569
        result.append([i.name, self.op.remote_node])
9570
    else:
9571
      ial = IAllocator(self.cfg, self.rpc,
9572
                       mode=constants.IALLOCATOR_MODE_MEVAC,
9573
                       evac_nodes=self.op.nodes)
9574
      ial.Run(self.op.iallocator, validate=True)
9575
      if not ial.success:
9576
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
9577
                                 errors.ECODE_NORES)
9578
      result = ial.result
9579
    return result
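    # Illustrative result shape (example names only): with an explicit
    # remote node the result is a list of [instance_name, new_secondary]
    # pairs, e.g.
    #
    #   [["inst1.example.com", "node3.example.com"],
    #    ["inst2.example.com", "node3.example.com"]]
    #
    # and in the IAllocator branch the same shape is expected back from the
    # MEVAC allocator run above.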
9580

    
9581

    
9582
class LUInstanceGrowDisk(LogicalUnit):
9583
  """Grow a disk of an instance.
9584

9585
  """
9586
  HPATH = "disk-grow"
9587
  HTYPE = constants.HTYPE_INSTANCE
9588
  REQ_BGL = False
9589

    
9590
  def ExpandNames(self):
9591
    self._ExpandAndLockInstance()
9592
    self.needed_locks[locking.LEVEL_NODE] = []
9593
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9594

    
9595
  def DeclareLocks(self, level):
9596
    if level == locking.LEVEL_NODE:
9597
      self._LockInstancesNodes()
9598

    
9599
  def BuildHooksEnv(self):
9600
    """Build hooks env.
9601

9602
    This runs on the master, the primary and all the secondaries.
9603

9604
    """
9605
    env = {
9606
      "DISK": self.op.disk,
9607
      "AMOUNT": self.op.amount,
9608
      }
9609
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9610
    return env
9611

    
9612
  def BuildHooksNodes(self):
9613
    """Build hooks nodes.
9614

9615
    """
9616
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9617
    return (nl, nl)
9618

    
9619
  def CheckPrereq(self):
9620
    """Check prerequisites.
9621

9622
    This checks that the instance is in the cluster.
9623

9624
    """
9625
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9626
    assert instance is not None, \
9627
      "Cannot retrieve locked instance %s" % self.op.instance_name
9628
    nodenames = list(instance.all_nodes)
9629
    for node in nodenames:
9630
      _CheckNodeOnline(self, node)
9631

    
9632
    self.instance = instance
9633

    
9634
    if instance.disk_template not in constants.DTS_GROWABLE:
9635
      raise errors.OpPrereqError("Instance's disk layout does not support"
9636
                                 " growing", errors.ECODE_INVAL)
9637

    
9638
    self.disk = instance.FindDisk(self.op.disk)
9639

    
9640
    if instance.disk_template not in (constants.DT_FILE,
9641
                                      constants.DT_SHARED_FILE):
9642
      # TODO: check the free disk space for file, when that feature will be
9643
      # supported
9644
      _CheckNodesFreeDiskPerVG(self, nodenames,
9645
                               self.disk.ComputeGrowth(self.op.amount))
9646

    
9647
  def Exec(self, feedback_fn):
9648
    """Execute disk grow.
9649

9650
    """
9651
    instance = self.instance
9652
    disk = self.disk
9653

    
9654
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
9655
    if not disks_ok:
9656
      raise errors.OpExecError("Cannot activate block device to grow")
9657

    
9658
    # First run all grow ops in dry-run mode
9659
    for node in instance.all_nodes:
9660
      self.cfg.SetDiskID(disk, node)
9661
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
9662
      result.Raise("Grow request failed to node %s" % node)
9663

    
9664
    # We know that (as far as we can test) operations across different
9665
    # nodes will succeed, time to run it for real
9666
    for node in instance.all_nodes:
9667
      self.cfg.SetDiskID(disk, node)
9668
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
9669
      result.Raise("Grow request failed to node %s" % node)
9670

    
9671
      # TODO: Rewrite code to work properly
9672
      # DRBD goes into sync mode for a short amount of time after executing the
9673
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
9674
      # calling "resize" in sync mode fails. Sleeping for a short amount of
9675
      # time is a work-around.
9676
      time.sleep(5)
9677

    
9678
    disk.RecordGrow(self.op.amount)
9679
    self.cfg.Update(instance, feedback_fn)
9680
    if self.op.wait_for_sync:
9681
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
9682
      if disk_abort:
9683
        self.proc.LogWarning("Disk sync-ing has not returned a good"
9684
                             " status; please check the instance")
9685
      if not instance.admin_up:
9686
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
9687
    elif not instance.admin_up:
9688
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
9689
                           " not supposed to be running because no wait for"
9690
                           " sync mode was requested")
9691

    
9692

    
9693
class LUInstanceQueryData(NoHooksLU):
9694
  """Query runtime instance data.
9695

9696
  """
9697
  REQ_BGL = False
9698

    
9699
  def ExpandNames(self):
9700
    self.needed_locks = {}
9701

    
9702
    # Use locking if requested or when non-static information is wanted
9703
    if not (self.op.static or self.op.use_locking):
9704
      self.LogWarning("Non-static data requested, locks need to be acquired")
9705
      self.op.use_locking = True
9706

    
9707
    if self.op.instances or not self.op.use_locking:
9708
      # Expand instance names right here
9709
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
9710
    else:
9711
      # Will use acquired locks
9712
      self.wanted_names = None
9713

    
9714
    if self.op.use_locking:
9715
      self.share_locks = dict.fromkeys(locking.LEVELS, 1)
9716

    
9717
      if self.wanted_names is None:
9718
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
9719
      else:
9720
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
9721

    
9722
      self.needed_locks[locking.LEVEL_NODE] = []
9723
      self.share_locks = dict.fromkeys(locking.LEVELS, 1)
9724
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9725

    
9726
  def DeclareLocks(self, level):
9727
    if self.op.use_locking and level == locking.LEVEL_NODE:
9728
      self._LockInstancesNodes()
9729

    
9730
  def CheckPrereq(self):
9731
    """Check prerequisites.
9732

9733
    This only checks the optional instance list against the existing names.
9734

9735
    """
9736
    if self.wanted_names is None:
9737
      assert self.op.use_locking, "Locking was not used"
9738
      self.wanted_names = self.glm.list_owned(locking.LEVEL_INSTANCE)
9739

    
9740
    self.wanted_instances = [self.cfg.GetInstanceInfo(name)
9741
                             for name in self.wanted_names]
9742

    
9743
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
9744
    """Returns the status of a block device
9745

9746
    """
9747
    if self.op.static or not node:
9748
      return None
9749

    
9750
    self.cfg.SetDiskID(dev, node)
9751

    
9752
    result = self.rpc.call_blockdev_find(node, dev)
9753
    if result.offline:
9754
      return None
9755

    
9756
    result.Raise("Can't compute disk status for %s" % instance_name)
9757

    
9758
    status = result.payload
9759
    if status is None:
9760
      return None
9761

    
9762
    return (status.dev_path, status.major, status.minor,
9763
            status.sync_percent, status.estimated_time,
9764
            status.is_degraded, status.ldisk_status)
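    # Example of the returned tuple for a healthy DRBD device (values are
    # purely illustrative):
    #
    #   ("/dev/drbd0", 147, 0, 100.0, None, False, None)
    #
    # i.e. (dev_path, major, minor, sync_percent, estimated_time,
    #       is_degraded, ldisk_status).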
9765

    
9766
  def _ComputeDiskStatus(self, instance, snode, dev):
9767
    """Compute block device status.
9768

9769
    """
9770
    if dev.dev_type in constants.LDS_DRBD:
9771
      # we change the snode then (otherwise we use the one passed in)
9772
      if dev.logical_id[0] == instance.primary_node:
9773
        snode = dev.logical_id[1]
9774
      else:
9775
        snode = dev.logical_id[0]
9776

    
9777
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
9778
                                              instance.name, dev)
9779
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
9780

    
9781
    if dev.children:
9782
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
9783
                      for child in dev.children]
9784
    else:
9785
      dev_children = []
9786

    
9787
    return {
9788
      "iv_name": dev.iv_name,
9789
      "dev_type": dev.dev_type,
9790
      "logical_id": dev.logical_id,
9791
      "physical_id": dev.physical_id,
9792
      "pstatus": dev_pstatus,
9793
      "sstatus": dev_sstatus,
9794
      "children": dev_children,
9795
      "mode": dev.mode,
9796
      "size": dev.size,
9797
      }
9798

    
9799
  def Exec(self, feedback_fn):
9800
    """Gather and return data"""
9801
    result = {}
9802

    
9803
    cluster = self.cfg.GetClusterInfo()
9804

    
9805
    for instance in self.wanted_instances:
9806
      if not self.op.static:
9807
        remote_info = self.rpc.call_instance_info(instance.primary_node,
9808
                                                  instance.name,
9809
                                                  instance.hypervisor)
9810
        remote_info.Raise("Error checking node %s" % instance.primary_node)
9811
        remote_info = remote_info.payload
9812
        if remote_info and "state" in remote_info:
9813
          remote_state = "up"
9814
        else:
9815
          remote_state = "down"
9816
      else:
9817
        remote_state = None
9818
      if instance.admin_up:
9819
        config_state = "up"
9820
      else:
9821
        config_state = "down"
9822

    
9823
      disks = [self._ComputeDiskStatus(instance, None, device)
9824
               for device in instance.disks]
9825

    
9826
      result[instance.name] = {
9827
        "name": instance.name,
9828
        "config_state": config_state,
9829
        "run_state": remote_state,
9830
        "pnode": instance.primary_node,
9831
        "snodes": instance.secondary_nodes,
9832
        "os": instance.os,
9833
        # this happens to be the same format used for hooks
9834
        "nics": _NICListToTuple(self, instance.nics),
9835
        "disk_template": instance.disk_template,
9836
        "disks": disks,
9837
        "hypervisor": instance.hypervisor,
9838
        "network_port": instance.network_port,
9839
        "hv_instance": instance.hvparams,
9840
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
9841
        "be_instance": instance.beparams,
9842
        "be_actual": cluster.FillBE(instance),
9843
        "os_instance": instance.osparams,
9844
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
9845
        "serial_no": instance.serial_no,
9846
        "mtime": instance.mtime,
9847
        "ctime": instance.ctime,
9848
        "uuid": instance.uuid,
9849
        }
9850

    
9851
    return result
9852

    
9853

    
9854
class LUInstanceSetParams(LogicalUnit):
9855
  """Modifies an instances's parameters.
9856

9857
  """
9858
  HPATH = "instance-modify"
9859
  HTYPE = constants.HTYPE_INSTANCE
9860
  REQ_BGL = False
9861

    
9862
  def CheckArguments(self):
9863
    if not (self.op.nics or self.op.disks or self.op.disk_template or
9864
            self.op.hvparams or self.op.beparams or self.op.os_name):
9865
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
9866

    
9867
    if self.op.hvparams:
9868
      _CheckGlobalHvParams(self.op.hvparams)
9869

    
9870
    # Disk validation
9871
    disk_addremove = 0
9872
    for disk_op, disk_dict in self.op.disks:
9873
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
9874
      if disk_op == constants.DDM_REMOVE:
9875
        disk_addremove += 1
9876
        continue
9877
      elif disk_op == constants.DDM_ADD:
9878
        disk_addremove += 1
9879
      else:
9880
        if not isinstance(disk_op, int):
9881
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
9882
        if not isinstance(disk_dict, dict):
9883
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
9884
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9885

    
9886
      if disk_op == constants.DDM_ADD:
9887
        mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
9888
        if mode not in constants.DISK_ACCESS_SET:
9889
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
9890
                                     errors.ECODE_INVAL)
9891
        size = disk_dict.get(constants.IDISK_SIZE, None)
9892
        if size is None:
9893
          raise errors.OpPrereqError("Required disk parameter size missing",
9894
                                     errors.ECODE_INVAL)
9895
        try:
9896
          size = int(size)
9897
        except (TypeError, ValueError), err:
9898
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
9899
                                     str(err), errors.ECODE_INVAL)
9900
        disk_dict[constants.IDISK_SIZE] = size
9901
      else:
9902
        # modification of disk
9903
        if constants.IDISK_SIZE in disk_dict:
9904
          raise errors.OpPrereqError("Disk size change not possible, use"
9905
                                     " grow-disk", errors.ECODE_INVAL)
9906

    
9907
    if disk_addremove > 1:
9908
      raise errors.OpPrereqError("Only one disk add or remove operation"
9909
                                 " supported at a time", errors.ECODE_INVAL)
9910

    
9911
    if self.op.disks and self.op.disk_template is not None:
9912
      raise errors.OpPrereqError("Disk template conversion and other disk"
9913
                                 " changes not supported at the same time",
9914
                                 errors.ECODE_INVAL)
9915

    
9916
    if (self.op.disk_template and
9917
        self.op.disk_template in constants.DTS_INT_MIRROR and
9918
        self.op.remote_node is None):
9919
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
9920
                                 " one requires specifying a secondary node",
9921
                                 errors.ECODE_INVAL)
9922

    
9923
    # NIC validation
9924
    nic_addremove = 0
9925
    for nic_op, nic_dict in self.op.nics:
9926
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
9927
      if nic_op == constants.DDM_REMOVE:
9928
        nic_addremove += 1
9929
        continue
9930
      elif nic_op == constants.DDM_ADD:
9931
        nic_addremove += 1
9932
      else:
9933
        if not isinstance(nic_op, int):
9934
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
9935
        if not isinstance(nic_dict, dict):
9936
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
9937
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9938

    
9939
      # nic_dict should be a dict
9940
      nic_ip = nic_dict.get(constants.INIC_IP, None)
9941
      if nic_ip is not None:
9942
        if nic_ip.lower() == constants.VALUE_NONE:
9943
          nic_dict[constants.INIC_IP] = None
9944
        else:
9945
          if not netutils.IPAddress.IsValid(nic_ip):
9946
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
9947
                                       errors.ECODE_INVAL)
9948

    
9949
      nic_bridge = nic_dict.get('bridge', None)
9950
      nic_link = nic_dict.get(constants.INIC_LINK, None)
9951
      if nic_bridge and nic_link:
9952
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
9953
                                   " at the same time", errors.ECODE_INVAL)
9954
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
9955
        nic_dict['bridge'] = None
9956
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
9957
        nic_dict[constants.INIC_LINK] = None
9958

    
9959
      if nic_op == constants.DDM_ADD:
9960
        nic_mac = nic_dict.get(constants.INIC_MAC, None)
9961
        if nic_mac is None:
9962
          nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
9963

    
9964
      if constants.INIC_MAC in nic_dict:
9965
        nic_mac = nic_dict[constants.INIC_MAC]
9966
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9967
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
9968

    
9969
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
9970
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
9971
                                     " modifying an existing nic",
9972
                                     errors.ECODE_INVAL)
9973

    
9974
    if nic_addremove > 1:
9975
      raise errors.OpPrereqError("Only one NIC add or remove operation"
9976
                                 " supported at a time", errors.ECODE_INVAL)
9977

    
9978
  def ExpandNames(self):
9979
    self._ExpandAndLockInstance()
9980
    self.needed_locks[locking.LEVEL_NODE] = []
9981
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9982

    
9983
  def DeclareLocks(self, level):
9984
    if level == locking.LEVEL_NODE:
9985
      self._LockInstancesNodes()
9986
      if self.op.disk_template and self.op.remote_node:
9987
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9988
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
9989

    
9990
  def BuildHooksEnv(self):
9991
    """Build hooks env.
9992

9993
    This runs on the master, primary and secondaries.
9994

9995
    """
9996
    args = dict()
9997
    if constants.BE_MEMORY in self.be_new:
9998
      args['memory'] = self.be_new[constants.BE_MEMORY]
9999
    if constants.BE_VCPUS in self.be_new:
10000
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
10001
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
10002
    # information at all.
10003
    if self.op.nics:
10004
      args['nics'] = []
10005
      nic_override = dict(self.op.nics)
10006
      for idx, nic in enumerate(self.instance.nics):
10007
        if idx in nic_override:
10008
          this_nic_override = nic_override[idx]
10009
        else:
10010
          this_nic_override = {}
10011
        if constants.INIC_IP in this_nic_override:
10012
          ip = this_nic_override[constants.INIC_IP]
10013
        else:
10014
          ip = nic.ip
10015
        if constants.INIC_MAC in this_nic_override:
10016
          mac = this_nic_override[constants.INIC_MAC]
10017
        else:
10018
          mac = nic.mac
10019
        if idx in self.nic_pnew:
10020
          nicparams = self.nic_pnew[idx]
10021
        else:
10022
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
10023
        mode = nicparams[constants.NIC_MODE]
10024
        link = nicparams[constants.NIC_LINK]
10025
        args['nics'].append((ip, mac, mode, link))
10026
      if constants.DDM_ADD in nic_override:
10027
        ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
10028
        mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
10029
        nicparams = self.nic_pnew[constants.DDM_ADD]
10030
        mode = nicparams[constants.NIC_MODE]
10031
        link = nicparams[constants.NIC_LINK]
10032
        args['nics'].append((ip, mac, mode, link))
10033
      elif constants.DDM_REMOVE in nic_override:
10034
        del args['nics'][-1]
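      # Each entry appended above is an (ip, mac, mode, link) tuple, e.g.
      # ("198.51.100.10", "aa:00:00:35:d9:9e", constants.NIC_MODE_BRIDGED,
      #  "xen-br0") -- the concrete values here are only illustrative.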
10035

    
10036
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
10037
    if self.op.disk_template:
10038
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
10039

    
10040
    return env
10041

    
10042
  def BuildHooksNodes(self):
10043
    """Build hooks nodes.
10044

10045
    """
10046
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10047
    return (nl, nl)
10048

    
10049
  def CheckPrereq(self):
10050
    """Check prerequisites.
10051

10052
    This only checks the instance list against the existing names.
10053

10054
    """
10055
    # checking the new params on the primary/secondary nodes
10056

    
10057
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10058
    cluster = self.cluster = self.cfg.GetClusterInfo()
10059
    assert self.instance is not None, \
10060
      "Cannot retrieve locked instance %s" % self.op.instance_name
10061
    pnode = instance.primary_node
10062
    nodelist = list(instance.all_nodes)
10063

    
10064
    # OS change
10065
    if self.op.os_name and not self.op.force:
10066
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
10067
                      self.op.force_variant)
10068
      instance_os = self.op.os_name
10069
    else:
10070
      instance_os = instance.os
10071

    
10072
    if self.op.disk_template:
10073
      if instance.disk_template == self.op.disk_template:
10074
        raise errors.OpPrereqError("Instance already has disk template %s" %
10075
                                   instance.disk_template, errors.ECODE_INVAL)
10076

    
10077
      if (instance.disk_template,
10078
          self.op.disk_template) not in self._DISK_CONVERSIONS:
10079
        raise errors.OpPrereqError("Unsupported disk template conversion from"
10080
                                   " %s to %s" % (instance.disk_template,
10081
                                                  self.op.disk_template),
10082
                                   errors.ECODE_INVAL)
10083
      _CheckInstanceDown(self, instance, "cannot change disk template")
10084
      if self.op.disk_template in constants.DTS_INT_MIRROR:
10085
        if self.op.remote_node == pnode:
10086
          raise errors.OpPrereqError("Given new secondary node %s is the same"
10087
                                     " as the primary node of the instance" %
10088
                                     self.op.remote_node, errors.ECODE_STATE)
10089
        _CheckNodeOnline(self, self.op.remote_node)
10090
        _CheckNodeNotDrained(self, self.op.remote_node)
10091
        # FIXME: here we assume that the old instance type is DT_PLAIN
10092
        assert instance.disk_template == constants.DT_PLAIN
10093
        disks = [{constants.IDISK_SIZE: d.size,
10094
                  constants.IDISK_VG: d.logical_id[0]}
10095
                 for d in instance.disks]
10096
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
10097
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
10098

    
10099
    # hvparams processing
10100
    if self.op.hvparams:
10101
      hv_type = instance.hypervisor
10102
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
10103
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
10104
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
10105

    
10106
      # local check
10107
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
10108
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
10109
      self.hv_new = hv_new # the new actual values
10110
      self.hv_inst = i_hvdict # the new dict (without defaults)
10111
    else:
10112
      self.hv_new = self.hv_inst = {}
10113

    
10114
    # beparams processing
10115
    if self.op.beparams:
10116
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
10117
                                   use_none=True)
10118
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
10119
      be_new = cluster.SimpleFillBE(i_bedict)
10120
      self.be_new = be_new # the new actual values
10121
      self.be_inst = i_bedict # the new dict (without defaults)
10122
    else:
10123
      self.be_new = self.be_inst = {}
10124
    be_old = cluster.FillBE(instance)
10125

    
10126
    # osparams processing
10127
    if self.op.osparams:
10128
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
10129
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
10130
      self.os_inst = i_osdict # the new dict (without defaults)
10131
    else:
10132
      self.os_inst = {}
10133

    
10134
    self.warn = []
10135

    
10136
    if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
10137
        be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
10138
      mem_check_list = [pnode]
10139
      if be_new[constants.BE_AUTO_BALANCE]:
10140
        # either we changed auto_balance to yes or it was from before
10141
        mem_check_list.extend(instance.secondary_nodes)
10142
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
10143
                                                  instance.hypervisor)
10144
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
10145
                                         instance.hypervisor)
10146
      pninfo = nodeinfo[pnode]
10147
      msg = pninfo.fail_msg
10148
      if msg:
10149
        # Assume the primary node is unreachable and go ahead
10150
        self.warn.append("Can't get info from primary node %s: %s" %
10151
                         (pnode, msg))
10152
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
10153
        self.warn.append("Node data from primary node %s doesn't contain"
10154
                         " free memory information" % pnode)
10155
      elif instance_info.fail_msg:
10156
        self.warn.append("Can't get instance runtime information: %s" %
10157
                        instance_info.fail_msg)
10158
      else:
10159
        if instance_info.payload:
10160
          current_mem = int(instance_info.payload['memory'])
10161
        else:
10162
          # Assume instance not running
10163
          # (there is a slight race condition here, but it's not very probable,
10164
          # and we have no other way to check)
10165
          current_mem = 0
10166
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
10167
                    pninfo.payload['memory_free'])
10168
        if miss_mem > 0:
10169
          raise errors.OpPrereqError("This change will prevent the instance"
10170
                                     " from starting, due to %d MB of memory"
10171
                                     " missing on its primary node" % miss_mem,
10172
                                     errors.ECODE_NORES)
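        # Worked example (illustrative numbers): asking for
        # BE_MEMORY = 4096 MB while the instance currently uses 1024 MB and
        # the primary node reports memory_free = 2048 MB yields
        # miss_mem = 4096 - 1024 - 2048 = 1024 > 0, so the change is refused.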
10173

    
10174
      if be_new[constants.BE_AUTO_BALANCE]:
10175
        for node, nres in nodeinfo.items():
10176
          if node not in instance.secondary_nodes:
10177
            continue
10178
          nres.Raise("Can't get info from secondary node %s" % node,
10179
                     prereq=True, ecode=errors.ECODE_STATE)
10180
          if not isinstance(nres.payload.get('memory_free', None), int):
10181
            raise errors.OpPrereqError("Secondary node %s didn't return free"
10182
                                       " memory information" % node,
10183
                                       errors.ECODE_STATE)
10184
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
10185
            raise errors.OpPrereqError("This change will prevent the instance"
10186
                                       " from failover to its secondary node"
10187
                                       " %s, due to not enough memory" % node,
10188
                                       errors.ECODE_STATE)
10189

    
10190
    # NIC processing
10191
    self.nic_pnew = {}
10192
    self.nic_pinst = {}
10193
    for nic_op, nic_dict in self.op.nics:
10194
      if nic_op == constants.DDM_REMOVE:
10195
        if not instance.nics:
10196
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
10197
                                     errors.ECODE_INVAL)
10198
        continue
10199
      if nic_op != constants.DDM_ADD:
10200
        # an existing nic
10201
        if not instance.nics:
10202
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
10203
                                     " no NICs" % nic_op,
10204
                                     errors.ECODE_INVAL)
10205
        if nic_op < 0 or nic_op >= len(instance.nics):
10206
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
10207
                                     " are 0 to %d" %
10208
                                     (nic_op, len(instance.nics) - 1),
10209
                                     errors.ECODE_INVAL)
10210
        old_nic_params = instance.nics[nic_op].nicparams
10211
        old_nic_ip = instance.nics[nic_op].ip
10212
      else:
10213
        old_nic_params = {}
10214
        old_nic_ip = None
10215

    
10216
      update_params_dict = dict([(key, nic_dict[key])
10217
                                 for key in constants.NICS_PARAMETERS
10218
                                 if key in nic_dict])
10219

    
10220
      if 'bridge' in nic_dict:
10221
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
10222

    
10223
      new_nic_params = _GetUpdatedParams(old_nic_params,
10224
                                         update_params_dict)
10225
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
10226
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
10227
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
10228
      self.nic_pinst[nic_op] = new_nic_params
10229
      self.nic_pnew[nic_op] = new_filled_nic_params
10230
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
10231

    
10232
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
10233
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
10234
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
10235
        if msg:
10236
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
10237
          if self.op.force:
10238
            self.warn.append(msg)
10239
          else:
10240
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
10241
      if new_nic_mode == constants.NIC_MODE_ROUTED:
10242
        if constants.INIC_IP in nic_dict:
10243
          nic_ip = nic_dict[constants.INIC_IP]
10244
        else:
10245
          nic_ip = old_nic_ip
10246
        if nic_ip is None:
10247
          raise errors.OpPrereqError('Cannot set the nic ip to None'
10248
                                     ' on a routed nic', errors.ECODE_INVAL)
10249
      if constants.INIC_MAC in nic_dict:
10250
        nic_mac = nic_dict[constants.INIC_MAC]
10251
        if nic_mac is None:
10252
          raise errors.OpPrereqError('Cannot set the nic mac to None',
10253
                                     errors.ECODE_INVAL)
10254
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10255
          # otherwise generate the mac
10256
          nic_dict[constants.INIC_MAC] = \
10257
            self.cfg.GenerateMAC(self.proc.GetECId())
10258
        else:
10259
          # or validate/reserve the current one
10260
          try:
10261
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
10262
          except errors.ReservationError:
10263
            raise errors.OpPrereqError("MAC address %s already in use"
10264
                                       " in cluster" % nic_mac,
10265
                                       errors.ECODE_NOTUNIQUE)
10266

    
10267
    # DISK processing
10268
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
10269
      raise errors.OpPrereqError("Disk operations not supported for"
10270
                                 " diskless instances",
10271
                                 errors.ECODE_INVAL)
10272
    for disk_op, _ in self.op.disks:
10273
      if disk_op == constants.DDM_REMOVE:
10274
        if len(instance.disks) == 1:
10275
          raise errors.OpPrereqError("Cannot remove the last disk of"
10276
                                     " an instance", errors.ECODE_INVAL)
10277
        _CheckInstanceDown(self, instance, "cannot remove disks")
10278

    
10279
      if (disk_op == constants.DDM_ADD and
10280
          len(instance.disks) >= constants.MAX_DISKS):
10281
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
10282
                                   " add more" % constants.MAX_DISKS,
10283
                                   errors.ECODE_STATE)
10284
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
10285
        # an existing disk
10286
        if disk_op < 0 or disk_op >= len(instance.disks):
10287
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
10288
                                     " are 0 to %d" %
10289
                                     (disk_op, len(instance.disks) - 1),
10290
                                     errors.ECODE_INVAL)
10291

    
10292
    return
10293

    
10294
  def _ConvertPlainToDrbd(self, feedback_fn):
10295
    """Converts an instance from plain to drbd.
10296

10297
    """
10298
    feedback_fn("Converting template to drbd")
10299
    instance = self.instance
10300
    pnode = instance.primary_node
10301
    snode = self.op.remote_node
10302

    
10303
    # create a fake disk info for _GenerateDiskTemplate
10304
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
10305
                  constants.IDISK_VG: d.logical_id[0]}
10306
                 for d in instance.disks]
10307
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
10308
                                      instance.name, pnode, [snode],
10309
                                      disk_info, None, None, 0, feedback_fn)
10310
    info = _GetInstanceInfoText(instance)
10311
    feedback_fn("Creating aditional volumes...")
10312
    # first, create the missing data and meta devices
10313
    for disk in new_disks:
10314
      # unfortunately this is... not too nice
10315
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
10316
                            info, True)
10317
      for child in disk.children:
10318
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
10319
    # at this stage, all new LVs have been created, we can rename the
10320
    # old ones
10321
    feedback_fn("Renaming original volumes...")
10322
    rename_list = [(o, n.children[0].logical_id)
10323
                   for (o, n) in zip(instance.disks, new_disks)]
10324
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
10325
    result.Raise("Failed to rename original LVs")
10326

    
10327
    feedback_fn("Initializing DRBD devices...")
10328
    # all child devices are in place, we can now create the DRBD devices
10329
    for disk in new_disks:
10330
      for node in [pnode, snode]:
10331
        f_create = node == pnode
10332
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
10333

    
10334
    # at this point, the instance has been modified
10335
    instance.disk_template = constants.DT_DRBD8
10336
    instance.disks = new_disks
10337
    self.cfg.Update(instance, feedback_fn)
10338

    
10339
    # disks are created, waiting for sync
10340
    disk_abort = not _WaitForSync(self, instance,
10341
                                  oneshot=not self.op.wait_for_sync)
10342
    if disk_abort:
10343
      raise errors.OpExecError("There are some degraded disks for"
10344
                               " this instance, please cleanup manually")
10345

    
10346
  def _ConvertDrbdToPlain(self, feedback_fn):
10347
    """Converts an instance from drbd to plain.
10348

10349
    """
10350
    instance = self.instance
10351
    assert len(instance.secondary_nodes) == 1
10352
    pnode = instance.primary_node
10353
    snode = instance.secondary_nodes[0]
10354
    feedback_fn("Converting template to plain")
10355

    
10356
    old_disks = instance.disks
10357
    new_disks = [d.children[0] for d in old_disks]
10358

    
10359
    # copy over size and mode
10360
    for parent, child in zip(old_disks, new_disks):
10361
      child.size = parent.size
10362
      child.mode = parent.mode
10363

    
10364
    # update instance structure
10365
    instance.disks = new_disks
10366
    instance.disk_template = constants.DT_PLAIN
10367
    self.cfg.Update(instance, feedback_fn)
10368

    
10369
    feedback_fn("Removing volumes on the secondary node...")
10370
    for disk in old_disks:
10371
      self.cfg.SetDiskID(disk, snode)
10372
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
10373
      if msg:
10374
        self.LogWarning("Could not remove block device %s on node %s,"
10375
                        " continuing anyway: %s", disk.iv_name, snode, msg)
10376

    
10377
    feedback_fn("Removing unneeded volumes on the primary node...")
10378
    for idx, disk in enumerate(old_disks):
10379
      meta = disk.children[1]
10380
      self.cfg.SetDiskID(meta, pnode)
10381
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
10382
      if msg:
10383
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
10384
                        " continuing anyway: %s", idx, pnode, msg)
10385

    
10386
  def Exec(self, feedback_fn):
10387
    """Modifies an instance.
10388

10389
    All parameters take effect only at the next restart of the instance.
10390

10391
    """
10392
    # Process here the warnings from CheckPrereq, as we don't have a
10393
    # feedback_fn there.
10394
    for warn in self.warn:
10395
      feedback_fn("WARNING: %s" % warn)
10396

    
10397
    result = []
10398
    instance = self.instance
10399
    # disk changes
10400
    for disk_op, disk_dict in self.op.disks:
10401
      if disk_op == constants.DDM_REMOVE:
10402
        # remove the last disk
10403
        device = instance.disks.pop()
10404
        device_idx = len(instance.disks)
10405
        for node, disk in device.ComputeNodeTree(instance.primary_node):
10406
          self.cfg.SetDiskID(disk, node)
10407
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
10408
          if msg:
10409
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
10410
                            " continuing anyway", device_idx, node, msg)
10411
        result.append(("disk/%d" % device_idx, "remove"))
10412
      elif disk_op == constants.DDM_ADD:
10413
        # add a new disk
10414
        if instance.disk_template in (constants.DT_FILE,
10415
                                        constants.DT_SHARED_FILE):
10416
          file_driver, file_path = instance.disks[0].logical_id
10417
          file_path = os.path.dirname(file_path)
10418
        else:
10419
          file_driver = file_path = None
10420
        disk_idx_base = len(instance.disks)
10421
        new_disk = _GenerateDiskTemplate(self,
10422
                                         instance.disk_template,
10423
                                         instance.name, instance.primary_node,
10424
                                         instance.secondary_nodes,
10425
                                         [disk_dict],
10426
                                         file_path,
10427
                                         file_driver,
10428
                                         disk_idx_base, feedback_fn)[0]
10429
        instance.disks.append(new_disk)
10430
        info = _GetInstanceInfoText(instance)
10431

    
10432
        logging.info("Creating volume %s for instance %s",
10433
                     new_disk.iv_name, instance.name)
10434
        # Note: this needs to be kept in sync with _CreateDisks
10435
        #HARDCODE
10436
        for node in instance.all_nodes:
10437
          f_create = node == instance.primary_node
10438
          try:
10439
            _CreateBlockDev(self, node, instance, new_disk,
10440
                            f_create, info, f_create)
10441
          except errors.OpExecError, err:
10442
            self.LogWarning("Failed to create volume %s (%s) on"
10443
                            " node %s: %s",
10444
                            new_disk.iv_name, new_disk, node, err)
10445
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
10446
                       (new_disk.size, new_disk.mode)))
10447
      else:
10448
        # change a given disk
10449
        instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
10450
        result.append(("disk.mode/%d" % disk_op,
10451
                       disk_dict[constants.IDISK_MODE]))
10452

    
10453
    if self.op.disk_template:
10454
      r_shut = _ShutdownInstanceDisks(self, instance)
10455
      if not r_shut:
10456
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
10457
                                 " proceed with disk template conversion")
10458
      mode = (instance.disk_template, self.op.disk_template)
10459
      try:
10460
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
10461
      except:
10462
        self.cfg.ReleaseDRBDMinors(instance.name)
10463
        raise
10464
      result.append(("disk_template", self.op.disk_template))
10465

    
10466
    # NIC changes
10467
    for nic_op, nic_dict in self.op.nics:
10468
      if nic_op == constants.DDM_REMOVE:
10469
        # remove the last nic
10470
        del instance.nics[-1]
10471
        result.append(("nic.%d" % len(instance.nics), "remove"))
10472
      elif nic_op == constants.DDM_ADD:
10473
        # mac and bridge should be set, by now
10474
        mac = nic_dict[constants.INIC_MAC]
10475
        ip = nic_dict.get(constants.INIC_IP, None)
10476
        nicparams = self.nic_pinst[constants.DDM_ADD]
10477
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
10478
        instance.nics.append(new_nic)
10479
        result.append(("nic.%d" % (len(instance.nics) - 1),
10480
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
10481
                       (new_nic.mac, new_nic.ip,
10482
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
10483
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
10484
                       )))
10485
      else:
10486
        for key in (constants.INIC_MAC, constants.INIC_IP):
10487
          if key in nic_dict:
10488
            setattr(instance.nics[nic_op], key, nic_dict[key])
10489
        if nic_op in self.nic_pinst:
10490
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
10491
        for key, val in nic_dict.iteritems():
10492
          result.append(("nic.%s/%d" % (key, nic_op), val))
10493

    
10494
    # hvparams changes
10495
    if self.op.hvparams:
10496
      instance.hvparams = self.hv_inst
10497
      for key, val in self.op.hvparams.iteritems():
10498
        result.append(("hv/%s" % key, val))
10499

    
10500
    # beparams changes
10501
    if self.op.beparams:
10502
      instance.beparams = self.be_inst
10503
      for key, val in self.op.beparams.iteritems():
10504
        result.append(("be/%s" % key, val))
10505

    
10506
    # OS change
10507
    if self.op.os_name:
10508
      instance.os = self.op.os_name
10509

    
10510
    # osparams changes
10511
    if self.op.osparams:
10512
      instance.osparams = self.os_inst
10513
      for key, val in self.op.osparams.iteritems():
10514
        result.append(("os/%s" % key, val))
10515

    
10516
    self.cfg.Update(instance, feedback_fn)
10517

    
10518
    return result
10519

    
10520
  _DISK_CONVERSIONS = {
10521
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
10522
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
10523
    }
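  # Only plain<->drbd conversions are registered; a new conversion would be
  # added here as another (source, target) key mapping to a helper with the
  # same signature, e.g. the hypothetical
  #   (constants.DT_PLAIN, constants.DT_FILE): _ConvertPlainToFile,
  # (that helper does not exist in this module).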
10524

    
10525

    
10526
class LUBackupQuery(NoHooksLU):
10527
  """Query the exports list
10528

10529
  """
10530
  REQ_BGL = False
10531

    
10532
  def ExpandNames(self):
10533
    self.needed_locks = {}
10534
    self.share_locks[locking.LEVEL_NODE] = 1
10535
    if not self.op.nodes:
10536
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10537
    else:
10538
      self.needed_locks[locking.LEVEL_NODE] = \
10539
        _GetWantedNodes(self, self.op.nodes)
10540

    
10541
  def Exec(self, feedback_fn):
10542
    """Compute the list of all the exported system images.
10543

10544
    @rtype: dict
10545
    @return: a dictionary with the structure node->(export-list)
10546
        where export-list is a list of the instances exported on
10547
        that node.
10548

10549
    """
10550
    self.nodes = self.glm.list_owned(locking.LEVEL_NODE)
10551
    rpcresult = self.rpc.call_export_list(self.nodes)
10552
    result = {}
10553
    for node in rpcresult:
10554
      if rpcresult[node].fail_msg:
10555
        result[node] = False
10556
      else:
10557
        result[node] = rpcresult[node].payload
10558

    
10559
    return result
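    # Illustrative result (node and instance names are made up):
    #
    #   {"node1.example.com": ["inst1.example.com", "inst2.example.com"],
    #    "node2.example.com": False}
    #
    # where False marks a node whose export-list RPC failed.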
10560

    
10561

    
10562
class LUBackupPrepare(NoHooksLU):
10563
  """Prepares an instance for an export and returns useful information.
10564

10565
  """
10566
  REQ_BGL = False
10567

    
10568
  def ExpandNames(self):
10569
    self._ExpandAndLockInstance()
10570

    
10571
  def CheckPrereq(self):
10572
    """Check prerequisites.
10573

10574
    """
10575
    instance_name = self.op.instance_name
10576

    
10577
    self.instance = self.cfg.GetInstanceInfo(instance_name)
10578
    assert self.instance is not None, \
10579
          "Cannot retrieve locked instance %s" % self.op.instance_name
10580
    _CheckNodeOnline(self, self.instance.primary_node)
10581

    
10582
    self._cds = _GetClusterDomainSecret()
10583

    
10584
  def Exec(self, feedback_fn):
10585
    """Prepares an instance for an export.
10586

10587
    """
10588
    instance = self.instance
10589

    
10590
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
10591
      salt = utils.GenerateSecret(8)
10592

    
10593
      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
10594
      result = self.rpc.call_x509_cert_create(instance.primary_node,
10595
                                              constants.RIE_CERT_VALIDITY)
10596
      result.Raise("Can't create X509 key and certificate on %s" % result.node)
10597

    
10598
      (name, cert_pem) = result.payload
10599

    
10600
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
10601
                                             cert_pem)
10602

    
10603
      return {
10604
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
10605
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
10606
                          salt),
10607
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
10608
        }
10609

    
10610
    return None
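    # For remote exports the dictionary returned above is expected to carry
    # roughly (values are illustrative, not real secrets):
    #
    #   "handshake":     the opaque handshake message for the destination,
    #   "x509_key_name": ("key-name", "<hmac digest>", "<salt>"),
    #   "x509_ca":       the signed X509 CA in PEM form.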
10611

    
10612

    
10613
class LUBackupExport(LogicalUnit):
10614
  """Export an instance to an image in the cluster.
10615

10616
  """
10617
  HPATH = "instance-export"
10618
  HTYPE = constants.HTYPE_INSTANCE
10619
  REQ_BGL = False
10620

    
10621
  def CheckArguments(self):
10622
    """Check the arguments.
10623

10624
    """
10625
    self.x509_key_name = self.op.x509_key_name
10626
    self.dest_x509_ca_pem = self.op.destination_x509_ca
10627

    
10628
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
10629
      if not self.x509_key_name:
10630
        raise errors.OpPrereqError("Missing X509 key name for encryption",
10631
                                   errors.ECODE_INVAL)
10632

    
10633
      if not self.dest_x509_ca_pem:
10634
        raise errors.OpPrereqError("Missing destination X509 CA",
10635
                                   errors.ECODE_INVAL)
10636

    
10637
  def ExpandNames(self):
10638
    self._ExpandAndLockInstance()
10639

    
10640
    # Lock all nodes for local exports
10641
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
10642
      # FIXME: lock only instance primary and destination node
10643
      #
10644
      # Sad but true, for now we have to lock all nodes, as we don't know where
10645
      # the previous export might be, and in this LU we search for it and
10646
      # remove it from its current node. In the future we could fix this by:
10647
      #  - making a tasklet to search (share-lock all), then create the
10648
      #    new one, then one to remove, after
10649
      #  - removing the removal operation altogether
10650
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10651

    
10652
  def DeclareLocks(self, level):
10653
    """Last minute lock declaration."""
10654
    # All nodes are locked anyway, so nothing to do here.
10655

    
10656
  def BuildHooksEnv(self):
10657
    """Build hooks env.
10658

10659
    This will run on the master, primary node and target node.
10660

10661
    """
10662
    env = {
10663
      "EXPORT_MODE": self.op.mode,
10664
      "EXPORT_NODE": self.op.target_node,
10665
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
10666
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
10667
      # TODO: Generic function for boolean env variables
10668
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
10669
      }
10670

    
10671
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10672

    
10673
    return env
10674

    
10675
  def BuildHooksNodes(self):
10676
    """Build hooks nodes.
10677

10678
    """
10679
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
10680

    
10681
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
10682
      nl.append(self.op.target_node)
10683

    
10684
    return (nl, nl)
10685

    
10686
  def CheckPrereq(self):
10687
    """Check prerequisites.
10688

10689
    This checks that the instance and node names are valid.
10690

10691
    """
10692
    instance_name = self.op.instance_name
10693

    
10694
    self.instance = self.cfg.GetInstanceInfo(instance_name)
10695
    assert self.instance is not None, \
10696
          "Cannot retrieve locked instance %s" % self.op.instance_name
10697
    _CheckNodeOnline(self, self.instance.primary_node)
10698

    
10699
    if (self.op.remove_instance and self.instance.admin_up and
10700
        not self.op.shutdown):
10701
      raise errors.OpPrereqError("Can not remove instance without shutting it"
10702
                                 " down before")
10703

    
10704
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
10705
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
10706
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
10707
      assert self.dst_node is not None
10708

    
10709
      _CheckNodeOnline(self, self.dst_node.name)
10710
      _CheckNodeNotDrained(self, self.dst_node.name)
10711

    
10712
      self._cds = None
10713
      self.dest_disk_info = None
10714
      self.dest_x509_ca = None
10715

    
10716
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10717
      self.dst_node = None
10718

    
10719
      if len(self.op.target_node) != len(self.instance.disks):
10720
        raise errors.OpPrereqError(("Received destination information for %s"
10721
                                    " disks, but instance %s has %s disks") %
10722
                                   (len(self.op.target_node), instance_name,
10723
                                    len(self.instance.disks)),
10724
                                   errors.ECODE_INVAL)
10725

    
10726
      cds = _GetClusterDomainSecret()
10727

    
10728
      # Check X509 key name
10729
      try:
10730
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
10731
      except (TypeError, ValueError), err:
10732
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
10733

    
10734
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
10735
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
10736
                                   errors.ECODE_INVAL)
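      # This mirrors LUBackupPrepare above, which built the triple as
      # (name, utils.Sha1Hmac(cds, name, salt=salt), salt); the same cluster
      # domain secret is used here to verify that the key name was not
      # tampered with in transit.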
10737

    
10738
      # Load and verify CA
10739
      try:
10740
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
10741
      except OpenSSL.crypto.Error, err:
10742
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
10743
                                   (err, ), errors.ECODE_INVAL)
10744

    
10745
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
10746
      if errcode is not None:
10747
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
10748
                                   (msg, ), errors.ECODE_INVAL)
10749

    
10750
      self.dest_x509_ca = cert
10751

    
10752
      # Verify target information
10753
      disk_info = []
10754
      for idx, disk_data in enumerate(self.op.target_node):
10755
        try:
10756
          (host, port, magic) = \
10757
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
10758
        except errors.GenericError, err:
10759
          raise errors.OpPrereqError("Target info for disk %s: %s" %
10760
                                     (idx, err), errors.ECODE_INVAL)
10761

    
10762
        disk_info.append((host, port, magic))
10763

    
10764
      assert len(disk_info) == len(self.op.target_node)
10765
      self.dest_disk_info = disk_info
10766

    
10767
    else:
10768
      raise errors.ProgrammerError("Unhandled export mode %r" %
10769
                                   self.op.mode)
10770

    
10771
    # instance disk type verification
10772
    # TODO: Implement export support for file-based disks
10773
    for disk in self.instance.disks:
10774
      if disk.dev_type == constants.LD_FILE:
10775
        raise errors.OpPrereqError("Export not supported for instances with"
10776
                                   " file-based disks", errors.ECODE_INVAL)
10777

    
10778
  def _CleanupExports(self, feedback_fn):
10779
    """Removes exports of current instance from all other nodes.
10780

10781
    If an instance in a cluster with nodes A..D was exported to node C, its
10782
    exports will be removed from the nodes A, B and D.
10783

10784
    """
10785
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
10786

    
10787
    nodelist = self.cfg.GetNodeList()
10788
    nodelist.remove(self.dst_node.name)
10789

    
10790
    # on one-node clusters nodelist will be empty after the removal
10791
    # if we proceed the backup would be removed because OpBackupQuery
10792
    # substitutes an empty list with the full cluster node list.
10793
    iname = self.instance.name
10794
    if nodelist:
10795
      feedback_fn("Removing old exports for instance %s" % iname)
10796
      exportlist = self.rpc.call_export_list(nodelist)
10797
      for node in exportlist:
10798
        if exportlist[node].fail_msg:
10799
          continue
10800
        if iname in exportlist[node].payload:
10801
          msg = self.rpc.call_export_remove(node, iname).fail_msg
10802
          if msg:
10803
            self.LogWarning("Could not remove older export for instance %s"
10804
                            " on node %s: %s", iname, node, msg)
10805

    
10806
  def Exec(self, feedback_fn):
10807
    """Export an instance to an image in the cluster.
10808

10809
    """
10810
    assert self.op.mode in constants.EXPORT_MODES
10811

    
10812
    instance = self.instance
10813
    src_node = instance.primary_node
10814

    
10815
    if self.op.shutdown:
10816
      # shutdown the instance, but not the disks
10817
      feedback_fn("Shutting down instance %s" % instance.name)
10818
      result = self.rpc.call_instance_shutdown(src_node, instance,
10819
                                               self.op.shutdown_timeout)
10820
      # TODO: Maybe ignore failures if ignore_remove_failures is set
10821
      result.Raise("Could not shutdown instance %s on"
10822
                   " node %s" % (instance.name, src_node))
10823

    
10824
    # set the disks ID correctly since call_instance_start needs the
10825
    # correct drbd minor to create the symlinks
10826
    for disk in instance.disks:
10827
      self.cfg.SetDiskID(disk, src_node)
10828

    
10829
    activate_disks = (not instance.admin_up)
10830

    
10831
    if activate_disks:
      # Activate the instance disks if we're exporting a stopped instance
      feedback_fn("Activating disks for %s" % instance.name)
10834
      _StartInstanceDisks(self, instance, None)
10835

    
10836
    try:
10837
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
10838
                                                     instance)
10839

    
10840
      helper.CreateSnapshots()
10841
      try:
10842
        if (self.op.shutdown and instance.admin_up and
10843
            not self.op.remove_instance):
10844
          assert not activate_disks
10845
          feedback_fn("Starting instance %s" % instance.name)
10846
          result = self.rpc.call_instance_start(src_node, instance, None, None)
10847
          msg = result.fail_msg
10848
          if msg:
10849
            feedback_fn("Failed to start instance: %s" % msg)
10850
            _ShutdownInstanceDisks(self, instance)
10851
            raise errors.OpExecError("Could not start instance: %s" % msg)
10852

    
10853
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
10854
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
10855
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10856
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
10857
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10858

    
10859
          (key_name, _, _) = self.x509_key_name
10860

    
10861
          dest_ca_pem = \
10862
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
10863
                                            self.dest_x509_ca)
10864

    
10865
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
10866
                                                     key_name, dest_ca_pem,
10867
                                                     timeouts)
10868
      finally:
10869
        helper.Cleanup()
10870

    
10871
      # Check for backwards compatibility
10872
      assert len(dresults) == len(instance.disks)
10873
      assert compat.all(isinstance(i, bool) for i in dresults), \
10874
             "Not all results are boolean: %r" % dresults
10875

    
10876
    finally:
10877
      if activate_disks:
10878
        feedback_fn("Deactivating disks for %s" % instance.name)
10879
        _ShutdownInstanceDisks(self, instance)
10880

    
10881
    if not (compat.all(dresults) and fin_resu):
10882
      failures = []
10883
      if not fin_resu:
10884
        failures.append("export finalization")
10885
      if not compat.all(dresults):
10886
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
10887
                               if not dsk)
10888
        failures.append("disk export: disk(s) %s" % fdsk)
10889

    
10890
      raise errors.OpExecError("Export failed, errors in %s" %
10891
                               utils.CommaJoin(failures))
10892

    
10893
    # At this point, the export was successful, we can cleanup/finish
10894

    
10895
    # Remove instance if requested
10896
    if self.op.remove_instance:
10897
      feedback_fn("Removing instance %s" % instance.name)
10898
      _RemoveInstance(self, feedback_fn, instance,
10899
                      self.op.ignore_remove_failures)
10900

    
10901
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
10902
      self._CleanupExports(feedback_fn)
10903

    
10904
    return fin_resu, dresults
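  # Worked example of the failure summary above (made-up values, not part of
  # the LU): with fin_resu = True and dresults = [True, False, False], the
  # failed disk indices 1 and 2 are joined into "1, 2" and the resulting error
  # reads "Export failed, errors in disk export: disk(s) 1, 2".
  #
  #   dresults = [True, False, False]
  #   fdsk = ", ".join(str(idx) for (idx, dsk) in enumerate(dresults)
  #                    if not dsk)
  #   assert fdsk == "1, 2"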
10905

    
10906

    
10907
class LUBackupRemove(NoHooksLU):
10908
  """Remove exports related to the named instance.
10909

10910
  """
10911
  REQ_BGL = False
10912

    
10913
  def ExpandNames(self):
10914
    self.needed_locks = {}
10915
    # We need all nodes to be locked in order for RemoveExport to work, but we
10916
    # don't need to lock the instance itself, as nothing will happen to it (and
10917
    # we can remove exports also for a removed instance)
10918
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10919

    
10920
  def Exec(self, feedback_fn):
10921
    """Remove any export.
10922

10923
    """
10924
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
10925
    # If the instance was not found we'll try with the name that was passed in.
10926
    # This will only work if it was an FQDN, though.
10927
    fqdn_warn = False
10928
    if not instance_name:
10929
      fqdn_warn = True
10930
      instance_name = self.op.instance_name
10931

    
10932
    locked_nodes = self.glm.list_owned(locking.LEVEL_NODE)
10933
    exportlist = self.rpc.call_export_list(locked_nodes)
10934
    found = False
10935
    for node in exportlist:
10936
      msg = exportlist[node].fail_msg
10937
      if msg:
10938
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
10939
        continue
10940
      if instance_name in exportlist[node].payload:
10941
        found = True
10942
        result = self.rpc.call_export_remove(node, instance_name)
10943
        msg = result.fail_msg
10944
        if msg:
10945
          logging.error("Could not remove export for instance %s"
10946
                        " on node %s: %s", instance_name, node, msg)
10947

    
10948
    if fqdn_warn and not found:
10949
      feedback_fn("Export not found. If trying to remove an export belonging"
10950
                  " to a deleted instance please use its Fully Qualified"
10951
                  " Domain Name.")
10952

    
10953

    
10954
class LUGroupAdd(LogicalUnit):
10955
  """Logical unit for creating node groups.
10956

10957
  """
10958
  HPATH = "group-add"
10959
  HTYPE = constants.HTYPE_GROUP
10960
  REQ_BGL = False
10961

    
10962
  def ExpandNames(self):
10963
    # We need the new group's UUID here so that we can create and acquire the
10964
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
10965
    # that it should not check whether the UUID exists in the configuration.
10966
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
10967
    self.needed_locks = {}
10968
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
10969

    
10970
  def CheckPrereq(self):
10971
    """Check prerequisites.
10972

10973
    This checks that the given group name is not an existing node group
10974
    already.
10975

10976
    """
10977
    try:
10978
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10979
    except errors.OpPrereqError:
10980
      pass
10981
    else:
10982
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
10983
                                 " node group (UUID: %s)" %
10984
                                 (self.op.group_name, existing_uuid),
10985
                                 errors.ECODE_EXISTS)
10986

    
10987
    if self.op.ndparams:
10988
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
10989

    
10990
  def BuildHooksEnv(self):
10991
    """Build hooks env.
10992

10993
    """
10994
    return {
10995
      "GROUP_NAME": self.op.group_name,
10996
      }
10997

    
10998
  def BuildHooksNodes(self):
10999
    """Build hooks nodes.
11000

11001
    """
11002
    mn = self.cfg.GetMasterNode()
11003
    return ([mn], [mn])
11004

    
11005
  def Exec(self, feedback_fn):
11006
    """Add the node group to the cluster.
11007

11008
    """
11009
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
11010
                                  uuid=self.group_uuid,
11011
                                  alloc_policy=self.op.alloc_policy,
11012
                                  ndparams=self.op.ndparams)
11013

    
11014
    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
11015
    del self.remove_locks[locking.LEVEL_NODEGROUP]
11016

    
11017

    
11018
class LUGroupAssignNodes(NoHooksLU):
11019
  """Logical unit for assigning nodes to groups.
11020

11021
  """
11022
  REQ_BGL = False
11023

    
11024
  def ExpandNames(self):
11025
    # These raise errors.OpPrereqError on their own:
11026
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11027
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
11028

    
11029
    # We want to lock all the affected nodes and groups. We have readily
11030
    # available the list of nodes, and the *destination* group. To gather the
11031
    # list of "source" groups, we need to fetch node information later on.
11032
    self.needed_locks = {
11033
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
11034
      locking.LEVEL_NODE: self.op.nodes,
11035
      }
11036

    
11037
  def DeclareLocks(self, level):
11038
    if level == locking.LEVEL_NODEGROUP:
11039
      assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
11040

    
11041
      # Try to get all affected nodes' groups without having the group or node
11042
      # lock yet. Needs verification later in the code flow.
11043
      groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
11044

    
11045
      self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
11046

    
11047
  def CheckPrereq(self):
11048
    """Check prerequisites.
11049

11050
    """
11051
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
11052
    assert (frozenset(self.glm.list_owned(locking.LEVEL_NODE)) ==
11053
            frozenset(self.op.nodes))
11054

    
11055
    expected_locks = (set([self.group_uuid]) |
11056
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
11057
    actual_locks = self.glm.list_owned(locking.LEVEL_NODEGROUP)
11058
    if actual_locks != expected_locks:
11059
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
11060
                               " current groups are '%s', used to be '%s'" %
11061
                               (utils.CommaJoin(expected_locks),
11062
                                utils.CommaJoin(actual_locks)))
11063

    
11064
    self.node_data = self.cfg.GetAllNodesInfo()
11065
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
11066
    instance_data = self.cfg.GetAllInstancesInfo()
11067

    
11068
    if self.group is None:
11069
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11070
                               (self.op.group_name, self.group_uuid))
11071

    
11072
    (new_splits, previous_splits) = \
11073
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
11074
                                             for node in self.op.nodes],
11075
                                            self.node_data, instance_data)
11076

    
11077
    if new_splits:
11078
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
11079

    
11080
      if not self.op.force:
11081
        raise errors.OpExecError("The following instances get split by this"
11082
                                 " change and --force was not given: %s" %
11083
                                 fmt_new_splits)
11084
      else:
11085
        self.LogWarning("This operation will split the following instances: %s",
11086
                        fmt_new_splits)
11087

    
11088
        if previous_splits:
11089
          self.LogWarning("In addition, these already-split instances continue"
11090
                          " to be split across groups: %s",
11091
                          utils.CommaJoin(utils.NiceSort(previous_splits)))
11092

    
11093
  def Exec(self, feedback_fn):
11094
    """Assign nodes to a new group.
11095

11096
    """
11097
    for node in self.op.nodes:
11098
      self.node_data[node].group = self.group_uuid
11099

    
11100
    # FIXME: Depends on side-effects of modifying the result of
11101
    # C{cfg.GetAllNodesInfo}
11102

    
11103
    self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
11104

    
11105
  @staticmethod
11106
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
11107
    """Check for split instances after a node assignment.
11108

11109
    This method considers a series of node assignments as an atomic operation,
11110
    and returns information about split instances after applying the set of
11111
    changes.
11112

11113
    In particular, it returns information about newly split instances, and
11114
    instances that were already split, and remain so after the change.
11115

11116
    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
11117
    considered.
11118

11119
    @type changes: list of (node_name, new_group_uuid) pairs.
11120
    @param changes: list of node assignments to consider.
11121
    @param node_data: a dict with data for all nodes
11122
    @param instance_data: a dict with all instances to consider
11123
    @rtype: a two-tuple
11124
    @return: a list of instances that were previously okay and end up split as a
      consequence of this change, and a list of instances that were previously
      split and that this change does not fix.
11127

11128
    """
11129
    changed_nodes = dict((node, group) for node, group in changes
11130
                         if node_data[node].group != group)
11131

    
11132
    all_split_instances = set()
11133
    previously_split_instances = set()
11134

    
11135
    def InstanceNodes(instance):
11136
      return [instance.primary_node] + list(instance.secondary_nodes)
11137

    
11138
    for inst in instance_data.values():
11139
      if inst.disk_template not in constants.DTS_INT_MIRROR:
11140
        continue
11141

    
11142
      instance_nodes = InstanceNodes(inst)
11143

    
11144
      if len(set(node_data[node].group for node in instance_nodes)) > 1:
11145
        previously_split_instances.add(inst.name)
11146

    
11147
      if len(set(changed_nodes.get(node, node_data[node].group)
11148
                 for node in instance_nodes)) > 1:
11149
        all_split_instances.add(inst.name)
11150

    
11151
    return (list(all_split_instances - previously_split_instances),
11152
            list(previously_split_instances & all_split_instances))
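  # Minimal sketch of the split check above (toy data, illustrative only): an
  # instance counts as split when its primary and secondary nodes end up in
  # more than one node group.
  #
  #   node_group = {"node1": "g1", "node2": "g1", "node3": "g2"}
  #   changes = {"node2": "g2"}                 # node2 moves to group g2
  #   inst_nodes = ["node1", "node2"]           # a DRBD pair
  #   before = set(node_group[n] for n in inst_nodes)                 # g1
  #   after = set(changes.get(n, node_group[n]) for n in inst_nodes)  # g1, g2
  #   newly_split = len(before) == 1 and len(after) > 1               # True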
11153

    
11154

    
11155
class _GroupQuery(_QueryBase):
11156
  FIELDS = query.GROUP_FIELDS
11157

    
11158
  def ExpandNames(self, lu):
11159
    lu.needed_locks = {}
11160

    
11161
    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
11162
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
11163

    
11164
    if not self.names:
11165
      self.wanted = [name_to_uuid[name]
11166
                     for name in utils.NiceSort(name_to_uuid.keys())]
11167
    else:
11168
      # Accept names to be either names or UUIDs.
11169
      missing = []
11170
      self.wanted = []
11171
      all_uuid = frozenset(self._all_groups.keys())
11172

    
11173
      for name in self.names:
11174
        if name in all_uuid:
11175
          self.wanted.append(name)
11176
        elif name in name_to_uuid:
11177
          self.wanted.append(name_to_uuid[name])
11178
        else:
11179
          missing.append(name)
11180

    
11181
      if missing:
11182
        raise errors.OpPrereqError("Some groups do not exist: %s" %
11183
                                   utils.CommaJoin(missing),
11184
                                   errors.ECODE_NOENT)
11185

    
11186
  def DeclareLocks(self, lu, level):
11187
    pass
11188

    
11189
  def _GetQueryData(self, lu):
11190
    """Computes the list of node groups and their attributes.
11191

11192
    """
11193
    do_nodes = query.GQ_NODE in self.requested_data
11194
    do_instances = query.GQ_INST in self.requested_data
11195

    
11196
    group_to_nodes = None
11197
    group_to_instances = None
11198

    
11199
    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
11200
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
11201
    # latter GetAllInstancesInfo() is not enough, for we have to go through
11202
    # instance->node. Hence, we will need to process nodes even if we only need
11203
    # instance information.
11204
    if do_nodes or do_instances:
11205
      all_nodes = lu.cfg.GetAllNodesInfo()
11206
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
11207
      node_to_group = {}
11208

    
11209
      for node in all_nodes.values():
11210
        if node.group in group_to_nodes:
11211
          group_to_nodes[node.group].append(node.name)
11212
          node_to_group[node.name] = node.group
11213

    
11214
      if do_instances:
11215
        all_instances = lu.cfg.GetAllInstancesInfo()
11216
        group_to_instances = dict((uuid, []) for uuid in self.wanted)
11217

    
11218
        for instance in all_instances.values():
11219
          node = instance.primary_node
11220
          if node in node_to_group:
11221
            group_to_instances[node_to_group[node]].append(instance.name)
11222

    
11223
        if not do_nodes:
11224
          # Do not pass on node information if it was not requested.
11225
          group_to_nodes = None
11226

    
11227
    return query.GroupQueryData([self._all_groups[uuid]
11228
                                 for uuid in self.wanted],
11229
                                group_to_nodes, group_to_instances)
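  # Sketch of the mapping built above (illustrative data only): instances are
  # attached to a group through their primary node, which is why nodes must be
  # processed even when only instance information was requested.
  #
  #   node_to_group = {"node1": "g1", "node2": "g2"}
  #   primary_nodes = {"inst1": "node1", "inst2": "node2", "inst3": "node1"}
  #   group_to_instances = {"g1": [], "g2": []}
  #   for iname, pnode in primary_nodes.items():
  #     group_to_instances[node_to_group[pnode]].append(iname)
  #   # "g1" ends up with inst1 and inst3, "g2" with inst2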
11230

    
11231

    
11232
class LUGroupQuery(NoHooksLU):
  """Logical unit for querying node groups.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
                          self.op.output_fields, False)

  def ExpandNames(self):
    self.gq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.gq.OldStyleQuery(self)
11247

    
11248

    
11249
class LUGroupSetParams(LogicalUnit):
11250
  """Modifies the parameters of a node group.
11251

11252
  """
11253
  HPATH = "group-modify"
11254
  HTYPE = constants.HTYPE_GROUP
11255
  REQ_BGL = False
11256

    
11257
  def CheckArguments(self):
11258
    all_changes = [
11259
      self.op.ndparams,
11260
      self.op.alloc_policy,
11261
      ]
11262

    
11263
    if all_changes.count(None) == len(all_changes):
11264
      raise errors.OpPrereqError("Please pass at least one modification",
11265
                                 errors.ECODE_INVAL)
11266

    
11267
  def ExpandNames(self):
11268
    # This raises errors.OpPrereqError on its own:
11269
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11270

    
11271
    self.needed_locks = {
11272
      locking.LEVEL_NODEGROUP: [self.group_uuid],
11273
      }
11274

    
11275
  def CheckPrereq(self):
11276
    """Check prerequisites.
11277

11278
    """
11279
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
11280

    
11281
    if self.group is None:
11282
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11283
                               (self.op.group_name, self.group_uuid))
11284

    
11285
    if self.op.ndparams:
11286
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
11287
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
11288
      self.new_ndparams = new_ndparams
11289

    
11290
  def BuildHooksEnv(self):
11291
    """Build hooks env.
11292

11293
    """
11294
    return {
11295
      "GROUP_NAME": self.op.group_name,
11296
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
11297
      }
11298

    
11299
  def BuildHooksNodes(self):
11300
    """Build hooks nodes.
11301

11302
    """
11303
    mn = self.cfg.GetMasterNode()
11304
    return ([mn], [mn])
11305

    
11306
  def Exec(self, feedback_fn):
11307
    """Modifies the node group.
11308

11309
    """
11310
    result = []
11311

    
11312
    if self.op.ndparams:
11313
      self.group.ndparams = self.new_ndparams
11314
      result.append(("ndparams", str(self.group.ndparams)))
11315

    
11316
    if self.op.alloc_policy:
11317
      self.group.alloc_policy = self.op.alloc_policy
11318

    
11319
    self.cfg.Update(self.group, feedback_fn)
11320
    return result
11321

    
11322

    
11323

    
11324
class LUGroupRemove(LogicalUnit):
11325
  HPATH = "group-remove"
11326
  HTYPE = constants.HTYPE_GROUP
11327
  REQ_BGL = False
11328

    
11329
  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11332
    self.needed_locks = {
11333
      locking.LEVEL_NODEGROUP: [self.group_uuid],
11334
      }
11335

    
11336
  def CheckPrereq(self):
11337
    """Check prerequisites.
11338

11339
    This checks that the given group name exists as a node group, that it is
    empty (i.e., contains no nodes), and that it is not the last group of the
    cluster.
11342

11343
    """
11344
    # Verify that the group is empty.
11345
    group_nodes = [node.name
11346
                   for node in self.cfg.GetAllNodesInfo().values()
11347
                   if node.group == self.group_uuid]
11348

    
11349
    if group_nodes:
11350
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
11351
                                 " nodes: %s" %
11352
                                 (self.op.group_name,
11353
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
11354
                                 errors.ECODE_STATE)
11355

    
11356
    # Verify the cluster would not be left group-less.
11357
    if len(self.cfg.GetNodeGroupList()) == 1:
11358
      raise errors.OpPrereqError("Group '%s' is the only group,"
11359
                                 " cannot be removed" %
11360
                                 self.op.group_name,
11361
                                 errors.ECODE_STATE)
11362

    
11363
  def BuildHooksEnv(self):
11364
    """Build hooks env.
11365

11366
    """
11367
    return {
11368
      "GROUP_NAME": self.op.group_name,
11369
      }
11370

    
11371
  def BuildHooksNodes(self):
11372
    """Build hooks nodes.
11373

11374
    """
11375
    mn = self.cfg.GetMasterNode()
11376
    return ([mn], [mn])
11377

    
11378
  def Exec(self, feedback_fn):
11379
    """Remove the node group.
11380

11381
    """
11382
    try:
11383
      self.cfg.RemoveNodeGroup(self.group_uuid)
11384
    except errors.ConfigurationError:
11385
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
11386
                               (self.op.group_name, self.group_uuid))
11387

    
11388
    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11389

    
11390

    
11391
class LUGroupRename(LogicalUnit):
11392
  HPATH = "group-rename"
11393
  HTYPE = constants.HTYPE_GROUP
11394
  REQ_BGL = False
11395

    
11396
  def ExpandNames(self):
11397
    # This raises errors.OpPrereqError on its own:
11398
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11399

    
11400
    self.needed_locks = {
11401
      locking.LEVEL_NODEGROUP: [self.group_uuid],
11402
      }
11403

    
11404
  def CheckPrereq(self):
11405
    """Check prerequisites.
11406

11407
    Ensures requested new name is not yet used.
11408

11409
    """
11410
    try:
11411
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
11412
    except errors.OpPrereqError:
11413
      pass
11414
    else:
11415
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
11416
                                 " node group (UUID: %s)" %
11417
                                 (self.op.new_name, new_name_uuid),
11418
                                 errors.ECODE_EXISTS)
11419

    
11420
  def BuildHooksEnv(self):
11421
    """Build hooks env.
11422

11423
    """
11424
    return {
11425
      "OLD_NAME": self.op.group_name,
11426
      "NEW_NAME": self.op.new_name,
11427
      }
11428

    
11429
  def BuildHooksNodes(self):
11430
    """Build hooks nodes.
11431

11432
    """
11433
    mn = self.cfg.GetMasterNode()
11434

    
11435
    all_nodes = self.cfg.GetAllNodesInfo()
11436
    all_nodes.pop(mn, None)
11437

    
11438
    run_nodes = [mn]
11439
    run_nodes.extend(node.name for node in all_nodes.values()
11440
                     if node.group == self.group_uuid)
11441

    
11442
    return (run_nodes, run_nodes)
11443

    
11444
  def Exec(self, feedback_fn):
11445
    """Rename the node group.
11446

11447
    """
11448
    group = self.cfg.GetNodeGroup(self.group_uuid)
11449

    
11450
    if group is None:
11451
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11452
                               (self.op.group_name, self.group_uuid))
11453

    
11454
    group.name = self.op.new_name
11455
    self.cfg.Update(group, feedback_fn)
11456

    
11457
    return self.op.new_name
11458

    
11459

    
11460
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
11461
  """Generic tags LU.
11462

11463
  This is an abstract class which is the parent of all the other tags LUs.
11464

11465
  """
11466
  def ExpandNames(self):
11467
    self.group_uuid = None
11468
    self.needed_locks = {}
11469
    if self.op.kind == constants.TAG_NODE:
11470
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
11471
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
11472
    elif self.op.kind == constants.TAG_INSTANCE:
11473
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
11474
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
11475
    elif self.op.kind == constants.TAG_NODEGROUP:
11476
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
11477

    
11478
    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
11479
    # not possible to acquire the BGL based on opcode parameters)
11480

    
11481
  def CheckPrereq(self):
11482
    """Check prerequisites.
11483

11484
    """
11485
    if self.op.kind == constants.TAG_CLUSTER:
11486
      self.target = self.cfg.GetClusterInfo()
11487
    elif self.op.kind == constants.TAG_NODE:
11488
      self.target = self.cfg.GetNodeInfo(self.op.name)
11489
    elif self.op.kind == constants.TAG_INSTANCE:
11490
      self.target = self.cfg.GetInstanceInfo(self.op.name)
11491
    elif self.op.kind == constants.TAG_NODEGROUP:
11492
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
11493
    else:
11494
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
11495
                                 str(self.op.kind), errors.ECODE_INVAL)
11496

    
11497

    
11498
class LUTagsGet(TagsLU):
  """Returns the tags of a given object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    TagsLU.ExpandNames(self)

    # Share locks as this is only a read operation
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())
11515

    
11516

    
11517
class LUTagsSearch(NoHooksLU):
11518
  """Searches the tags for a given pattern.
11519

11520
  """
11521
  REQ_BGL = False
11522

    
11523
  def ExpandNames(self):
11524
    self.needed_locks = {}
11525

    
11526
  def CheckPrereq(self):
11527
    """Check prerequisites.
11528

11529
    This checks the pattern passed for validity by compiling it.
11530

11531
    """
11532
    try:
11533
      self.re = re.compile(self.op.pattern)
11534
    except re.error, err:
11535
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
11536
                                 (self.op.pattern, err), errors.ECODE_INVAL)
11537

    
11538
  def Exec(self, feedback_fn):
11539
    """Returns the tag list.
11540

11541
    """
11542
    cfg = self.cfg
11543
    tgts = [("/cluster", cfg.GetClusterInfo())]
11544
    ilist = cfg.GetAllInstancesInfo().values()
11545
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
11546
    nlist = cfg.GetAllNodesInfo().values()
11547
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
11548
    tgts.extend(("/nodegroup/%s" % n.name, n)
11549
                for n in cfg.GetAllNodeGroupsInfo().values())
11550
    results = []
11551
    for path, target in tgts:
11552
      for tag in target.GetTags():
11553
        if self.re.search(tag):
11554
          results.append((path, tag))
11555
    return results
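  # Example of the result shape (made-up names): searching for the pattern
  # "^web" on a cluster where instance "inst1" carries the tag "webserver" and
  # node "node1" carries the tag "web-frontend" would return
  #
  #   [("/instances/inst1", "webserver"), ("/nodes/node1", "web-frontend")]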
11556

    
11557

    
11558
class LUTagsSet(TagsLU):
11559
  """Sets a tag on a given object.
11560

11561
  """
11562
  REQ_BGL = False
11563

    
11564
  def CheckPrereq(self):
11565
    """Check prerequisites.
11566

11567
    This checks the type and length of the tag name and value.
11568

11569
    """
11570
    TagsLU.CheckPrereq(self)
11571
    for tag in self.op.tags:
11572
      objects.TaggableObject.ValidateTag(tag)
11573

    
11574
  def Exec(self, feedback_fn):
11575
    """Sets the tag.
11576

11577
    """
11578
    try:
11579
      for tag in self.op.tags:
11580
        self.target.AddTag(tag)
11581
    except errors.TagError, err:
11582
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
11583
    self.cfg.Update(self.target, feedback_fn)
11584

    
11585

    
11586
class LUTagsDel(TagsLU):
11587
  """Delete a list of tags from a given object.
11588

11589
  """
11590
  REQ_BGL = False
11591

    
11592
  def CheckPrereq(self):
11593
    """Check prerequisites.
11594

11595
    This checks that we have the given tag.
11596

11597
    """
11598
    TagsLU.CheckPrereq(self)
11599
    for tag in self.op.tags:
11600
      objects.TaggableObject.ValidateTag(tag)
11601
    del_tags = frozenset(self.op.tags)
11602
    cur_tags = self.target.GetTags()
11603

    
11604
    diff_tags = del_tags - cur_tags
11605
    if diff_tags:
11606
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
11607
      raise errors.OpPrereqError("Tag(s) %s not found" %
11608
                                 (utils.CommaJoin(diff_names), ),
11609
                                 errors.ECODE_NOENT)
11610

    
11611
  def Exec(self, feedback_fn):
11612
    """Remove the tag from the object.
11613

11614
    """
11615
    for tag in self.op.tags:
11616
      self.target.RemoveTag(tag)
11617
    self.cfg.Update(self.target, feedback_fn)
11618

    
11619

    
11620
class LUTestDelay(NoHooksLU):
11621
  """Sleep for a specified amount of time.
11622

11623
  This LU sleeps on the master and/or nodes for a specified amount of
11624
  time.
11625

11626
  """
11627
  REQ_BGL = False
11628

    
11629
  def ExpandNames(self):
11630
    """Expand names and set required locks.
11631

11632
    This expands the node list, if any.
11633

11634
    """
11635
    self.needed_locks = {}
11636
    if self.op.on_nodes:
11637
      # _GetWantedNodes can be used here, but is not always appropriate to use
11638
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
11639
      # more information.
11640
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
11641
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
11642

    
11643
  def _TestDelay(self):
11644
    """Do the actual sleep.
11645

11646
    """
11647
    if self.op.on_master:
11648
      if not utils.TestDelay(self.op.duration):
11649
        raise errors.OpExecError("Error during master delay test")
11650
    if self.op.on_nodes:
11651
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
11652
      for node, node_result in result.items():
11653
        node_result.Raise("Failure during rpc call to node %s" % node)
11654

    
11655
  def Exec(self, feedback_fn):
11656
    """Execute the test delay opcode, with the wanted repetitions.
11657

11658
    """
11659
    if self.op.repeat == 0:
11660
      self._TestDelay()
11661
    else:
11662
      top_value = self.op.repeat - 1
11663
      for i in range(self.op.repeat):
11664
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
11665
        self._TestDelay()
11666

    
11667

    
11668
class LUTestJqueue(NoHooksLU):
11669
  """Utility LU to test some aspects of the job queue.
11670

11671
  """
11672
  REQ_BGL = False
11673

    
11674
  # Must be lower than default timeout for WaitForJobChange to see whether it
11675
  # notices changed jobs
11676
  _CLIENT_CONNECT_TIMEOUT = 20.0
11677
  _CLIENT_CONFIRM_TIMEOUT = 60.0
11678

    
11679
  @classmethod
11680
  def _NotifyUsingSocket(cls, cb, errcls):
11681
    """Opens a Unix socket and waits for another program to connect.
11682

11683
    @type cb: callable
11684
    @param cb: Callback to send socket name to client
11685
    @type errcls: class
11686
    @param errcls: Exception class to use for errors
11687

11688
    """
11689
    # Using a temporary directory as there's no easy way to create temporary
11690
    # sockets without writing a custom loop around tempfile.mktemp and
11691
    # socket.bind
11692
    tmpdir = tempfile.mkdtemp()
11693
    try:
11694
      tmpsock = utils.PathJoin(tmpdir, "sock")
11695

    
11696
      logging.debug("Creating temporary socket at %s", tmpsock)
11697
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
11698
      try:
11699
        sock.bind(tmpsock)
11700
        sock.listen(1)
11701

    
11702
        # Send details to client
11703
        cb(tmpsock)
11704

    
11705
        # Wait for client to connect before continuing
11706
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
11707
        try:
11708
          (conn, _) = sock.accept()
11709
        except socket.error, err:
11710
          raise errcls("Client didn't connect in time (%s)" % err)
11711
      finally:
11712
        sock.close()
11713
    finally:
11714
      # Remove as soon as client is connected
11715
      shutil.rmtree(tmpdir)
11716

    
11717
    # Wait for client to close
11718
    try:
11719
      try:
11720
        # pylint: disable-msg=E1101
11721
        # Instance of '_socketobject' has no ... member
11722
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
11723
        conn.recv(1)
11724
      except socket.error, err:
11725
        raise errcls("Client failed to confirm notification (%s)" % err)
11726
    finally:
11727
      conn.close()
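  # Sketch of the client side this helper expects (illustrative only): connect
  # to the announced socket path within _CLIENT_CONNECT_TIMEOUT, then send a
  # single byte within _CLIENT_CONFIRM_TIMEOUT to confirm the notification.
  #
  #   import socket
  #   client = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
  #   client.connect(sockname)   # path received through the callback/feedback
  #   client.send("x")           # any byte confirms the notification
  #   client.close()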
11728

    
11729
  def _SendNotification(self, test, arg, sockname):
11730
    """Sends a notification to the client.
11731

11732
    @type test: string
11733
    @param test: Test name
11734
    @param arg: Test argument (depends on test)
11735
    @type sockname: string
11736
    @param sockname: Socket path
11737

11738
    """
11739
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
11740

    
11741
  def _Notify(self, prereq, test, arg):
11742
    """Notifies the client of a test.
11743

11744
    @type prereq: bool
11745
    @param prereq: Whether this is a prereq-phase test
11746
    @type test: string
11747
    @param test: Test name
11748
    @param arg: Test argument (depends on test)
11749

11750
    """
11751
    if prereq:
11752
      errcls = errors.OpPrereqError
11753
    else:
11754
      errcls = errors.OpExecError
11755

    
11756
    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
11757
                                                  test, arg),
11758
                                   errcls)
11759

    
11760
  def CheckArguments(self):
11761
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
11762
    self.expandnames_calls = 0
11763

    
11764
  def ExpandNames(self):
11765
    checkargs_calls = getattr(self, "checkargs_calls", 0)
11766
    if checkargs_calls < 1:
11767
      raise errors.ProgrammerError("CheckArguments was not called")
11768

    
11769
    self.expandnames_calls += 1
11770

    
11771
    if self.op.notify_waitlock:
11772
      self._Notify(True, constants.JQT_EXPANDNAMES, None)
11773

    
11774
    self.LogInfo("Expanding names")
11775

    
11776
    # Get lock on master node (just to get a lock, not for a particular reason)
11777
    self.needed_locks = {
11778
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
11779
      }
11780

    
11781
  def Exec(self, feedback_fn):
11782
    if self.expandnames_calls < 1:
11783
      raise errors.ProgrammerError("ExpandNames was not called")
11784

    
11785
    if self.op.notify_exec:
11786
      self._Notify(False, constants.JQT_EXEC, None)
11787

    
11788
    self.LogInfo("Executing")
11789

    
11790
    if self.op.log_messages:
11791
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
11792
      for idx, msg in enumerate(self.op.log_messages):
11793
        self.LogInfo("Sending log message %s", idx + 1)
11794
        feedback_fn(constants.JQT_MSGPREFIX + msg)
11795
        # Report how many test messages have been sent
11796
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)
11797

    
11798
    if self.op.fail:
11799
      raise errors.OpExecError("Opcode failure was requested")
11800

    
11801
    return True
11802

    
11803

    
11804
class IAllocator(object):
11805
  """IAllocator framework.
11806

11807
  An IAllocator instance has four sets of attributes:
11808
    - cfg that is needed to query the cluster
11809
    - input data (all members of the _KEYS class attribute are required)
11810
    - four buffer attributes (in|out_data|text), that represent the
11811
      input (to the external script) in text and data structure format,
11812
      and the output from it, again in two formats
11813
    - the result variables from the script (success, info, nodes) for
11814
      easy usage
11815

11816
  """
11817
  # pylint: disable-msg=R0902
11818
  # lots of instance attributes
11819

    
11820
  def __init__(self, cfg, rpc, mode, **kwargs):
11821
    self.cfg = cfg
11822
    self.rpc = rpc
11823
    # init buffer variables
11824
    self.in_text = self.out_text = self.in_data = self.out_data = None
11825
    # init all input fields so that pylint is happy
11826
    self.mode = mode
11827
    self.mem_size = self.disks = self.disk_template = None
11828
    self.os = self.tags = self.nics = self.vcpus = None
11829
    self.hypervisor = None
11830
    self.relocate_from = None
11831
    self.name = None
11832
    self.evac_nodes = None
11833
    self.instances = None
11834
    self.reloc_mode = None
11835
    self.target_groups = None
11836
    # computed fields
11837
    self.required_nodes = None
11838
    # init result fields
11839
    self.success = self.info = self.result = None
11840

    
11841
    try:
11842
      (fn, keyset, self._result_check) = self._MODE_DATA[self.mode]
11843
    except KeyError:
11844
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
11845
                                   " IAllocator" % self.mode)
11846

    
11847
    for key in kwargs:
11848
      if key not in keyset:
11849
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
11850
                                     " IAllocator" % key)
11851
      setattr(self, key, kwargs[key])
11852

    
11853
    for key in keyset:
11854
      if key not in kwargs:
11855
        raise errors.ProgrammerError("Missing input parameter '%s' to"
11856
                                     " IAllocator" % key)
11857
    self._BuildInputData(compat.partial(fn, self))
11858

    
11859
  def _ComputeClusterData(self):
11860
    """Compute the generic allocator input data.
11861

11862
    This is the data that is independent of the actual operation.
11863

11864
    """
11865
    cfg = self.cfg
11866
    cluster_info = cfg.GetClusterInfo()
11867
    # cluster data
11868
    data = {
11869
      "version": constants.IALLOCATOR_VERSION,
11870
      "cluster_name": cfg.GetClusterName(),
11871
      "cluster_tags": list(cluster_info.GetTags()),
11872
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
11873
      # we don't have job IDs
11874
      }
11875
    ninfo = cfg.GetAllNodesInfo()
11876
    iinfo = cfg.GetAllInstancesInfo().values()
11877
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
11878

    
11879
    # node data
11880
    node_list = [n.name for n in ninfo.values() if n.vm_capable]
11881

    
11882
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
11883
      hypervisor_name = self.hypervisor
11884
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
11885
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
11886
    elif self.mode in (constants.IALLOCATOR_MODE_MEVAC,
11887
                       constants.IALLOCATOR_MODE_MRELOC):
11888
      hypervisor_name = cluster_info.enabled_hypervisors[0]
11889

    
11890
    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
11891
                                        hypervisor_name)
11892
    node_iinfo = \
11893
      self.rpc.call_all_instances_info(node_list,
11894
                                       cluster_info.enabled_hypervisors)
11895

    
11896
    data["nodegroups"] = self._ComputeNodeGroupData(cfg)
11897

    
11898
    config_ndata = self._ComputeBasicNodeData(ninfo)
11899
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
11900
                                                 i_list, config_ndata)
11901
    assert len(data["nodes"]) == len(ninfo), \
11902
        "Incomplete node data computed"
11903

    
11904
    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
11905

    
11906
    self.in_data = data
11907

    
11908
  @staticmethod
11909
  def _ComputeNodeGroupData(cfg):
11910
    """Compute node groups data.
11911

11912
    """
11913
    ng = dict((guuid, {
11914
      "name": gdata.name,
11915
      "alloc_policy": gdata.alloc_policy,
11916
      })
11917
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
11918

    
11919
    return ng
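  # Shape of the structure returned above (example values only):
  #
  #   {
  #     "uuid-of-group-one": {"name": "default", "alloc_policy": "preferred"},
  #     "uuid-of-group-two": {"name": "spare", "alloc_policy": "last_resort"},
  #   }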
11920

    
11921
  @staticmethod
11922
  def _ComputeBasicNodeData(node_cfg):
11923
    """Compute global node data.
11924

11925
    @rtype: dict
11926
    @returns: a dict of name: (node dict, node config)
11927

11928
    """
11929
    # fill in static (config-based) values
11930
    node_results = dict((ninfo.name, {
11931
      "tags": list(ninfo.GetTags()),
11932
      "primary_ip": ninfo.primary_ip,
11933
      "secondary_ip": ninfo.secondary_ip,
11934
      "offline": ninfo.offline,
11935
      "drained": ninfo.drained,
11936
      "master_candidate": ninfo.master_candidate,
11937
      "group": ninfo.group,
11938
      "master_capable": ninfo.master_capable,
11939
      "vm_capable": ninfo.vm_capable,
11940
      })
11941
      for ninfo in node_cfg.values())
11942

    
11943
    return node_results
11944

    
11945
  @staticmethod
11946
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
11947
                              node_results):
11948
    """Compute global node data.
11949

11950
    @param node_results: the basic node structures as filled from the config
11951

11952
    """
11953
    # make a copy of the current dict
11954
    node_results = dict(node_results)
11955
    for nname, nresult in node_data.items():
11956
      assert nname in node_results, "Missing basic data for node %s" % nname
11957
      ninfo = node_cfg[nname]
11958

    
11959
      if not (ninfo.offline or ninfo.drained):
11960
        nresult.Raise("Can't get data for node %s" % nname)
11961
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
11962
                                nname)
11963
        remote_info = nresult.payload
11964

    
11965
        for attr in ['memory_total', 'memory_free', 'memory_dom0',
11966
                     'vg_size', 'vg_free', 'cpu_total']:
11967
          if attr not in remote_info:
11968
            raise errors.OpExecError("Node '%s' didn't return attribute"
11969
                                     " '%s'" % (nname, attr))
11970
          if not isinstance(remote_info[attr], int):
11971
            raise errors.OpExecError("Node '%s' returned invalid value"
11972
                                     " for '%s': %s" %
11973
                                     (nname, attr, remote_info[attr]))
11974
        # compute memory used by primary instances
11975
        i_p_mem = i_p_up_mem = 0
11976
        for iinfo, beinfo in i_list:
11977
          if iinfo.primary_node == nname:
11978
            i_p_mem += beinfo[constants.BE_MEMORY]
11979
            if iinfo.name not in node_iinfo[nname].payload:
11980
              i_used_mem = 0
11981
            else:
11982
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
11983
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
11984
            remote_info['memory_free'] -= max(0, i_mem_diff)
11985

    
11986
            if iinfo.admin_up:
11987
              i_p_up_mem += beinfo[constants.BE_MEMORY]
11988

    
11989
        # compute memory used by instances
11990
        pnr_dyn = {
11991
          "total_memory": remote_info['memory_total'],
11992
          "reserved_memory": remote_info['memory_dom0'],
11993
          "free_memory": remote_info['memory_free'],
11994
          "total_disk": remote_info['vg_size'],
11995
          "free_disk": remote_info['vg_free'],
11996
          "total_cpus": remote_info['cpu_total'],
11997
          "i_pri_memory": i_p_mem,
11998
          "i_pri_up_memory": i_p_up_mem,
11999
          }
12000
        pnr_dyn.update(node_results[nname])
12001
        node_results[nname] = pnr_dyn
12002

    
12003
    return node_results
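  # Worked example of the free-memory adjustment above (made-up numbers): an
  # instance with BE_MEMORY = 1024 MiB that currently uses only 512 MiB gives
  # i_mem_diff = 1024 - 512 = 512, so the node's reported 'memory_free' is
  # lowered by 512 to account for what the instance may still claim; if the
  # instance already uses at least its configured memory, the max(0, ...)
  # clamp leaves 'memory_free' unchanged.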
12004

    
12005
  @staticmethod
12006
  def _ComputeInstanceData(cluster_info, i_list):
12007
    """Compute global instance data.
12008

12009
    """
12010
    instance_data = {}
12011
    for iinfo, beinfo in i_list:
12012
      nic_data = []
12013
      for nic in iinfo.nics:
12014
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
12015
        nic_dict = {
12016
          "mac": nic.mac,
12017
          "ip": nic.ip,
12018
          "mode": filled_params[constants.NIC_MODE],
12019
          "link": filled_params[constants.NIC_LINK],
12020
          }
12021
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
12022
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
12023
        nic_data.append(nic_dict)
12024
      pir = {
12025
        "tags": list(iinfo.GetTags()),
12026
        "admin_up": iinfo.admin_up,
12027
        "vcpus": beinfo[constants.BE_VCPUS],
12028
        "memory": beinfo[constants.BE_MEMORY],
12029
        "os": iinfo.os,
12030
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
12031
        "nics": nic_data,
12032
        "disks": [{constants.IDISK_SIZE: dsk.size,
12033
                   constants.IDISK_MODE: dsk.mode}
12034
                  for dsk in iinfo.disks],
12035
        "disk_template": iinfo.disk_template,
12036
        "hypervisor": iinfo.hypervisor,
12037
        }
12038
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
12039
                                                 pir["disks"])
12040
      instance_data[iinfo.name] = pir
12041

    
12042
    return instance_data
12043

    
12044
  def _AddNewInstance(self):
12045
    """Add new instance data to allocator structure.
12046

12047
    This in combination with _ComputeClusterData will create the
12048
    correct structure needed as input for the allocator.
12049

12050
    The checks for the completeness of the opcode must have already been
12051
    done.
12052

12053
    """
12054
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)
12055

    
12056
    if self.disk_template in constants.DTS_INT_MIRROR:
12057
      self.required_nodes = 2
12058
    else:
12059
      self.required_nodes = 1
12060

    
12061
    request = {
12062
      "name": self.name,
12063
      "disk_template": self.disk_template,
12064
      "tags": self.tags,
12065
      "os": self.os,
12066
      "vcpus": self.vcpus,
12067
      "memory": self.mem_size,
12068
      "disks": self.disks,
12069
      "disk_space_total": disk_space,
12070
      "nics": self.nics,
12071
      "required_nodes": self.required_nodes,
12072
      }
12073

    
12074
    return request
12075

    
12076
  def _AddRelocateInstance(self):
12077
    """Add relocate instance data to allocator structure.
12078

12079
    This in combination with _ComputeClusterData will create the
12080
    correct structure needed as input for the allocator.
12081

12082
    The checks for the completeness of the opcode must have already been
12083
    done.
12084

12085
    """
12086
    instance = self.cfg.GetInstanceInfo(self.name)
12087
    if instance is None:
12088
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
12089
                                   " IAllocator" % self.name)
12090

    
12091
    if instance.disk_template not in constants.DTS_MIRRORED:
12092
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
12093
                                 errors.ECODE_INVAL)
12094

    
12095
    if instance.disk_template in constants.DTS_INT_MIRROR and \
12096
        len(instance.secondary_nodes) != 1:
12097
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
12098
                                 errors.ECODE_STATE)
12099

    
12100
    self.required_nodes = 1
12101
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
12102
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
12103

    
12104
    request = {
12105
      "name": self.name,
12106
      "disk_space_total": disk_space,
12107
      "required_nodes": self.required_nodes,
12108
      "relocate_from": self.relocate_from,
12109
      }
12110
    return request
12111

    
12112
  def _AddEvacuateNodes(self):
12113
    """Add evacuate nodes data to allocator structure.
12114

12115
    """
12116
    request = {
12117
      "evac_nodes": self.evac_nodes
12118
      }
12119
    return request
12120

    
12121
  def _AddMultiRelocate(self):
12122
    """Get data for multi-relocate requests.
12123

12124
    """
12125
    return {
12126
      "instances": self.instances,
12127
      "reloc_mode": self.reloc_mode,
12128
      "target_groups": self.target_groups,
12129
      }
12130

    
12131
  def _BuildInputData(self, fn):
12132
    """Build input data structures.
12133

12134
    """
12135
    self._ComputeClusterData()
12136

    
12137
    request = fn()
12138
    request["type"] = self.mode
12139
    self.in_data["request"] = request
12140

    
12141
    self.in_text = serializer.Dump(self.in_data)
12142

    
12143
  _MODE_DATA = {
12144
    constants.IALLOCATOR_MODE_ALLOC:
12145
      (_AddNewInstance,
12146
       ["name", "mem_size", "disks", "disk_template", "os", "tags", "nics",
12147
        "vcpus", "hypervisor"], ht.TList),
12148
    constants.IALLOCATOR_MODE_RELOC:
12149
      (_AddRelocateInstance, ["name", "relocate_from"], ht.TList),
12150
    constants.IALLOCATOR_MODE_MEVAC:
12151
      (_AddEvacuateNodes, ["evac_nodes"],
12152
       ht.TListOf(ht.TAnd(ht.TIsLength(2),
12153
                          ht.TListOf(ht.TString)))),
12154
    constants.IALLOCATOR_MODE_MRELOC:
12155
      (_AddMultiRelocate, ["instances", "reloc_mode", "target_groups"],
12156
       ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
12157
         # pylint: disable-msg=E1101
12158
         # Class '...' has no 'OP_ID' member
12159
         "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
12160
                              opcodes.OpInstanceMigrate.OP_ID,
12161
                              opcodes.OpInstanceReplaceDisks.OP_ID])
12162
         })))),
12163
    }
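  # Sketch of how _MODE_DATA is consumed (illustrative call, names made up):
  # __init__ looks up (request_fn, required_keys, result_check) for the given
  # mode, requires exactly those keyword arguments, and _BuildInputData then
  # uses the request function to fill in the "request" section.
  #
  #   ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_RELOC,
  #                    name="inst1.example.com", relocate_from=["node2"])
  #   ial.Run("hail")      # runs the external script and validates its output
  #   # ial.success, ial.info and ial.result are then available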
12164

    
12165
  def Run(self, name, validate=True, call_fn=None):
12166
    """Run an instance allocator and return the results.
12167

12168
    """
12169
    if call_fn is None:
12170
      call_fn = self.rpc.call_iallocator_runner
12171

    
12172
    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
12173
    result.Raise("Failure while running the iallocator script")
12174

    
12175
    self.out_text = result.payload
12176
    if validate:
12177
      self._ValidateResult()
12178

    
12179
  def _ValidateResult(self):
12180
    """Process the allocator results.
12181

12182
    This will process and if successful save the result in
12183
    self.out_data and the other parameters.
12184

12185
    """
12186
    try:
12187
      rdict = serializer.Load(self.out_text)
12188
    except Exception, err:
12189
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
12190

    
12191
    if not isinstance(rdict, dict):
12192
      raise errors.OpExecError("Can't parse iallocator results: not a dict")
12193

    
12194
    # TODO: remove backwards compatibility in later versions
12195
    if "nodes" in rdict and "result" not in rdict:
12196
      rdict["result"] = rdict["nodes"]
12197
      del rdict["nodes"]
12198

    
12199
    for key in "success", "info", "result":
12200
      if key not in rdict:
12201
        raise errors.OpExecError("Can't parse iallocator results:"
12202
                                 " missing key '%s'" % key)
12203
      setattr(self, key, rdict[key])
12204

    
12205
    if not self._result_check(self.result):
12206
      raise errors.OpExecError("Iallocator returned invalid result,"
12207
                               " expected %s, got %s" %
12208
                               (self._result_check, self.result),
12209
                               errors.ECODE_INVAL)
12210

    
12211
    if self.mode in (constants.IALLOCATOR_MODE_RELOC,
12212
                     constants.IALLOCATOR_MODE_MEVAC):
12213
      node2group = dict((name, ndata["group"])
12214
                        for (name, ndata) in self.in_data["nodes"].items())
12215

    
12216
      fn = compat.partial(self._NodesToGroups, node2group,
12217
                          self.in_data["nodegroups"])
12218

    
12219
      if self.mode == constants.IALLOCATOR_MODE_RELOC:
12220
        assert self.relocate_from is not None
12221
        assert self.required_nodes == 1
12222

    
12223
        request_groups = fn(self.relocate_from)
12224
        result_groups = fn(rdict["result"])
12225

    
12226
        if result_groups != request_groups:
12227
          raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
12228
                                   " differ from original groups (%s)" %
12229
                                   (utils.CommaJoin(result_groups),
12230
                                    utils.CommaJoin(request_groups)))
12231
      elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
12232
        request_groups = fn(self.evac_nodes)
12233
        for (instance_name, secnode) in self.result:
12234
          result_groups = fn([secnode])
12235
          if result_groups != request_groups:
12236
            raise errors.OpExecError("Iallocator returned new secondary node"
12237
                                     " '%s' (group '%s') for instance '%s'"
12238
                                     " which is not in original group '%s'" %
12239
                                     (secnode, utils.CommaJoin(result_groups),
12240
                                      instance_name,
12241
                                      utils.CommaJoin(request_groups)))
12242
      else:
12243
        raise errors.ProgrammerError("Unhandled mode '%s'" % self.mode)
12244

    
12245
    self.out_data = rdict
12246

    
12247
  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @type groups: dict
    @param groups: Group information
    @type nodes: list
    @param nodes: Node names

    """
    result = set()

    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        pass
      else:
        try:
          group = groups[group_uuid]
        except KeyError:
          # Can't find group, let's use UUID
          group_name = group_uuid
        else:
          group_name = group["name"]

        result.add(group_name)

    return sorted(result)
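
  # Editor's illustration, not part of the original module (assuming this
  # staticmethod lives on the IAllocator class used further below), with
  # made-up data:
  #   node2group = {"node1": "uuid-a", "node2": "uuid-b"}
  #   groups = {"uuid-a": {"name": "default"}}
  #   IAllocator._NodesToGroups(node2group, groups,
  #                             ["node1", "node2", "node3"])
  # returns ["default", "uuid-b"]: unknown nodes are skipped and a group that
  # cannot be resolved is reported by its UUID.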


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the test direction and mode.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    elif self.op.mode == constants.IALLOCATOR_MODE_MRELOC:
      if self.op.instances:
        self.op.instances = _GetWantedInstances(self, self.op.instances)
      else:
        raise errors.OpPrereqError("Missing instances to relocate",
                                   errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)
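
  # Editor's illustration, not part of the original module: for the allocation
  # mode, the checks above accept opcode parameters shaped like
  #   disks=[{"size": 1024, "mode": "w"}], nics=[], vcpus=1, mem_size=512
  # i.e. "disks" must be a list of dicts, each with an integer "size" and a
  # "mode" of either 'r' or 'w'; the values themselves are only illustrative.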

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    elif self.op.mode == constants.IALLOCATOR_MODE_MRELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       reloc_mode=self.op.reloc_mode,
                       target_groups=self.op.target_groups)
    else:
      raise errors.ProgrammerError("Unhandled mode '%s' in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result
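
  # Editor's note, not part of the original module: with direction
  # constants.IALLOCATOR_DIR_IN this LU only builds and returns the serialized
  # request (ial.in_text); with constants.IALLOCATOR_DIR_OUT it also runs the
  # named allocator and returns its raw, unvalidated reply (ial.out_text).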


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
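
# Editor's illustration, not part of the original module: typical lookups
# against the table above, e.g.
#   _GetQueryImplementation(constants.QR_NODE)      # -> _NodeQuery
#   _GetQueryImplementation("no-such-resource")     # raises OpPrereqError
# Unknown resource names are rejected with errors.ECODE_INVAL.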