Statistics
| Branch: | Tag: | Revision:

root / lib / cmdlib.py @ d23a2a9d

History | View | Annotate | Download (438.7 kB)

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable-msg=W0201,C0302
25

    
26
# W0201 since most LU attributes are defined in CheckPrereq or similar
27
# functions
28

    
29
# C0302: since we have waaaay to many lines in this module
30

    
31
import os
32
import os.path
33
import time
34
import re
35
import platform
36
import logging
37
import copy
38
import OpenSSL
39
import socket
40
import tempfile
41
import shutil
42
import itertools
43

    
44
from ganeti import ssh
45
from ganeti import utils
46
from ganeti import errors
47
from ganeti import hypervisor
48
from ganeti import locking
49
from ganeti import constants
50
from ganeti import objects
51
from ganeti import serializer
52
from ganeti import ssconf
53
from ganeti import uidpool
54
from ganeti import compat
55
from ganeti import masterd
56
from ganeti import netutils
57
from ganeti import query
58
from ganeti import qlang
59
from ganeti import opcodes
60
from ganeti import ht
61

    
62
import ganeti.masterd.instance # pylint: disable-msg=W0611
63

    
64

    
65
def _SupportsOob(cfg, node):
66
  """Tells if node supports OOB.
67

68
  @type cfg: L{config.ConfigWriter}
69
  @param cfg: The cluster configuration
70
  @type node: L{objects.Node}
71
  @param node: The node
72
  @return: The OOB script if supported or an empty string otherwise
73

74
  """
75
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
76

    
77

    
78
class ResultWithJobs:
79
  """Data container for LU results with jobs.
80

81
  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
82
  by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
83
  contained in the C{jobs} attribute and include the job IDs in the opcode
84
  result.
85

86
  """
87
  def __init__(self, jobs, **kwargs):
88
    """Initializes this class.
89

90
    Additional return values can be specified as keyword arguments.
91

92
    @type jobs: list of lists of L{opcode.OpCode}
93
    @param jobs: A list of lists of opcode objects
94

95
    """
96
    self.jobs = jobs
97
    self.other = kwargs
98

    
99

    
100
class LogicalUnit(object):
101
  """Logical Unit base class.
102

103
  Subclasses must follow these rules:
104
    - implement ExpandNames
105
    - implement CheckPrereq (except when tasklets are used)
106
    - implement Exec (except when tasklets are used)
107
    - implement BuildHooksEnv
108
    - implement BuildHooksNodes
109
    - redefine HPATH and HTYPE
110
    - optionally redefine their run requirements:
111
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
112

113
  Note that all commands require root permissions.
114

115
  @ivar dry_run_result: the value (if any) that will be returned to the caller
116
      in dry-run mode (signalled by opcode dry_run parameter)
117

118
  """
119
  HPATH = None
120
  HTYPE = None
121
  REQ_BGL = True
122

    
123
  def __init__(self, processor, op, context, rpc):
124
    """Constructor for LogicalUnit.
125

126
    This needs to be overridden in derived classes in order to check op
127
    validity.
128

129
    """
130
    self.proc = processor
131
    self.op = op
132
    self.cfg = context.cfg
133
    self.glm = context.glm
134
    self.context = context
135
    self.rpc = rpc
136
    # Dicts used to declare locking needs to mcpu
137
    self.needed_locks = None
138
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
139
    self.add_locks = {}
140
    self.remove_locks = {}
141
    # Used to force good behavior when calling helper functions
142
    self.recalculate_locks = {}
143
    # logging
144
    self.Log = processor.Log # pylint: disable-msg=C0103
145
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
146
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
147
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
148
    # support for dry-run
149
    self.dry_run_result = None
150
    # support for generic debug attribute
151
    if (not hasattr(self.op, "debug_level") or
152
        not isinstance(self.op.debug_level, int)):
153
      self.op.debug_level = 0
154

    
155
    # Tasklets
156
    self.tasklets = None
157

    
158
    # Validate opcode parameters and set defaults
159
    self.op.Validate(True)
160

    
161
    self.CheckArguments()
162

    
163
  def CheckArguments(self):
164
    """Check syntactic validity for the opcode arguments.
165

166
    This method is for doing a simple syntactic check and ensure
167
    validity of opcode parameters, without any cluster-related
168
    checks. While the same can be accomplished in ExpandNames and/or
169
    CheckPrereq, doing these separate is better because:
170

171
      - ExpandNames is left as as purely a lock-related function
172
      - CheckPrereq is run after we have acquired locks (and possible
173
        waited for them)
174

175
    The function is allowed to change the self.op attribute so that
176
    later methods can no longer worry about missing parameters.
177

178
    """
179
    pass
180

    
181
  def ExpandNames(self):
182
    """Expand names for this LU.
183

184
    This method is called before starting to execute the opcode, and it should
185
    update all the parameters of the opcode to their canonical form (e.g. a
186
    short node name must be fully expanded after this method has successfully
187
    completed). This way locking, hooks, logging, etc. can work correctly.
188

189
    LUs which implement this method must also populate the self.needed_locks
190
    member, as a dict with lock levels as keys, and a list of needed lock names
191
    as values. Rules:
192

193
      - use an empty dict if you don't need any lock
194
      - if you don't need any lock at a particular level omit that level
195
      - don't put anything for the BGL level
196
      - if you want all locks at a level use locking.ALL_SET as a value
197

198
    If you need to share locks (rather than acquire them exclusively) at one
199
    level you can modify self.share_locks, setting a true value (usually 1) for
200
    that level. By default locks are not shared.
201

202
    This function can also define a list of tasklets, which then will be
203
    executed in order instead of the usual LU-level CheckPrereq and Exec
204
    functions, if those are not defined by the LU.
205

206
    Examples::
207

208
      # Acquire all nodes and one instance
209
      self.needed_locks = {
210
        locking.LEVEL_NODE: locking.ALL_SET,
211
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
212
      }
213
      # Acquire just two nodes
214
      self.needed_locks = {
215
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
216
      }
217
      # Acquire no locks
218
      self.needed_locks = {} # No, you can't leave it to the default value None
219

220
    """
221
    # The implementation of this method is mandatory only if the new LU is
222
    # concurrent, so that old LUs don't need to be changed all at the same
223
    # time.
224
    if self.REQ_BGL:
225
      self.needed_locks = {} # Exclusive LUs don't need locks.
226
    else:
227
      raise NotImplementedError
228

    
229
  def DeclareLocks(self, level):
230
    """Declare LU locking needs for a level
231

232
    While most LUs can just declare their locking needs at ExpandNames time,
233
    sometimes there's the need to calculate some locks after having acquired
234
    the ones before. This function is called just before acquiring locks at a
235
    particular level, but after acquiring the ones at lower levels, and permits
236
    such calculations. It can be used to modify self.needed_locks, and by
237
    default it does nothing.
238

239
    This function is only called if you have something already set in
240
    self.needed_locks for the level.
241

242
    @param level: Locking level which is going to be locked
243
    @type level: member of ganeti.locking.LEVELS
244

245
    """
246

    
247
  def CheckPrereq(self):
248
    """Check prerequisites for this LU.
249

250
    This method should check that the prerequisites for the execution
251
    of this LU are fulfilled. It can do internode communication, but
252
    it should be idempotent - no cluster or system changes are
253
    allowed.
254

255
    The method should raise errors.OpPrereqError in case something is
256
    not fulfilled. Its return value is ignored.
257

258
    This method should also update all the parameters of the opcode to
259
    their canonical form if it hasn't been done by ExpandNames before.
260

261
    """
262
    if self.tasklets is not None:
263
      for (idx, tl) in enumerate(self.tasklets):
264
        logging.debug("Checking prerequisites for tasklet %s/%s",
265
                      idx + 1, len(self.tasklets))
266
        tl.CheckPrereq()
267
    else:
268
      pass
269

    
270
  def Exec(self, feedback_fn):
271
    """Execute the LU.
272

273
    This method should implement the actual work. It should raise
274
    errors.OpExecError for failures that are somewhat dealt with in
275
    code, or expected.
276

277
    """
278
    if self.tasklets is not None:
279
      for (idx, tl) in enumerate(self.tasklets):
280
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
281
        tl.Exec(feedback_fn)
282
    else:
283
      raise NotImplementedError
284

    
285
  def BuildHooksEnv(self):
286
    """Build hooks environment for this LU.
287

288
    @rtype: dict
289
    @return: Dictionary containing the environment that will be used for
290
      running the hooks for this LU. The keys of the dict must not be prefixed
291
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
292
      will extend the environment with additional variables. If no environment
293
      should be defined, an empty dictionary should be returned (not C{None}).
294
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
295
      will not be called.
296

297
    """
298
    raise NotImplementedError
299

    
300
  def BuildHooksNodes(self):
301
    """Build list of nodes to run LU's hooks.
302

303
    @rtype: tuple; (list, list)
304
    @return: Tuple containing a list of node names on which the hook
305
      should run before the execution and a list of node names on which the
306
      hook should run after the execution. No nodes should be returned as an
307
      empty list (and not None).
308
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
309
      will not be called.
310

311
    """
312
    raise NotImplementedError
313

    
314
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
315
    """Notify the LU about the results of its hooks.
316

317
    This method is called every time a hooks phase is executed, and notifies
318
    the Logical Unit about the hooks' result. The LU can then use it to alter
319
    its result based on the hooks.  By default the method does nothing and the
320
    previous result is passed back unchanged but any LU can define it if it
321
    wants to use the local cluster hook-scripts somehow.
322

323
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
324
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
325
    @param hook_results: the results of the multi-node hooks rpc call
326
    @param feedback_fn: function used send feedback back to the caller
327
    @param lu_result: the previous Exec result this LU had, or None
328
        in the PRE phase
329
    @return: the new Exec result, based on the previous result
330
        and hook results
331

332
    """
333
    # API must be kept, thus we ignore the unused argument and could
334
    # be a function warnings
335
    # pylint: disable-msg=W0613,R0201
336
    return lu_result
337

    
338
  def _ExpandAndLockInstance(self):
339
    """Helper function to expand and lock an instance.
340

341
    Many LUs that work on an instance take its name in self.op.instance_name
342
    and need to expand it and then declare the expanded name for locking. This
343
    function does it, and then updates self.op.instance_name to the expanded
344
    name. It also initializes needed_locks as a dict, if this hasn't been done
345
    before.
346

347
    """
348
    if self.needed_locks is None:
349
      self.needed_locks = {}
350
    else:
351
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
352
        "_ExpandAndLockInstance called with instance-level locks set"
353
    self.op.instance_name = _ExpandInstanceName(self.cfg,
354
                                                self.op.instance_name)
355
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
356

    
357
  def _LockInstancesNodes(self, primary_only=False):
358
    """Helper function to declare instances' nodes for locking.
359

360
    This function should be called after locking one or more instances to lock
361
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
362
    with all primary or secondary nodes for instances already locked and
363
    present in self.needed_locks[locking.LEVEL_INSTANCE].
364

365
    It should be called from DeclareLocks, and for safety only works if
366
    self.recalculate_locks[locking.LEVEL_NODE] is set.
367

368
    In the future it may grow parameters to just lock some instance's nodes, or
369
    to just lock primaries or secondary nodes, if needed.
370

371
    If should be called in DeclareLocks in a way similar to::
372

373
      if level == locking.LEVEL_NODE:
374
        self._LockInstancesNodes()
375

376
    @type primary_only: boolean
377
    @param primary_only: only lock primary nodes of locked instances
378

379
    """
380
    assert locking.LEVEL_NODE in self.recalculate_locks, \
381
      "_LockInstancesNodes helper function called with no nodes to recalculate"
382

    
383
    # TODO: check if we're really been called with the instance locks held
384

    
385
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
386
    # future we might want to have different behaviors depending on the value
387
    # of self.recalculate_locks[locking.LEVEL_NODE]
388
    wanted_nodes = []
389
    for instance_name in self.glm.list_owned(locking.LEVEL_INSTANCE):
390
      instance = self.context.cfg.GetInstanceInfo(instance_name)
391
      wanted_nodes.append(instance.primary_node)
392
      if not primary_only:
393
        wanted_nodes.extend(instance.secondary_nodes)
394

    
395
    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
396
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
397
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
398
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
399

    
400
    del self.recalculate_locks[locking.LEVEL_NODE]
401

    
402

    
403
class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
404
  """Simple LU which runs no hooks.
405

406
  This LU is intended as a parent for other LogicalUnits which will
407
  run no hooks, in order to reduce duplicate code.
408

409
  """
410
  HPATH = None
411
  HTYPE = None
412

    
413
  def BuildHooksEnv(self):
414
    """Empty BuildHooksEnv for NoHooksLu.
415

416
    This just raises an error.
417

418
    """
419
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")
420

    
421
  def BuildHooksNodes(self):
422
    """Empty BuildHooksNodes for NoHooksLU.
423

424
    """
425
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
426

    
427

    
428
class Tasklet:
429
  """Tasklet base class.
430

431
  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
432
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
433
  tasklets know nothing about locks.
434

435
  Subclasses must follow these rules:
436
    - Implement CheckPrereq
437
    - Implement Exec
438

439
  """
440
  def __init__(self, lu):
441
    self.lu = lu
442

    
443
    # Shortcuts
444
    self.cfg = lu.cfg
445
    self.rpc = lu.rpc
446

    
447
  def CheckPrereq(self):
448
    """Check prerequisites for this tasklets.
449

450
    This method should check whether the prerequisites for the execution of
451
    this tasklet are fulfilled. It can do internode communication, but it
452
    should be idempotent - no cluster or system changes are allowed.
453

454
    The method should raise errors.OpPrereqError in case something is not
455
    fulfilled. Its return value is ignored.
456

457
    This method should also update all parameters to their canonical form if it
458
    hasn't been done before.
459

460
    """
461
    pass
462

    
463
  def Exec(self, feedback_fn):
464
    """Execute the tasklet.
465

466
    This method should implement the actual work. It should raise
467
    errors.OpExecError for failures that are somewhat dealt with in code, or
468
    expected.
469

470
    """
471
    raise NotImplementedError
472

    
473

    
474
class _QueryBase:
475
  """Base for query utility classes.
476

477
  """
478
  #: Attribute holding field definitions
479
  FIELDS = None
480

    
481
  def __init__(self, filter_, fields, use_locking):
482
    """Initializes this class.
483

484
    """
485
    self.use_locking = use_locking
486

    
487
    self.query = query.Query(self.FIELDS, fields, filter_=filter_,
488
                             namefield="name")
489
    self.requested_data = self.query.RequestedData()
490
    self.names = self.query.RequestedNames()
491

    
492
    # Sort only if no names were requested
493
    self.sort_by_name = not self.names
494

    
495
    self.do_locking = None
496
    self.wanted = None
497

    
498
  def _GetNames(self, lu, all_names, lock_level):
499
    """Helper function to determine names asked for in the query.
500

501
    """
502
    if self.do_locking:
503
      names = lu.glm.list_owned(lock_level)
504
    else:
505
      names = all_names
506

    
507
    if self.wanted == locking.ALL_SET:
508
      assert not self.names
509
      # caller didn't specify names, so ordering is not important
510
      return utils.NiceSort(names)
511

    
512
    # caller specified names and we must keep the same order
513
    assert self.names
514
    assert not self.do_locking or lu.glm.is_owned(lock_level)
515

    
516
    missing = set(self.wanted).difference(names)
517
    if missing:
518
      raise errors.OpExecError("Some items were removed before retrieving"
519
                               " their data: %s" % missing)
520

    
521
    # Return expanded names
522
    return self.wanted
523

    
524
  def ExpandNames(self, lu):
525
    """Expand names for this query.
526

527
    See L{LogicalUnit.ExpandNames}.
528

529
    """
530
    raise NotImplementedError()
531

    
532
  def DeclareLocks(self, lu, level):
533
    """Declare locks for this query.
534

535
    See L{LogicalUnit.DeclareLocks}.
536

537
    """
538
    raise NotImplementedError()
539

    
540
  def _GetQueryData(self, lu):
541
    """Collects all data for this query.
542

543
    @return: Query data object
544

545
    """
546
    raise NotImplementedError()
547

    
548
  def NewStyleQuery(self, lu):
549
    """Collect data and execute query.
550

551
    """
552
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
553
                                  sort_by_name=self.sort_by_name)
554

    
555
  def OldStyleQuery(self, lu):
556
    """Collect data and execute query.
557

558
    """
559
    return self.query.OldStyleQuery(self._GetQueryData(lu),
560
                                    sort_by_name=self.sort_by_name)
561

    
562

    
563
def _GetWantedNodes(lu, nodes):
564
  """Returns list of checked and expanded node names.
565

566
  @type lu: L{LogicalUnit}
567
  @param lu: the logical unit on whose behalf we execute
568
  @type nodes: list
569
  @param nodes: list of node names or None for all nodes
570
  @rtype: list
571
  @return: the list of nodes, sorted
572
  @raise errors.ProgrammerError: if the nodes parameter is wrong type
573

574
  """
575
  if nodes:
576
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]
577

    
578
  return utils.NiceSort(lu.cfg.GetNodeList())
579

    
580

    
581
def _GetWantedInstances(lu, instances):
582
  """Returns list of checked and expanded instance names.
583

584
  @type lu: L{LogicalUnit}
585
  @param lu: the logical unit on whose behalf we execute
586
  @type instances: list
587
  @param instances: list of instance names or None for all instances
588
  @rtype: list
589
  @return: the list of instances, sorted
590
  @raise errors.OpPrereqError: if the instances parameter is wrong type
591
  @raise errors.OpPrereqError: if any of the passed instances is not found
592

593
  """
594
  if instances:
595
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
596
  else:
597
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
598
  return wanted
599

    
600

    
601
def _GetUpdatedParams(old_params, update_dict,
602
                      use_default=True, use_none=False):
603
  """Return the new version of a parameter dictionary.
604

605
  @type old_params: dict
606
  @param old_params: old parameters
607
  @type update_dict: dict
608
  @param update_dict: dict containing new parameter values, or
609
      constants.VALUE_DEFAULT to reset the parameter to its default
610
      value
611
  @param use_default: boolean
612
  @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
613
      values as 'to be deleted' values
614
  @param use_none: boolean
615
  @type use_none: whether to recognise C{None} values as 'to be
616
      deleted' values
617
  @rtype: dict
618
  @return: the new parameter dictionary
619

620
  """
621
  params_copy = copy.deepcopy(old_params)
622
  for key, val in update_dict.iteritems():
623
    if ((use_default and val == constants.VALUE_DEFAULT) or
624
        (use_none and val is None)):
625
      try:
626
        del params_copy[key]
627
      except KeyError:
628
        pass
629
    else:
630
      params_copy[key] = val
631
  return params_copy
632

    
633

    
634
def _ReleaseLocks(lu, level, names=None, keep=None):
635
  """Releases locks owned by an LU.
636

637
  @type lu: L{LogicalUnit}
638
  @param level: Lock level
639
  @type names: list or None
640
  @param names: Names of locks to release
641
  @type keep: list or None
642
  @param keep: Names of locks to retain
643

644
  """
645
  assert not (keep is not None and names is not None), \
646
         "Only one of the 'names' and the 'keep' parameters can be given"
647

    
648
  if names is not None:
649
    should_release = names.__contains__
650
  elif keep:
651
    should_release = lambda name: name not in keep
652
  else:
653
    should_release = None
654

    
655
  if should_release:
656
    retain = []
657
    release = []
658

    
659
    # Determine which locks to release
660
    for name in lu.glm.list_owned(level):
661
      if should_release(name):
662
        release.append(name)
663
      else:
664
        retain.append(name)
665

    
666
    assert len(lu.glm.list_owned(level)) == (len(retain) + len(release))
667

    
668
    # Release just some locks
669
    lu.glm.release(level, names=release)
670

    
671
    assert frozenset(lu.glm.list_owned(level)) == frozenset(retain)
672
  else:
673
    # Release everything
674
    lu.glm.release(level)
675

    
676
    assert not lu.glm.is_owned(level), "No locks should be owned"
677

    
678

    
679
def _RunPostHook(lu, node_name):
680
  """Runs the post-hook for an opcode on a single node.
681

682
  """
683
  hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
684
  try:
685
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
686
  except:
687
    # pylint: disable-msg=W0702
688
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)
689

    
690

    
691
def _CheckOutputFields(static, dynamic, selected):
692
  """Checks whether all selected fields are valid.
693

694
  @type static: L{utils.FieldSet}
695
  @param static: static fields set
696
  @type dynamic: L{utils.FieldSet}
697
  @param dynamic: dynamic fields set
698

699
  """
700
  f = utils.FieldSet()
701
  f.Extend(static)
702
  f.Extend(dynamic)
703

    
704
  delta = f.NonMatching(selected)
705
  if delta:
706
    raise errors.OpPrereqError("Unknown output fields selected: %s"
707
                               % ",".join(delta), errors.ECODE_INVAL)
708

    
709

    
710
def _CheckGlobalHvParams(params):
711
  """Validates that given hypervisor params are not global ones.
712

713
  This will ensure that instances don't get customised versions of
714
  global params.
715

716
  """
717
  used_globals = constants.HVC_GLOBALS.intersection(params)
718
  if used_globals:
719
    msg = ("The following hypervisor parameters are global and cannot"
720
           " be customized at instance level, please modify them at"
721
           " cluster level: %s" % utils.CommaJoin(used_globals))
722
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
723

    
724

    
725
def _CheckNodeOnline(lu, node, msg=None):
726
  """Ensure that a given node is online.
727

728
  @param lu: the LU on behalf of which we make the check
729
  @param node: the node to check
730
  @param msg: if passed, should be a message to replace the default one
731
  @raise errors.OpPrereqError: if the node is offline
732

733
  """
734
  if msg is None:
735
    msg = "Can't use offline node"
736
  if lu.cfg.GetNodeInfo(node).offline:
737
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
738

    
739

    
740
def _CheckNodeNotDrained(lu, node):
741
  """Ensure that a given node is not drained.
742

743
  @param lu: the LU on behalf of which we make the check
744
  @param node: the node to check
745
  @raise errors.OpPrereqError: if the node is drained
746

747
  """
748
  if lu.cfg.GetNodeInfo(node).drained:
749
    raise errors.OpPrereqError("Can't use drained node %s" % node,
750
                               errors.ECODE_STATE)
751

    
752

    
753
def _CheckNodeVmCapable(lu, node):
754
  """Ensure that a given node is vm capable.
755

756
  @param lu: the LU on behalf of which we make the check
757
  @param node: the node to check
758
  @raise errors.OpPrereqError: if the node is not vm capable
759

760
  """
761
  if not lu.cfg.GetNodeInfo(node).vm_capable:
762
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
763
                               errors.ECODE_STATE)
764

    
765

    
766
def _CheckNodeHasOS(lu, node, os_name, force_variant):
767
  """Ensure that a node supports a given OS.
768

769
  @param lu: the LU on behalf of which we make the check
770
  @param node: the node to check
771
  @param os_name: the OS to query about
772
  @param force_variant: whether to ignore variant errors
773
  @raise errors.OpPrereqError: if the node is not supporting the OS
774

775
  """
776
  result = lu.rpc.call_os_get(node, os_name)
777
  result.Raise("OS '%s' not in supported OS list for node %s" %
778
               (os_name, node),
779
               prereq=True, ecode=errors.ECODE_INVAL)
780
  if not force_variant:
781
    _CheckOSVariant(result.payload, os_name)
782

    
783

    
784
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
785
  """Ensure that a node has the given secondary ip.
786

787
  @type lu: L{LogicalUnit}
788
  @param lu: the LU on behalf of which we make the check
789
  @type node: string
790
  @param node: the node to check
791
  @type secondary_ip: string
792
  @param secondary_ip: the ip to check
793
  @type prereq: boolean
794
  @param prereq: whether to throw a prerequisite or an execute error
795
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
796
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
797

798
  """
799
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
800
  result.Raise("Failure checking secondary ip on node %s" % node,
801
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
802
  if not result.payload:
803
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
804
           " please fix and re-run this command" % secondary_ip)
805
    if prereq:
806
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
807
    else:
808
      raise errors.OpExecError(msg)
809

    
810

    
811
def _GetClusterDomainSecret():
812
  """Reads the cluster domain secret.
813

814
  """
815
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
816
                               strict=True)
817

    
818

    
819
def _CheckInstanceDown(lu, instance, reason):
820
  """Ensure that an instance is not running."""
821
  if instance.admin_up:
822
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
823
                               (instance.name, reason), errors.ECODE_STATE)
824

    
825
  pnode = instance.primary_node
826
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
827
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
828
              prereq=True, ecode=errors.ECODE_ENVIRON)
829

    
830
  if instance.name in ins_l.payload:
831
    raise errors.OpPrereqError("Instance %s is running, %s" %
832
                               (instance.name, reason), errors.ECODE_STATE)
833

    
834

    
835
def _ExpandItemName(fn, name, kind):
836
  """Expand an item name.
837

838
  @param fn: the function to use for expansion
839
  @param name: requested item name
840
  @param kind: text description ('Node' or 'Instance')
841
  @return: the resolved (full) name
842
  @raise errors.OpPrereqError: if the item is not found
843

844
  """
845
  full_name = fn(name)
846
  if full_name is None:
847
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
848
                               errors.ECODE_NOENT)
849
  return full_name
850

    
851

    
852
def _ExpandNodeName(cfg, name):
853
  """Wrapper over L{_ExpandItemName} for nodes."""
854
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
855

    
856

    
857
def _ExpandInstanceName(cfg, name):
858
  """Wrapper over L{_ExpandItemName} for instance."""
859
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
860

    
861

    
862
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
863
                          memory, vcpus, nics, disk_template, disks,
864
                          bep, hvp, hypervisor_name):
865
  """Builds instance related env variables for hooks
866

867
  This builds the hook environment from individual variables.
868

869
  @type name: string
870
  @param name: the name of the instance
871
  @type primary_node: string
872
  @param primary_node: the name of the instance's primary node
873
  @type secondary_nodes: list
874
  @param secondary_nodes: list of secondary nodes as strings
875
  @type os_type: string
876
  @param os_type: the name of the instance's OS
877
  @type status: boolean
878
  @param status: the should_run status of the instance
879
  @type memory: string
880
  @param memory: the memory size of the instance
881
  @type vcpus: string
882
  @param vcpus: the count of VCPUs the instance has
883
  @type nics: list
884
  @param nics: list of tuples (ip, mac, mode, link) representing
885
      the NICs the instance has
886
  @type disk_template: string
887
  @param disk_template: the disk template of the instance
888
  @type disks: list
889
  @param disks: the list of (size, mode) pairs
890
  @type bep: dict
891
  @param bep: the backend parameters for the instance
892
  @type hvp: dict
893
  @param hvp: the hypervisor parameters for the instance
894
  @type hypervisor_name: string
895
  @param hypervisor_name: the hypervisor for the instance
896
  @rtype: dict
897
  @return: the hook environment for this instance
898

899
  """
900
  if status:
901
    str_status = "up"
902
  else:
903
    str_status = "down"
904
  env = {
905
    "OP_TARGET": name,
906
    "INSTANCE_NAME": name,
907
    "INSTANCE_PRIMARY": primary_node,
908
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
909
    "INSTANCE_OS_TYPE": os_type,
910
    "INSTANCE_STATUS": str_status,
911
    "INSTANCE_MEMORY": memory,
912
    "INSTANCE_VCPUS": vcpus,
913
    "INSTANCE_DISK_TEMPLATE": disk_template,
914
    "INSTANCE_HYPERVISOR": hypervisor_name,
915
  }
916

    
917
  if nics:
918
    nic_count = len(nics)
919
    for idx, (ip, mac, mode, link) in enumerate(nics):
920
      if ip is None:
921
        ip = ""
922
      env["INSTANCE_NIC%d_IP" % idx] = ip
923
      env["INSTANCE_NIC%d_MAC" % idx] = mac
924
      env["INSTANCE_NIC%d_MODE" % idx] = mode
925
      env["INSTANCE_NIC%d_LINK" % idx] = link
926
      if mode == constants.NIC_MODE_BRIDGED:
927
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
928
  else:
929
    nic_count = 0
930

    
931
  env["INSTANCE_NIC_COUNT"] = nic_count
932

    
933
  if disks:
934
    disk_count = len(disks)
935
    for idx, (size, mode) in enumerate(disks):
936
      env["INSTANCE_DISK%d_SIZE" % idx] = size
937
      env["INSTANCE_DISK%d_MODE" % idx] = mode
938
  else:
939
    disk_count = 0
940

    
941
  env["INSTANCE_DISK_COUNT"] = disk_count
942

    
943
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
944
    for key, value in source.items():
945
      env["INSTANCE_%s_%s" % (kind, key)] = value
946

    
947
  return env
948

    
949

    
950
def _NICListToTuple(lu, nics):
951
  """Build a list of nic information tuples.
952

953
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
954
  value in LUInstanceQueryData.
955

956
  @type lu:  L{LogicalUnit}
957
  @param lu: the logical unit on whose behalf we execute
958
  @type nics: list of L{objects.NIC}
959
  @param nics: list of nics to convert to hooks tuples
960

961
  """
962
  hooks_nics = []
963
  cluster = lu.cfg.GetClusterInfo()
964
  for nic in nics:
965
    ip = nic.ip
966
    mac = nic.mac
967
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
968
    mode = filled_params[constants.NIC_MODE]
969
    link = filled_params[constants.NIC_LINK]
970
    hooks_nics.append((ip, mac, mode, link))
971
  return hooks_nics
972

    
973

    
974
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
975
  """Builds instance related env variables for hooks from an object.
976

977
  @type lu: L{LogicalUnit}
978
  @param lu: the logical unit on whose behalf we execute
979
  @type instance: L{objects.Instance}
980
  @param instance: the instance for which we should build the
981
      environment
982
  @type override: dict
983
  @param override: dictionary with key/values that will override
984
      our values
985
  @rtype: dict
986
  @return: the hook environment dictionary
987

988
  """
989
  cluster = lu.cfg.GetClusterInfo()
990
  bep = cluster.FillBE(instance)
991
  hvp = cluster.FillHV(instance)
992
  args = {
993
    'name': instance.name,
994
    'primary_node': instance.primary_node,
995
    'secondary_nodes': instance.secondary_nodes,
996
    'os_type': instance.os,
997
    'status': instance.admin_up,
998
    'memory': bep[constants.BE_MEMORY],
999
    'vcpus': bep[constants.BE_VCPUS],
1000
    'nics': _NICListToTuple(lu, instance.nics),
1001
    'disk_template': instance.disk_template,
1002
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
1003
    'bep': bep,
1004
    'hvp': hvp,
1005
    'hypervisor_name': instance.hypervisor,
1006
  }
1007
  if override:
1008
    args.update(override)
1009
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
1010

    
1011

    
1012
def _AdjustCandidatePool(lu, exceptions):
1013
  """Adjust the candidate pool after node operations.
1014

1015
  """
1016
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1017
  if mod_list:
1018
    lu.LogInfo("Promoted nodes to master candidate role: %s",
1019
               utils.CommaJoin(node.name for node in mod_list))
1020
    for name in mod_list:
1021
      lu.context.ReaddNode(name)
1022
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1023
  if mc_now > mc_max:
1024
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1025
               (mc_now, mc_max))
1026

    
1027

    
1028
def _DecideSelfPromotion(lu, exceptions=None):
1029
  """Decide whether I should promote myself as a master candidate.
1030

1031
  """
1032
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1033
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1034
  # the new node will increase mc_max with one, so:
1035
  mc_should = min(mc_should + 1, cp_size)
1036
  return mc_now < mc_should
1037

    
1038

    
1039
def _CheckNicsBridgesExist(lu, target_nics, target_node):
1040
  """Check that the brigdes needed by a list of nics exist.
1041

1042
  """
1043
  cluster = lu.cfg.GetClusterInfo()
1044
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1045
  brlist = [params[constants.NIC_LINK] for params in paramslist
1046
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1047
  if brlist:
1048
    result = lu.rpc.call_bridges_exist(target_node, brlist)
1049
    result.Raise("Error checking bridges on destination node '%s'" %
1050
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1051

    
1052

    
1053
def _CheckInstanceBridgesExist(lu, instance, node=None):
1054
  """Check that the brigdes needed by an instance exist.
1055

1056
  """
1057
  if node is None:
1058
    node = instance.primary_node
1059
  _CheckNicsBridgesExist(lu, instance.nics, node)
1060

    
1061

    
1062
def _CheckOSVariant(os_obj, name):
1063
  """Check whether an OS name conforms to the os variants specification.
1064

1065
  @type os_obj: L{objects.OS}
1066
  @param os_obj: OS object to check
1067
  @type name: string
1068
  @param name: OS name passed by the user, to check for validity
1069

1070
  """
1071
  if not os_obj.supported_variants:
1072
    return
1073
  variant = objects.OS.GetVariant(name)
1074
  if not variant:
1075
    raise errors.OpPrereqError("OS name must include a variant",
1076
                               errors.ECODE_INVAL)
1077

    
1078
  if variant not in os_obj.supported_variants:
1079
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1080

    
1081

    
1082
def _GetNodeInstancesInner(cfg, fn):
1083
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1084

    
1085

    
1086
def _GetNodeInstances(cfg, node_name):
1087
  """Returns a list of all primary and secondary instances on a node.
1088

1089
  """
1090

    
1091
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1092

    
1093

    
1094
def _GetNodePrimaryInstances(cfg, node_name):
1095
  """Returns primary instances on a node.
1096

1097
  """
1098
  return _GetNodeInstancesInner(cfg,
1099
                                lambda inst: node_name == inst.primary_node)
1100

    
1101

    
1102
def _GetNodeSecondaryInstances(cfg, node_name):
1103
  """Returns secondary instances on a node.
1104

1105
  """
1106
  return _GetNodeInstancesInner(cfg,
1107
                                lambda inst: node_name in inst.secondary_nodes)
1108

    
1109

    
1110
def _GetStorageTypeArgs(cfg, storage_type):
1111
  """Returns the arguments for a storage type.
1112

1113
  """
1114
  # Special case for file storage
1115
  if storage_type == constants.ST_FILE:
1116
    # storage.FileStorage wants a list of storage directories
1117
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1118

    
1119
  return []
1120

    
1121

    
1122
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1123
  faulty = []
1124

    
1125
  for dev in instance.disks:
1126
    cfg.SetDiskID(dev, node_name)
1127

    
1128
  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1129
  result.Raise("Failed to get disk status from node %s" % node_name,
1130
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
1131

    
1132
  for idx, bdev_status in enumerate(result.payload):
1133
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1134
      faulty.append(idx)
1135

    
1136
  return faulty
1137

    
1138

    
1139
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1140
  """Check the sanity of iallocator and node arguments and use the
1141
  cluster-wide iallocator if appropriate.
1142

1143
  Check that at most one of (iallocator, node) is specified. If none is
1144
  specified, then the LU's opcode's iallocator slot is filled with the
1145
  cluster-wide default iallocator.
1146

1147
  @type iallocator_slot: string
1148
  @param iallocator_slot: the name of the opcode iallocator slot
1149
  @type node_slot: string
1150
  @param node_slot: the name of the opcode target node slot
1151

1152
  """
1153
  node = getattr(lu.op, node_slot, None)
1154
  iallocator = getattr(lu.op, iallocator_slot, None)
1155

    
1156
  if node is not None and iallocator is not None:
1157
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
1158
                               errors.ECODE_INVAL)
1159
  elif node is None and iallocator is None:
1160
    default_iallocator = lu.cfg.GetDefaultIAllocator()
1161
    if default_iallocator:
1162
      setattr(lu.op, iallocator_slot, default_iallocator)
1163
    else:
1164
      raise errors.OpPrereqError("No iallocator or node given and no"
1165
                                 " cluster-wide default iallocator found;"
1166
                                 " please specify either an iallocator or a"
1167
                                 " node, or set a cluster-wide default"
1168
                                 " iallocator")
1169

    
1170

    
1171
class LUClusterPostInit(LogicalUnit):
1172
  """Logical unit for running hooks after cluster initialization.
1173

1174
  """
1175
  HPATH = "cluster-init"
1176
  HTYPE = constants.HTYPE_CLUSTER
1177

    
1178
  def BuildHooksEnv(self):
1179
    """Build hooks env.
1180

1181
    """
1182
    return {
1183
      "OP_TARGET": self.cfg.GetClusterName(),
1184
      }
1185

    
1186
  def BuildHooksNodes(self):
1187
    """Build hooks nodes.
1188

1189
    """
1190
    return ([], [self.cfg.GetMasterNode()])
1191

    
1192
  def Exec(self, feedback_fn):
1193
    """Nothing to do.
1194

1195
    """
1196
    return True
1197

    
1198

    
1199
class LUClusterDestroy(LogicalUnit):
1200
  """Logical unit for destroying the cluster.
1201

1202
  """
1203
  HPATH = "cluster-destroy"
1204
  HTYPE = constants.HTYPE_CLUSTER
1205

    
1206
  def BuildHooksEnv(self):
1207
    """Build hooks env.
1208

1209
    """
1210
    return {
1211
      "OP_TARGET": self.cfg.GetClusterName(),
1212
      }
1213

    
1214
  def BuildHooksNodes(self):
1215
    """Build hooks nodes.
1216

1217
    """
1218
    return ([], [])
1219

    
1220
  def CheckPrereq(self):
1221
    """Check prerequisites.
1222

1223
    This checks whether the cluster is empty.
1224

1225
    Any errors are signaled by raising errors.OpPrereqError.
1226

1227
    """
1228
    master = self.cfg.GetMasterNode()
1229

    
1230
    nodelist = self.cfg.GetNodeList()
1231
    if len(nodelist) != 1 or nodelist[0] != master:
1232
      raise errors.OpPrereqError("There are still %d node(s) in"
1233
                                 " this cluster." % (len(nodelist) - 1),
1234
                                 errors.ECODE_INVAL)
1235
    instancelist = self.cfg.GetInstanceList()
1236
    if instancelist:
1237
      raise errors.OpPrereqError("There are still %d instance(s) in"
1238
                                 " this cluster." % len(instancelist),
1239
                                 errors.ECODE_INVAL)
1240

    
1241
  def Exec(self, feedback_fn):
1242
    """Destroys the cluster.
1243

1244
    """
1245
    master = self.cfg.GetMasterNode()
1246

    
1247
    # Run post hooks on master node before it's removed
1248
    _RunPostHook(self, master)
1249

    
1250
    result = self.rpc.call_node_stop_master(master, False)
1251
    result.Raise("Could not disable the master role")
1252

    
1253
    return master
1254

    
1255

    
1256
def _VerifyCertificate(filename):
1257
  """Verifies a certificate for LUClusterVerifyConfig.
1258

1259
  @type filename: string
1260
  @param filename: Path to PEM file
1261

1262
  """
1263
  try:
1264
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1265
                                           utils.ReadFile(filename))
1266
  except Exception, err: # pylint: disable-msg=W0703
1267
    return (LUClusterVerifyConfig.ETYPE_ERROR,
1268
            "Failed to load X509 certificate %s: %s" % (filename, err))
1269

    
1270
  (errcode, msg) = \
1271
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1272
                                constants.SSL_CERT_EXPIRATION_ERROR)
1273

    
1274
  if msg:
1275
    fnamemsg = "While verifying %s: %s" % (filename, msg)
1276
  else:
1277
    fnamemsg = None
1278

    
1279
  if errcode is None:
1280
    return (None, fnamemsg)
1281
  elif errcode == utils.CERT_WARNING:
1282
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1283
  elif errcode == utils.CERT_ERROR:
1284
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1285

    
1286
  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1287

    
1288

    
1289
def _GetAllHypervisorParameters(cluster, instances):
1290
  """Compute the set of all hypervisor parameters.
1291

1292
  @type cluster: L{objects.Cluster}
1293
  @param cluster: the cluster object
1294
  @param instances: list of L{objects.Instance}
1295
  @param instances: additional instances from which to obtain parameters
1296
  @rtype: list of (origin, hypervisor, parameters)
1297
  @return: a list with all parameters found, indicating the hypervisor they
1298
       apply to, and the origin (can be "cluster", "os X", or "instance Y")
1299

1300
  """
1301
  hvp_data = []
1302

    
1303
  for hv_name in cluster.enabled_hypervisors:
1304
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1305

    
1306
  for os_name, os_hvp in cluster.os_hvp.items():
1307
    for hv_name, hv_params in os_hvp.items():
1308
      if hv_params:
1309
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1310
        hvp_data.append(("os %s" % os_name, hv_name, full_params))
1311

    
1312
  # TODO: collapse identical parameter values in a single one
1313
  for instance in instances:
1314
    if instance.hvparams:
1315
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1316
                       cluster.FillHV(instance)))
1317

    
1318
  return hvp_data
1319

    
1320

    
1321
class _VerifyErrors(object):
1322
  """Mix-in for cluster/group verify LUs.
1323

1324
  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1325
  self.op and self._feedback_fn to be available.)
1326

1327
  """
1328
  TCLUSTER = "cluster"
1329
  TNODE = "node"
1330
  TINSTANCE = "instance"
1331

    
1332
  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1333
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1334
  ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
1335
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1336
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1337
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1338
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1339
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1340
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1341
  EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
1342
  ENODEDRBD = (TNODE, "ENODEDRBD")
1343
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1344
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1345
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
1346
  ENODEHV = (TNODE, "ENODEHV")
1347
  ENODELVM = (TNODE, "ENODELVM")
1348
  ENODEN1 = (TNODE, "ENODEN1")
1349
  ENODENET = (TNODE, "ENODENET")
1350
  ENODEOS = (TNODE, "ENODEOS")
1351
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1352
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1353
  ENODERPC = (TNODE, "ENODERPC")
1354
  ENODESSH = (TNODE, "ENODESSH")
1355
  ENODEVERSION = (TNODE, "ENODEVERSION")
1356
  ENODESETUP = (TNODE, "ENODESETUP")
1357
  ENODETIME = (TNODE, "ENODETIME")
1358
  ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
1359

    
1360
  ETYPE_FIELD = "code"
1361
  ETYPE_ERROR = "ERROR"
1362
  ETYPE_WARNING = "WARNING"
1363

    
1364
  def _Error(self, ecode, item, msg, *args, **kwargs):
1365
    """Format an error message.
1366

1367
    Based on the opcode's error_codes parameter, either format a
1368
    parseable error code, or a simpler error string.
1369

1370
    This must be called only from Exec and functions called from Exec.
1371

1372
    """
1373
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1374
    itype, etxt = ecode
1375
    # first complete the msg
1376
    if args:
1377
      msg = msg % args
1378
    # then format the whole message
1379
    if self.op.error_codes: # This is a mix-in. pylint: disable-msg=E1101
1380
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1381
    else:
1382
      if item:
1383
        item = " " + item
1384
      else:
1385
        item = ""
1386
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1387
    # and finally report it via the feedback_fn
1388
    self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable-msg=E1101
1389

    
1390
  def _ErrorIf(self, cond, *args, **kwargs):
1391
    """Log an error message if the passed condition is True.
1392

1393
    """
1394
    cond = (bool(cond)
1395
            or self.op.debug_simulate_errors) # pylint: disable-msg=E1101
1396
    if cond:
1397
      self._Error(*args, **kwargs)
1398
    # do not mark the operation as failed for WARN cases only
1399
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1400
      self.bad = self.bad or cond
1401

    
1402

    
1403
class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1404
  """Verifies the cluster config.
1405

1406
  """
1407

    
1408
  REQ_BGL = False
1409

    
1410
  def _VerifyHVP(self, hvp_data):
1411
    """Verifies locally the syntax of the hypervisor parameters.
1412

1413
    """
1414
    for item, hv_name, hv_params in hvp_data:
1415
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1416
             (item, hv_name))
1417
      try:
1418
        hv_class = hypervisor.GetHypervisor(hv_name)
1419
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1420
        hv_class.CheckParameterSyntax(hv_params)
1421
      except errors.GenericError, err:
1422
        self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
1423

    
1424
  def ExpandNames(self):
1425
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1426
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
1427
    self.needed_locks = {}
1428

    
1429
  def Exec(self, feedback_fn):
1430
    """Verify integrity of cluster, performing various test on nodes.
1431

1432
    """
1433
    self.bad = False
1434
    self._feedback_fn = feedback_fn
1435

    
1436
    feedback_fn("* Verifying cluster config")
1437

    
1438
    for msg in self.cfg.VerifyConfig():
1439
      self._ErrorIf(True, self.ECLUSTERCFG, None, msg)
1440

    
1441
    feedback_fn("* Verifying cluster certificate files")
1442

    
1443
    for cert_filename in constants.ALL_CERT_FILES:
1444
      (errcode, msg) = _VerifyCertificate(cert_filename)
1445
      self._ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1446

    
1447
    feedback_fn("* Verifying hypervisor parameters")
1448

    
1449
    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1450
                                                self.all_inst_info.values()))
1451

    
1452
    return (not self.bad, [g.name for g in self.all_group_info.values()])
1453

    
1454

    
1455
class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1456
  """Verifies the status of a node group.
1457

1458
  """
1459

    
1460
  HPATH = "cluster-verify"
1461
  HTYPE = constants.HTYPE_CLUSTER
1462
  REQ_BGL = False
1463

    
1464
  _HOOKS_INDENT_RE = re.compile("^", re.M)
1465

    
1466
  class NodeImage(object):
1467
    """A class representing the logical and physical status of a node.
1468

1469
    @type name: string
1470
    @ivar name: the node name to which this object refers
1471
    @ivar volumes: a structure as returned from
1472
        L{ganeti.backend.GetVolumeList} (runtime)
1473
    @ivar instances: a list of running instances (runtime)
1474
    @ivar pinst: list of configured primary instances (config)
1475
    @ivar sinst: list of configured secondary instances (config)
1476
    @ivar sbp: dictionary of {primary-node: list of instances} for all
1477
        instances for which this node is secondary (config)
1478
    @ivar mfree: free memory, as reported by hypervisor (runtime)
1479
    @ivar dfree: free disk, as reported by the node (runtime)
1480
    @ivar offline: the offline status (config)
1481
    @type rpc_fail: boolean
1482
    @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1483
        not whether the individual keys were correct) (runtime)
1484
    @type lvm_fail: boolean
1485
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1486
    @type hyp_fail: boolean
1487
    @ivar hyp_fail: whether the RPC call didn't return the instance list
1488
    @type ghost: boolean
1489
    @ivar ghost: whether this is a known node or not (config)
1490
    @type os_fail: boolean
1491
    @ivar os_fail: whether the RPC call didn't return valid OS data
1492
    @type oslist: list
1493
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1494
    @type vm_capable: boolean
1495
    @ivar vm_capable: whether the node can host instances
1496

1497
    """
1498
    def __init__(self, offline=False, name=None, vm_capable=True):
1499
      self.name = name
1500
      self.volumes = {}
1501
      self.instances = []
1502
      self.pinst = []
1503
      self.sinst = []
1504
      self.sbp = {}
1505
      self.mfree = 0
1506
      self.dfree = 0
1507
      self.offline = offline
1508
      self.vm_capable = vm_capable
1509
      self.rpc_fail = False
1510
      self.lvm_fail = False
1511
      self.hyp_fail = False
1512
      self.ghost = False
1513
      self.os_fail = False
1514
      self.oslist = {}
1515

    
1516
  def ExpandNames(self):
1517
    # This raises errors.OpPrereqError on its own:
1518
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1519

    
1520
    all_node_info = self.cfg.GetAllNodesInfo()
1521
    all_inst_info = self.cfg.GetAllInstancesInfo()
1522

    
1523
    node_names = set(node.name
1524
                     for node in all_node_info.values()
1525
                     if node.group == self.group_uuid)
1526

    
1527
    inst_names = [inst.name
1528
                  for inst in all_inst_info.values()
1529
                  if inst.primary_node in node_names]
1530

    
1531
    self.needed_locks = {
1532
      locking.LEVEL_NODEGROUP: [self.group_uuid],
1533
      locking.LEVEL_NODE: list(node_names),
1534
      locking.LEVEL_INSTANCE: inst_names,
1535
    }
1536

    
1537
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1538

    
1539
  def CheckPrereq(self):
1540
    self.all_node_info = self.cfg.GetAllNodesInfo()
1541
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
1542

    
1543
    group_nodes = set(node.name
1544
                      for node in self.all_node_info.values()
1545
                      if node.group == self.group_uuid)
1546

    
1547
    group_instances = set(inst.name
1548
                          for inst in self.all_inst_info.values()
1549
                          if inst.primary_node in group_nodes)
1550

    
1551
    unlocked_nodes = \
1552
        group_nodes.difference(self.glm.list_owned(locking.LEVEL_NODE))
1553

    
1554
    unlocked_instances = \
1555
        group_instances.difference(self.glm.list_owned(locking.LEVEL_INSTANCE))
1556

    
1557
    if unlocked_nodes:
1558
      raise errors.OpPrereqError("missing lock for nodes: %s" %
1559
                                 utils.CommaJoin(unlocked_nodes))
1560

    
1561
    if unlocked_instances:
1562
      raise errors.OpPrereqError("missing lock for instances: %s" %
1563
                                 utils.CommaJoin(unlocked_instances))
1564

    
1565
    self.my_node_names = utils.NiceSort(group_nodes)
1566
    self.my_inst_names = utils.NiceSort(group_instances)
1567

    
1568
    self.my_node_info = dict((name, self.all_node_info[name])
1569
                             for name in self.my_node_names)
1570

    
1571
    self.my_inst_info = dict((name, self.all_inst_info[name])
1572
                             for name in self.my_inst_names)
1573

    
1574
  def _VerifyNode(self, ninfo, nresult):
1575
    """Perform some basic validation on data returned from a node.
1576

1577
      - check the result data structure is well formed and has all the
1578
        mandatory fields
1579
      - check ganeti version
1580

1581
    @type ninfo: L{objects.Node}
1582
    @param ninfo: the node to check
1583
    @param nresult: the results from the node
1584
    @rtype: boolean
1585
    @return: whether overall this call was successful (and we can expect
1586
         reasonable values in the response)
1587

1588
    """
1589
    node = ninfo.name
1590
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1591

    
1592
    # main result, nresult should be a non-empty dict
1593
    test = not nresult or not isinstance(nresult, dict)
1594
    _ErrorIf(test, self.ENODERPC, node,
1595
                  "unable to verify node: no data returned")
1596
    if test:
1597
      return False
1598

    
1599
    # compares ganeti version
1600
    local_version = constants.PROTOCOL_VERSION
1601
    remote_version = nresult.get("version", None)
1602
    test = not (remote_version and
1603
                isinstance(remote_version, (list, tuple)) and
1604
                len(remote_version) == 2)
1605
    _ErrorIf(test, self.ENODERPC, node,
1606
             "connection to node returned invalid data")
1607
    if test:
1608
      return False
1609

    
1610
    test = local_version != remote_version[0]
1611
    _ErrorIf(test, self.ENODEVERSION, node,
1612
             "incompatible protocol versions: master %s,"
1613
             " node %s", local_version, remote_version[0])
1614
    if test:
1615
      return False
1616

    
1617
    # node seems compatible, we can actually try to look into its results
1618

    
1619
    # full package version
1620
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1621
                  self.ENODEVERSION, node,
1622
                  "software version mismatch: master %s, node %s",
1623
                  constants.RELEASE_VERSION, remote_version[1],
1624
                  code=self.ETYPE_WARNING)
1625

    
1626
    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1627
    if ninfo.vm_capable and isinstance(hyp_result, dict):
1628
      for hv_name, hv_result in hyp_result.iteritems():
1629
        test = hv_result is not None
1630
        _ErrorIf(test, self.ENODEHV, node,
1631
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1632

    
1633
    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1634
    if ninfo.vm_capable and isinstance(hvp_result, list):
1635
      for item, hv_name, hv_result in hvp_result:
1636
        _ErrorIf(True, self.ENODEHV, node,
1637
                 "hypervisor %s parameter verify failure (source %s): %s",
1638
                 hv_name, item, hv_result)
1639

    
1640
    test = nresult.get(constants.NV_NODESETUP,
1641
                       ["Missing NODESETUP results"])
1642
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1643
             "; ".join(test))
1644

    
1645
    return True
1646

    
1647
  def _VerifyNodeTime(self, ninfo, nresult,
1648
                      nvinfo_starttime, nvinfo_endtime):
1649
    """Check the node time.
1650

1651
    @type ninfo: L{objects.Node}
1652
    @param ninfo: the node to check
1653
    @param nresult: the remote results for the node
1654
    @param nvinfo_starttime: the start time of the RPC call
1655
    @param nvinfo_endtime: the end time of the RPC call
1656

1657
    """
1658
    node = ninfo.name
1659
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1660

    
1661
    ntime = nresult.get(constants.NV_TIME, None)
1662
    try:
1663
      ntime_merged = utils.MergeTime(ntime)
1664
    except (ValueError, TypeError):
1665
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1666
      return
1667

    
1668
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1669
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1670
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1671
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1672
    else:
1673
      ntime_diff = None
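    # the node is only flagged when its reported time falls outside the
    # [nvinfo_starttime - skew, nvinfo_endtime + skew] window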
1674

    
1675
    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1676
             "Node time diverges by at least %s from master node time",
1677
             ntime_diff)
1678

    
1679
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1680
    """Check the node LVM results.
1681

1682
    @type ninfo: L{objects.Node}
1683
    @param ninfo: the node to check
1684
    @param nresult: the remote results for the node
1685
    @param vg_name: the configured VG name
1686

1687
    """
1688
    if vg_name is None:
1689
      return
1690

    
1691
    node = ninfo.name
1692
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1693

    
1694
    # checks vg existence and size > 20G
1695
    vglist = nresult.get(constants.NV_VGLIST, None)
1696
    test = not vglist
1697
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1698
    if not test:
1699
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1700
                                            constants.MIN_VG_SIZE)
1701
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1702

    
1703
    # check pv names
1704
    pvlist = nresult.get(constants.NV_PVLIST, None)
1705
    test = pvlist is None
1706
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1707
    if not test:
1708
      # check that ':' is not present in PV names, since it's a
1709
      # special character for lvcreate (denotes the range of PEs to
1710
      # use on the PV)
1711
      for _, pvname, owner_vg in pvlist:
1712
        test = ":" in pvname
1713
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1714
                 " '%s' of VG '%s'", pvname, owner_vg)
1715

    
1716
  def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1717
    """Check the node bridges.
1718

1719
    @type ninfo: L{objects.Node}
1720
    @param ninfo: the node to check
1721
    @param nresult: the remote results for the node
1722
    @param bridges: the expected list of bridges
1723

1724
    """
1725
    if not bridges:
1726
      return
1727

    
1728
    node = ninfo.name
1729
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1730

    
1731
    missing = nresult.get(constants.NV_BRIDGES, None)
1732
    test = not isinstance(missing, list)
1733
    _ErrorIf(test, self.ENODENET, node,
1734
             "did not return valid bridge information")
1735
    if not test:
1736
      _ErrorIf(bool(missing), self.ENODENET, node, "missing bridges: %s" %
1737
               utils.CommaJoin(sorted(missing)))
1738

    
1739
  def _VerifyNodeNetwork(self, ninfo, nresult):
1740
    """Check the node network connectivity results.
1741

1742
    @type ninfo: L{objects.Node}
1743
    @param ninfo: the node to check
1744
    @param nresult: the remote results for the node
1745

1746
    """
1747
    node = ninfo.name
1748
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1749

    
1750
    test = constants.NV_NODELIST not in nresult
1751
    _ErrorIf(test, self.ENODESSH, node,
1752
             "node hasn't returned node ssh connectivity data")
1753
    if not test:
1754
      if nresult[constants.NV_NODELIST]:
1755
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1756
          _ErrorIf(True, self.ENODESSH, node,
1757
                   "ssh communication with node '%s': %s", a_node, a_msg)
1758

    
1759
    test = constants.NV_NODENETTEST not in nresult
1760
    _ErrorIf(test, self.ENODENET, node,
1761
             "node hasn't returned node tcp connectivity data")
1762
    if not test:
1763
      if nresult[constants.NV_NODENETTEST]:
1764
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1765
        for anode in nlist:
1766
          _ErrorIf(True, self.ENODENET, node,
1767
                   "tcp communication with node '%s': %s",
1768
                   anode, nresult[constants.NV_NODENETTEST][anode])
1769

    
1770
    test = constants.NV_MASTERIP not in nresult
1771
    _ErrorIf(test, self.ENODENET, node,
1772
             "node hasn't returned node master IP reachability data")
1773
    if not test:
1774
      if not nresult[constants.NV_MASTERIP]:
1775
        if node == self.master_node:
1776
          msg = "the master node cannot reach the master IP (not configured?)"
1777
        else:
1778
          msg = "cannot reach the master IP"
1779
        _ErrorIf(True, self.ENODENET, node, msg)
1780

    
1781
  def _VerifyInstance(self, instance, instanceconfig, node_image,
1782
                      diskstatus):
1783
    """Verify an instance.
1784

1785
    This function checks to see if the required block devices are
1786
    available on the instance's node.
1787

1788
    """
1789
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1790
    node_current = instanceconfig.primary_node
1791

    
1792
    node_vol_should = {}
1793
    instanceconfig.MapLVsByNode(node_vol_should)
1794

    
1795
    for node in node_vol_should:
1796
      n_img = node_image[node]
1797
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1798
        # ignore missing volumes on offline or broken nodes
1799
        continue
1800
      for volume in node_vol_should[node]:
1801
        test = volume not in n_img.volumes
1802
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1803
                 "volume %s missing on node %s", volume, node)
1804

    
1805
    if instanceconfig.admin_up:
1806
      pri_img = node_image[node_current]
1807
      test = instance not in pri_img.instances and not pri_img.offline
1808
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
1809
               "instance not running on its primary node %s",
1810
               node_current)
1811

    
1812
    diskdata = [(nname, success, status, idx)
1813
                for (nname, disks) in diskstatus.items()
1814
                for idx, (success, status) in enumerate(disks)]
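    # diskdata is a flat list of (node, success, status, disk_index) tuples;
    # e.g. ("node1.example.com", True, <status>, 0) for the first disk on
    # node1 (node name purely illustrative)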
1815

    
1816
    for nname, success, bdev_status, idx in diskdata:
1817
      # the 'ghost node' construction in Exec() ensures that we have a
1818
      # node here
1819
      snode = node_image[nname]
1820
      bad_snode = snode.ghost or snode.offline
1821
      _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
1822
               self.EINSTANCEFAULTYDISK, instance,
1823
               "couldn't retrieve status for disk/%s on %s: %s",
1824
               idx, nname, bdev_status)
1825
      _ErrorIf((instanceconfig.admin_up and success and
1826
                bdev_status.ldisk_status == constants.LDS_FAULTY),
1827
               self.EINSTANCEFAULTYDISK, instance,
1828
               "disk/%s on %s is faulty", idx, nname)
1829

    
1830
  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1831
    """Verify if there are any unknown volumes in the cluster.
1832

1833
    The .os, .swap and backup volumes are ignored. All other volumes are
1834
    reported as unknown.
1835

1836
    @type reserved: L{ganeti.utils.FieldSet}
1837
    @param reserved: a FieldSet of reserved volume names
1838

1839
    """
1840
    for node, n_img in node_image.items():
1841
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1842
        # skip non-healthy nodes
1843
        continue
1844
      for volume in n_img.volumes:
1845
        test = ((node not in node_vol_should or
1846
                volume not in node_vol_should[node]) and
1847
                not reserved.Matches(volume))
1848
        self._ErrorIf(test, self.ENODEORPHANLV, node,
1849
                      "volume %s is unknown", volume)
1850

    
1851
  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1852
    """Verify N+1 Memory Resilience.
1853

1854
    Check that if one single node dies we can still start all the
1855
    instances it was primary for.
1856

1857
    """
1858
    cluster_info = self.cfg.GetClusterInfo()
1859
    for node, n_img in node_image.items():
1860
      # This code checks that every node which is now listed as
1861
      # secondary has enough memory to host all instances it is
1862
      # supposed to, should a single other node in the cluster fail.
1863
      # FIXME: not ready for failover to an arbitrary node
1864
      # FIXME: does not support file-backed instances
1865
      # WARNING: we currently take into account down instances as well
1866
      # as up ones, considering that even if they're down someone
1867
      # might want to start them even in the event of a node failure.
1868
      if n_img.offline:
1869
        # we're skipping offline nodes from the N+1 warning, since
1870
        # most likely we don't have good memory information from them;
1871
        # we already list instances living on such nodes, and that's
1872
        # enough warning
1873
        continue
1874
      for prinode, instances in n_img.sbp.items():
1875
        needed_mem = 0
1876
        for instance in instances:
1877
          bep = cluster_info.FillBE(instance_cfg[instance])
1878
          if bep[constants.BE_AUTO_BALANCE]:
1879
            needed_mem += bep[constants.BE_MEMORY]
1880
        test = n_img.mfree < needed_mem
1881
        self._ErrorIf(test, self.ENODEN1, node,
1882
                      "not enough memory to accommodate instance failovers"
1883
                      " should node %s fail (%dMiB needed, %dMiB available)",
1884
                      prinode, needed_mem, n_img.mfree)
1885

    
1886
  @classmethod
1887
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
1888
                   (files_all, files_all_opt, files_mc, files_vm)):
1889
    """Verifies file checksums collected from all nodes.
1890

1891
    @param errorif: Callback for reporting errors
1892
    @param nodeinfo: List of L{objects.Node} objects
1893
    @param master_node: Name of master node
1894
    @param all_nvinfo: RPC results
1895

1896
    """
1897
    node_names = frozenset(node.name for node in nodeinfo)
1898

    
1899
    assert master_node in node_names
1900
    assert (len(files_all | files_all_opt | files_mc | files_vm) ==
1901
            sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
1902
           "Found file listed in more than one file list"
1903

    
1904
    # Define functions determining which nodes to consider for a file
1905
    file2nodefn = dict([(filename, fn)
1906
      for (files, fn) in [(files_all, None),
1907
                          (files_all_opt, None),
1908
                          (files_mc, lambda node: (node.master_candidate or
1909
                                                   node.name == master_node)),
1910
                          (files_vm, lambda node: node.vm_capable)]
1911
      for filename in files])
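    # a value of None means the file is checked on every node; otherwise the
    # callable decides, per node, whether the file is expected there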
1912

    
1913
    fileinfo = dict((filename, {}) for filename in file2nodefn.keys())
1914

    
1915
    for node in nodeinfo:
1916
      nresult = all_nvinfo[node.name]
1917

    
1918
      if nresult.fail_msg or not nresult.payload:
1919
        node_files = None
1920
      else:
1921
        node_files = nresult.payload.get(constants.NV_FILELIST, None)
1922

    
1923
      test = not (node_files and isinstance(node_files, dict))
1924
      errorif(test, cls.ENODEFILECHECK, node.name,
1925
              "Node did not return file checksum data")
1926
      if test:
1927
        continue
1928

    
1929
      for (filename, checksum) in node_files.items():
1930
        # Check if the file should be considered for a node
1931
        fn = file2nodefn[filename]
1932
        if fn is None or fn(node):
1933
          fileinfo[filename].setdefault(checksum, set()).add(node.name)
1934

    
1935
    for (filename, checksums) in fileinfo.items():
1936
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
1937

    
1938
      # Nodes having the file
1939
      with_file = frozenset(node_name
1940
                            for nodes in fileinfo[filename].values()
1941
                            for node_name in nodes)
1942

    
1943
      # Nodes missing file
1944
      missing_file = node_names - with_file
1945

    
1946
      if filename in files_all_opt:
1947
        # All or no nodes
1948
        errorif(missing_file and missing_file != node_names,
1949
                cls.ECLUSTERFILECHECK, None,
1950
                "File %s is optional, but it must exist on all or no nodes (not"
1951
                " found on %s)",
1952
                filename, utils.CommaJoin(utils.NiceSort(missing_file)))
1953
      else:
1954
        errorif(missing_file, cls.ECLUSTERFILECHECK, None,
1955
                "File %s is missing from node(s) %s", filename,
1956
                utils.CommaJoin(utils.NiceSort(missing_file)))
1957

    
1958
      # See if there are multiple versions of the file
1959
      test = len(checksums) > 1
1960
      if test:
1961
        variants = ["variant %s on %s" %
1962
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
1963
                    for (idx, (checksum, nodes)) in
1964
                      enumerate(sorted(checksums.items()))]
1965
      else:
1966
        variants = []
1967

    
1968
      errorif(test, cls.ECLUSTERFILECHECK, None,
1969
              "File %s found with %s different checksums (%s)",
1970
              filename, len(checksums), "; ".join(variants))
1971

    
1972
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
1973
                      drbd_map):
1974
    """Verifies the node DRBD status.
1975

1976
    @type ninfo: L{objects.Node}
1977
    @param ninfo: the node to check
1978
    @param nresult: the remote results for the node
1979
    @param instanceinfo: the dict of instances
1980
    @param drbd_helper: the configured DRBD usermode helper
1981
    @param drbd_map: the DRBD map as returned by
1982
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1983

1984
    """
1985
    node = ninfo.name
1986
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1987

    
1988
    if drbd_helper:
1989
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
1990
      test = (helper_result is None)
1991
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
1992
               "no drbd usermode helper returned")
1993
      if helper_result:
1994
        status, payload = helper_result
1995
        test = not status
1996
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
1997
                 "drbd usermode helper check unsuccessful: %s", payload)
1998
        test = status and (payload != drbd_helper)
1999
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
2000
                 "wrong drbd usermode helper: %s", payload)
2001

    
2002
    # compute the DRBD minors
2003
    node_drbd = {}
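    # node_drbd maps each DRBD minor to (instance name, whether the minor is
    # expected to be in use)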
2004
    for minor, instance in drbd_map[node].items():
2005
      test = instance not in instanceinfo
2006
      _ErrorIf(test, self.ECLUSTERCFG, None,
2007
               "ghost instance '%s' in temporary DRBD map", instance)
2008
        # ghost instance should not be running, but otherwise we
2009
        # don't give double warnings (both ghost instance and
2010
        # unallocated minor in use)
2011
      if test:
2012
        node_drbd[minor] = (instance, False)
2013
      else:
2014
        instance = instanceinfo[instance]
2015
        node_drbd[minor] = (instance.name, instance.admin_up)
2016

    
2017
    # and now check them
2018
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
2019
    test = not isinstance(used_minors, (tuple, list))
2020
    _ErrorIf(test, self.ENODEDRBD, node,
2021
             "cannot parse drbd status file: %s", str(used_minors))
2022
    if test:
2023
      # we cannot check drbd status
2024
      return
2025

    
2026
    for minor, (iname, must_exist) in node_drbd.items():
2027
      test = minor not in used_minors and must_exist
2028
      _ErrorIf(test, self.ENODEDRBD, node,
2029
               "drbd minor %d of instance %s is not active", minor, iname)
2030
    for minor in used_minors:
2031
      test = minor not in node_drbd
2032
      _ErrorIf(test, self.ENODEDRBD, node,
2033
               "unallocated drbd minor %d is in use", minor)
2034

    
2035
  def _UpdateNodeOS(self, ninfo, nresult, nimg):
2036
    """Builds the node OS structures.
2037

2038
    @type ninfo: L{objects.Node}
2039
    @param ninfo: the node to check
2040
    @param nresult: the remote results for the node
2041
    @param nimg: the node image object
2042

2043
    """
2044
    node = ninfo.name
2045
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2046

    
2047
    remote_os = nresult.get(constants.NV_OSLIST, None)
2048
    test = (not isinstance(remote_os, list) or
2049
            not compat.all(isinstance(v, list) and len(v) == 7
2050
                           for v in remote_os))
2051

    
2052
    _ErrorIf(test, self.ENODEOS, node,
2053
             "node hasn't returned valid OS data")
2054

    
2055
    nimg.os_fail = test
2056

    
2057
    if test:
2058
      return
2059

    
2060
    os_dict = {}
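    # os_dict maps each OS name to a list of
    # (path, status, diagnose, variants, parameters, api_versions) tuples, one
    # entry per location in which the OS was found on the node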
2061

    
2062
    for (name, os_path, status, diagnose,
2063
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2064

    
2065
      if name not in os_dict:
2066
        os_dict[name] = []
2067

    
2068
      # parameters is a list of lists instead of list of tuples due to
2069
      # JSON lacking a real tuple type, fix it:
2070
      parameters = [tuple(v) for v in parameters]
2071
      os_dict[name].append((os_path, status, diagnose,
2072
                            set(variants), set(parameters), set(api_ver)))
2073

    
2074
    nimg.oslist = os_dict
2075

    
2076
  def _VerifyNodeOS(self, ninfo, nimg, base):
2077
    """Verifies the node OS list.
2078

2079
    @type ninfo: L{objects.Node}
2080
    @param ninfo: the node to check
2081
    @param nimg: the node image object
2082
    @param base: the 'template' node we match against (e.g. from the master)
2083

2084
    """
2085
    node = ninfo.name
2086
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2087

    
2088
    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2089

    
2090
    beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2091
    for os_name, os_data in nimg.oslist.items():
2092
      assert os_data, "Empty OS status for OS %s?!" % os_name
2093
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2094
      _ErrorIf(not f_status, self.ENODEOS, node,
2095
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2096
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
2097
               "OS '%s' has multiple entries (first one shadows the rest): %s",
2098
               os_name, utils.CommaJoin([v[0] for v in os_data]))
2099
      # this will be caught in the backend too
2100
      _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
2101
               and not f_var, self.ENODEOS, node,
2102
               "OS %s with API at least %d does not declare any variant",
2103
               os_name, constants.OS_API_V15)
2104
      # comparisons with the 'base' image
2105
      test = os_name not in base.oslist
2106
      _ErrorIf(test, self.ENODEOS, node,
2107
               "Extra OS %s not present on reference node (%s)",
2108
               os_name, base.name)
2109
      if test:
2110
        continue
2111
      assert base.oslist[os_name], "Base node has empty OS status?"
2112
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2113
      if not b_status:
2114
        # base OS is invalid, skipping
2115
        continue
2116
      for kind, a, b in [("API version", f_api, b_api),
2117
                         ("variants list", f_var, b_var),
2118
                         ("parameters", beautify_params(f_param),
2119
                          beautify_params(b_param))]:
2120
        _ErrorIf(a != b, self.ENODEOS, node,
2121
                 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2122
                 kind, os_name, base.name,
2123
                 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2124

    
2125
    # check any missing OSes
2126
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2127
    _ErrorIf(missing, self.ENODEOS, node,
2128
             "OSes present on reference node %s but missing on this node: %s",
2129
             base.name, utils.CommaJoin(missing))
2130

    
2131
  def _VerifyOob(self, ninfo, nresult):
2132
    """Verifies out of band functionality of a node.
2133

2134
    @type ninfo: L{objects.Node}
2135
    @param ninfo: the node to check
2136
    @param nresult: the remote results for the node
2137

2138
    """
2139
    node = ninfo.name
2140
    # We just have to verify the paths on master and/or master candidates
2141
    # as the oob helper is invoked on the master
2142
    if ((ninfo.master_candidate or ninfo.master_capable) and
2143
        constants.NV_OOB_PATHS in nresult):
2144
      for path_result in nresult[constants.NV_OOB_PATHS]:
2145
        self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
2146

    
2147
  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2148
    """Verifies and updates the node volume data.
2149

2150
    This function will update a L{NodeImage}'s internal structures
2151
    with data from the remote call.
2152

2153
    @type ninfo: L{objects.Node}
2154
    @param ninfo: the node to check
2155
    @param nresult: the remote results for the node
2156
    @param nimg: the node image object
2157
    @param vg_name: the configured VG name
2158

2159
    """
2160
    node = ninfo.name
2161
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2162

    
2163
    nimg.lvm_fail = True
2164
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2165
    if vg_name is None:
2166
      pass
2167
    elif isinstance(lvdata, basestring):
2168
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
2169
               utils.SafeEncode(lvdata))
2170
    elif not isinstance(lvdata, dict):
2171
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
2172
    else:
2173
      nimg.volumes = lvdata
2174
      nimg.lvm_fail = False
2175

    
2176
  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2177
    """Verifies and updates the node instance list.
2178

2179
    If the listing was successful, then updates this node's instance
2180
    list. Otherwise, it marks the RPC call as failed for the instance
2181
    list key.
2182

2183
    @type ninfo: L{objects.Node}
2184
    @param ninfo: the node to check
2185
    @param nresult: the remote results for the node
2186
    @param nimg: the node image object
2187

2188
    """
2189
    idata = nresult.get(constants.NV_INSTANCELIST, None)
2190
    test = not isinstance(idata, list)
2191
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
2192
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
2193
    if test:
2194
      nimg.hyp_fail = True
2195
    else:
2196
      nimg.instances = idata
2197

    
2198
  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2199
    """Verifies and computes a node information map.
2200

2201
    @type ninfo: L{objects.Node}
2202
    @param ninfo: the node to check
2203
    @param nresult: the remote results for the node
2204
    @param nimg: the node image object
2205
    @param vg_name: the configured VG name
2206

2207
    """
2208
    node = ninfo.name
2209
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2210

    
2211
    # try to read free memory (from the hypervisor)
2212
    hv_info = nresult.get(constants.NV_HVINFO, None)
2213
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2214
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
2215
    if not test:
2216
      try:
2217
        nimg.mfree = int(hv_info["memory_free"])
2218
      except (ValueError, TypeError):
2219
        _ErrorIf(True, self.ENODERPC, node,
2220
                 "node returned invalid nodeinfo, check hypervisor")
2221

    
2222
    # FIXME: devise a free space model for file based instances as well
2223
    if vg_name is not None:
2224
      test = (constants.NV_VGLIST not in nresult or
2225
              vg_name not in nresult[constants.NV_VGLIST])
2226
      _ErrorIf(test, self.ENODELVM, node,
2227
               "node didn't return data for the volume group '%s'"
2228
               " - it is either missing or broken", vg_name)
2229
      if not test:
2230
        try:
2231
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2232
        except (ValueError, TypeError):
2233
          _ErrorIf(True, self.ENODERPC, node,
2234
                   "node returned invalid LVM info, check LVM status")
2235

    
2236
  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2237
    """Gets per-disk status information for all instances.
2238

2239
    @type nodelist: list of strings
2240
    @param nodelist: Node names
2241
    @type node_image: dict of (name, L{objects.Node})
2242
    @param node_image: Node objects
2243
    @type instanceinfo: dict of (name, L{objects.Instance})
2244
    @param instanceinfo: Instance objects
2245
    @rtype: {instance: {node: [(success, payload)]}}
2246
    @return: a dictionary of per-instance dictionaries with nodes as
2247
        keys and disk information as values; the disk information is a
2248
        list of tuples (success, payload)
2249

2250
    """
2251
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2252

    
2253
    node_disks = {}
2254
    node_disks_devonly = {}
2255
    diskless_instances = set()
2256
    diskless = constants.DT_DISKLESS
2257

    
2258
    for nname in nodelist:
2259
      node_instances = list(itertools.chain(node_image[nname].pinst,
2260
                                            node_image[nname].sinst))
2261
      diskless_instances.update(inst for inst in node_instances
2262
                                if instanceinfo[inst].disk_template == diskless)
2263
      disks = [(inst, disk)
2264
               for inst in node_instances
2265
               for disk in instanceinfo[inst].disks]
2266

    
2267
      if not disks:
2268
        # No need to collect data
2269
        continue
2270

    
2271
      node_disks[nname] = disks
2272

    
2273
      # Creating copies as SetDiskID below will modify the objects and that can
2274
      # lead to incorrect data returned from nodes
2275
      devonly = [dev.Copy() for (_, dev) in disks]
2276

    
2277
      for dev in devonly:
2278
        self.cfg.SetDiskID(dev, nname)
2279

    
2280
      node_disks_devonly[nname] = devonly
2281

    
2282
    assert len(node_disks) == len(node_disks_devonly)
2283

    
2284
    # Collect data from all nodes with disks
2285
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2286
                                                          node_disks_devonly)
2287

    
2288
    assert len(result) == len(node_disks)
2289

    
2290
    instdisk = {}
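    # instdisk maps instance name -> node name -> list of (success, payload)
    # tuples, e.g. {"inst1": {"node1": [(True, status0), (True, status1)]}}
    # (instance and node names here are purely illustrative)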
2291

    
2292
    for (nname, nres) in result.items():
2293
      disks = node_disks[nname]
2294

    
2295
      if nres.offline:
2296
        # No data from this node
2297
        data = len(disks) * [(False, "node offline")]
2298
      else:
2299
        msg = nres.fail_msg
2300
        _ErrorIf(msg, self.ENODERPC, nname,
2301
                 "while getting disk information: %s", msg)
2302
        if msg:
2303
          # No data from this node
2304
          data = len(disks) * [(False, msg)]
2305
        else:
2306
          data = []
2307
          for idx, i in enumerate(nres.payload):
2308
            if isinstance(i, (tuple, list)) and len(i) == 2:
2309
              data.append(i)
2310
            else:
2311
              logging.warning("Invalid result from node %s, entry %d: %s",
2312
                              nname, idx, i)
2313
              data.append((False, "Invalid result from the remote node"))
2314

    
2315
      for ((inst, _), status) in zip(disks, data):
2316
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2317

    
2318
    # Add empty entries for diskless instances.
2319
    for inst in diskless_instances:
2320
      assert inst not in instdisk
2321
      instdisk[inst] = {}
2322

    
2323
    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2324
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
2325
                      compat.all(isinstance(s, (tuple, list)) and
2326
                                 len(s) == 2 for s in statuses)
2327
                      for inst, nnames in instdisk.items()
2328
                      for nname, statuses in nnames.items())
2329
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2330

    
2331
    return instdisk
2332

    
2333
  def BuildHooksEnv(self):
2334
    """Build hooks env.
2335

2336
    Cluster-Verify hooks are run only in the post phase; their failure is
2337
    logged in the verify output and makes the verification fail.
2338

2339
    """
2340
    env = {
2341
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2342
      }
2343

    
2344
    env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2345
               for node in self.my_node_info.values())
2346

    
2347
    return env
2348

    
2349
  def BuildHooksNodes(self):
2350
    """Build hooks nodes.
2351

2352
    """
2353
    assert self.my_node_names, ("Node list not gathered,"
2354
      " has CheckPrereq been executed?")
2355
    return ([], self.my_node_names)
2356

    
2357
  def Exec(self, feedback_fn):
2358
    """Verify integrity of the node group, performing various tests on nodes.
2359

2360
    """
2361
    # This method has too many local variables. pylint: disable-msg=R0914
2362
    self.bad = False
2363
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2364
    verbose = self.op.verbose
2365
    self._feedback_fn = feedback_fn
2366

    
2367
    vg_name = self.cfg.GetVGName()
2368
    drbd_helper = self.cfg.GetDRBDHelper()
2369
    cluster = self.cfg.GetClusterInfo()
2370
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
2371
    hypervisors = cluster.enabled_hypervisors
2372
    node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2373

    
2374
    i_non_redundant = [] # Non redundant instances
2375
    i_non_a_balanced = [] # Non auto-balanced instances
2376
    n_offline = 0 # Count of offline nodes
2377
    n_drained = 0 # Count of nodes being drained
2378
    node_vol_should = {}
2379

    
2380
    # FIXME: verify OS list
2381

    
2382
    # File verification
2383
    filemap = _ComputeAncillaryFiles(cluster, False)
2384

    
2385
    # do local checksums
2386
    master_node = self.master_node = self.cfg.GetMasterNode()
2387
    master_ip = self.cfg.GetMasterIP()
2388

    
2389
    feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2390
    node_verify_param = {
2391
      constants.NV_FILELIST:
2392
        utils.UniqueSequence(filename
2393
                             for files in filemap
2394
                             for filename in files),
2395
      constants.NV_NODELIST: [node.name for node in self.all_node_info.values()
2396
                              if not node.offline],
2397
      constants.NV_HYPERVISOR: hypervisors,
2398
      constants.NV_HVPARAMS:
2399
        _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2400
      constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2401
                                 for node in node_data_list
2402
                                 if not node.offline],
2403
      constants.NV_INSTANCELIST: hypervisors,
2404
      constants.NV_VERSION: None,
2405
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2406
      constants.NV_NODESETUP: None,
2407
      constants.NV_TIME: None,
2408
      constants.NV_MASTERIP: (master_node, master_ip),
2409
      constants.NV_OSLIST: None,
2410
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2411
      }
2412

    
2413
    if vg_name is not None:
2414
      node_verify_param[constants.NV_VGLIST] = None
2415
      node_verify_param[constants.NV_LVLIST] = vg_name
2416
      node_verify_param[constants.NV_PVLIST] = [vg_name]
2417
      node_verify_param[constants.NV_DRBDLIST] = None
2418

    
2419
    if drbd_helper:
2420
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2421

    
2422
    # bridge checks
2423
    # FIXME: this needs to be changed per node-group, not cluster-wide
2424
    bridges = set()
2425
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2426
    if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2427
      bridges.add(default_nicpp[constants.NIC_LINK])
2428
    for instance in self.my_inst_info.values():
2429
      for nic in instance.nics:
2430
        full_nic = cluster.SimpleFillNIC(nic.nicparams)
2431
        if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2432
          bridges.add(full_nic[constants.NIC_LINK])
2433

    
2434
    if bridges:
2435
      node_verify_param[constants.NV_BRIDGES] = list(bridges)
2436

    
2437
    # Build our expected cluster state
2438
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
2439
                                                 name=node.name,
2440
                                                 vm_capable=node.vm_capable))
2441
                      for node in node_data_list)
2442

    
2443
    # Gather OOB paths
2444
    oob_paths = []
2445
    for node in self.all_node_info.values():
2446
      path = _SupportsOob(self.cfg, node)
2447
      if path and path not in oob_paths:
2448
        oob_paths.append(path)
2449

    
2450
    if oob_paths:
2451
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2452

    
2453
    for instance in self.my_inst_names:
2454
      inst_config = self.my_inst_info[instance]
2455

    
2456
      for nname in inst_config.all_nodes:
2457
        if nname not in node_image:
2458
          # ghost node
2459
          gnode = self.NodeImage(name=nname)
2460
          gnode.ghost = True
2461
          node_image[nname] = gnode
2462

    
2463
      inst_config.MapLVsByNode(node_vol_should)
2464

    
2465
      pnode = inst_config.primary_node
2466
      node_image[pnode].pinst.append(instance)
2467

    
2468
      for snode in inst_config.secondary_nodes:
2469
        nimg = node_image[snode]
2470
        nimg.sinst.append(instance)
2471
        if pnode not in nimg.sbp:
2472
          nimg.sbp[pnode] = []
2473
        nimg.sbp[pnode].append(instance)
2474

    
2475
    # At this point, we have the in-memory data structures complete,
2476
    # except for the runtime information, which we'll gather next
2477

    
2478
    # Due to the way our RPC system works, exact response times cannot be
2479
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2480
    # time before and after executing the request, we can at least have a time
2481
    # window.
2482
    nvinfo_starttime = time.time()
2483
    all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2484
                                           node_verify_param,
2485
                                           self.cfg.GetClusterName())
2486
    nvinfo_endtime = time.time()
2487

    
2488
    all_drbd_map = self.cfg.ComputeDRBDMap()
2489

    
2490
    feedback_fn("* Gathering disk information (%s nodes)" %
2491
                len(self.my_node_names))
2492
    instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2493
                                     self.my_inst_info)
2494

    
2495
    feedback_fn("* Verifying configuration file consistency")
2496

    
2497
    # If not all nodes are being checked, we need to make sure the master node
2498
    # and a non-checked vm_capable node are in the list.
2499
    absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2500
    if absent_nodes:
2501
      vf_nvinfo = all_nvinfo.copy()
2502
      vf_node_info = list(self.my_node_info.values())
2503
      additional_nodes = []
2504
      if master_node not in self.my_node_info:
2505
        additional_nodes.append(master_node)
2506
        vf_node_info.append(self.all_node_info[master_node])
2507
      # Add the first vm_capable node we find which is not included
2508
      for node in absent_nodes:
2509
        nodeinfo = self.all_node_info[node]
2510
        if nodeinfo.vm_capable and not nodeinfo.offline:
2511
          additional_nodes.append(node)
2512
          vf_node_info.append(self.all_node_info[node])
2513
          break
2514
      key = constants.NV_FILELIST
2515
      vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2516
                                                 {key: node_verify_param[key]},
2517
                                                 self.cfg.GetClusterName()))
2518
    else:
2519
      vf_nvinfo = all_nvinfo
2520
      vf_node_info = self.my_node_info.values()
2521

    
2522
    self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2523

    
2524
    feedback_fn("* Verifying node status")
2525

    
2526
    refos_img = None
2527

    
2528
    for node_i in node_data_list:
2529
      node = node_i.name
2530
      nimg = node_image[node]
2531

    
2532
      if node_i.offline:
2533
        if verbose:
2534
          feedback_fn("* Skipping offline node %s" % (node,))
2535
        n_offline += 1
2536
        continue
2537

    
2538
      if node == master_node:
2539
        ntype = "master"
2540
      elif node_i.master_candidate:
2541
        ntype = "master candidate"
2542
      elif node_i.drained:
2543
        ntype = "drained"
2544
        n_drained += 1
2545
      else:
2546
        ntype = "regular"
2547
      if verbose:
2548
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2549

    
2550
      msg = all_nvinfo[node].fail_msg
2551
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2552
      if msg:
2553
        nimg.rpc_fail = True
2554
        continue
2555

    
2556
      nresult = all_nvinfo[node].payload
2557

    
2558
      nimg.call_ok = self._VerifyNode(node_i, nresult)
2559
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2560
      self._VerifyNodeNetwork(node_i, nresult)
2561
      self._VerifyOob(node_i, nresult)
2562

    
2563
      if nimg.vm_capable:
2564
        self._VerifyNodeLVM(node_i, nresult, vg_name)
2565
        self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2566
                             all_drbd_map)
2567

    
2568
        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2569
        self._UpdateNodeInstances(node_i, nresult, nimg)
2570
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2571
        self._UpdateNodeOS(node_i, nresult, nimg)
2572

    
2573
        if not nimg.os_fail:
2574
          if refos_img is None:
2575
            refos_img = nimg
2576
          self._VerifyNodeOS(node_i, nimg, refos_img)
2577
        self._VerifyNodeBridges(node_i, nresult, bridges)
2578

    
2579
        # Check whether all running instances are primary for the node. (This
2580
        # can no longer be done from _VerifyInstance below, since some of the
2581
        # wrong instances could be from other node groups.)
2582
        non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2583

    
2584
        for inst in non_primary_inst:
2585
          test = inst in self.all_inst_info
2586
          _ErrorIf(test, self.EINSTANCEWRONGNODE, inst,
2587
                   "instance should not run on node %s", node_i.name)
2588
          _ErrorIf(not test, self.ENODEORPHANINSTANCE, node_i.name,
2589
                   "node is running unknown instance %s", inst)
2590

    
2591
    feedback_fn("* Verifying instance status")
2592
    for instance in self.my_inst_names:
2593
      if verbose:
2594
        feedback_fn("* Verifying instance %s" % instance)
2595
      inst_config = self.my_inst_info[instance]
2596
      self._VerifyInstance(instance, inst_config, node_image,
2597
                           instdisk[instance])
2598
      inst_nodes_offline = []
2599

    
2600
      pnode = inst_config.primary_node
2601
      pnode_img = node_image[pnode]
2602
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2603
               self.ENODERPC, pnode, "instance %s, connection to"
2604
               " primary node failed", instance)
2605

    
2606
      _ErrorIf(inst_config.admin_up and pnode_img.offline,
2607
               self.EINSTANCEBADNODE, instance,
2608
               "instance is marked as running and lives on offline node %s",
2609
               inst_config.primary_node)
2610

    
2611
      # If the instance is non-redundant we cannot survive losing its primary
2612
      # node, so we are not N+1 compliant. On the other hand we have no disk
2613
      # templates with more than one secondary so that situation is not well
2614
      # supported either.
2615
      # FIXME: does not support file-backed instances
2616
      if not inst_config.secondary_nodes:
2617
        i_non_redundant.append(instance)
2618

    
2619
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2620
               instance, "instance has multiple secondary nodes: %s",
2621
               utils.CommaJoin(inst_config.secondary_nodes),
2622
               code=self.ETYPE_WARNING)
2623

    
2624
      if inst_config.disk_template in constants.DTS_INT_MIRROR:
2625
        pnode = inst_config.primary_node
2626
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
2627
        instance_groups = {}
2628

    
2629
        for node in instance_nodes:
2630
          instance_groups.setdefault(self.all_node_info[node].group,
2631
                                     []).append(node)
2632

    
2633
        pretty_list = [
2634
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2635
          # Sort so that we always list the primary node first.
2636
          for group, nodes in sorted(instance_groups.items(),
2637
                                     key=lambda (_, nodes): pnode in nodes,
2638
                                     reverse=True)]
2639

    
2640
        self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2641
                      instance, "instance has primary and secondary nodes in"
2642
                      " different groups: %s", utils.CommaJoin(pretty_list),
2643
                      code=self.ETYPE_WARNING)
2644

    
2645
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2646
        i_non_a_balanced.append(instance)
2647

    
2648
      for snode in inst_config.secondary_nodes:
2649
        s_img = node_image[snode]
2650
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2651
                 "instance %s, connection to secondary node failed", instance)
2652

    
2653
        if s_img.offline:
2654
          inst_nodes_offline.append(snode)
2655

    
2656
      # warn that the instance lives on offline nodes
2657
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2658
               "instance has offline secondary node(s) %s",
2659
               utils.CommaJoin(inst_nodes_offline))
2660
      # ... or ghost/non-vm_capable nodes
2661
      for node in inst_config.all_nodes:
2662
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2663
                 "instance lives on ghost node %s", node)
2664
        _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2665
                 instance, "instance lives on non-vm_capable node %s", node)
2666

    
2667
    feedback_fn("* Verifying orphan volumes")
2668
    reserved = utils.FieldSet(*cluster.reserved_lvs)
2669
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2670

    
2671
    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2672
      feedback_fn("* Verifying N+1 Memory redundancy")
2673
      self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
2674

    
2675
    feedback_fn("* Other Notes")
2676
    if i_non_redundant:
2677
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2678
                  % len(i_non_redundant))
2679

    
2680
    if i_non_a_balanced:
2681
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2682
                  % len(i_non_a_balanced))
2683

    
2684
    if n_offline:
2685
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2686

    
2687
    if n_drained:
2688
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2689

    
2690
    return not self.bad
2691

    
2692
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2693
    """Analyze the post-hooks' result.
2694

2695
    This method analyses the hook result, handles it, and sends some
2696
    nicely-formatted feedback back to the user.
2697

2698
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
2699
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2700
    @param hooks_results: the results of the multi-node hooks rpc call
2701
    @param feedback_fn: function used to send feedback back to the caller
2702
    @param lu_result: previous Exec result
2703
    @return: the new Exec result, based on the previous result
2704
        and hook results
2705

2706
    """
2707
    # We only really run POST phase hooks, and are only interested in
2708
    # their results
2709
    if phase == constants.HOOKS_PHASE_POST:
2710
      # Used to change hooks' output to proper indentation
2711
      feedback_fn("* Hooks Results")
2712
      assert hooks_results, "invalid result from hooks"
2713

    
2714
      for node_name in hooks_results:
2715
        res = hooks_results[node_name]
2716
        msg = res.fail_msg
2717
        test = msg and not res.offline
2718
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
2719
                      "Communication failure in hooks execution: %s", msg)
2720
        if res.offline or msg:
2721
          # No need to investigate payload if node is offline or gave an error.
2722
          # manually override lu_result here, as _ErrorIf only
2723
          # overrides self.bad
2724
          lu_result = 1
2725
          continue
2726
        for script, hkr, output in res.payload:
2727
          test = hkr == constants.HKR_FAIL
2728
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
2729
                        "Script %s failed, output:", script)
2730
          if test:
2731
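            # indent every line of the hook output by six spaces so it nests
            # under the "Script ... failed" message above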
            output = self._HOOKS_INDENT_RE.sub('      ', output)
2732
            feedback_fn("%s" % output)
2733
            lu_result = 0
2734

    
2735
      return lu_result
2736

    
2737

    
2738
class LUClusterVerifyDisks(NoHooksLU):
2739
  """Verifies the cluster disks status.
2740

2741
  """
2742
  REQ_BGL = False
2743

    
2744
  def ExpandNames(self):
2745
    self.needed_locks = {
2746
      locking.LEVEL_NODE: locking.ALL_SET,
2747
      locking.LEVEL_INSTANCE: locking.ALL_SET,
2748
    }
2749
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2750

    
2751
  def Exec(self, feedback_fn):
2752
    """Verify integrity of cluster disks.
2753

2754
    @rtype: tuple of three items
2755
    @return: a tuple of (dict of node-to-node_error, list of instances
2756
        which need activate-disks, dict of instance: (node, volume) for
2757
        missing volumes)
2758

2759
    """
2760
    result = res_nodes, res_instances, res_missing = {}, [], {}
2761

    
2762
    nodes = utils.NiceSort(self.cfg.GetVmCapableNodeList())
2763
    instances = self.cfg.GetAllInstancesInfo().values()
2764

    
2765
    nv_dict = {}
2766
    for inst in instances:
2767
      inst_lvs = {}
2768
      if not inst.admin_up:
2769
        continue
2770
      inst.MapLVsByNode(inst_lvs)
2771
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
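      # e.g. {("node1", "xenvg/inst1-disk0"): <Instance inst1>}; node, volume
      # and instance names here are purely illustrative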
2772
      for node, vol_list in inst_lvs.iteritems():
2773
        for vol in vol_list:
2774
          nv_dict[(node, vol)] = inst
2775

    
2776
    if not nv_dict:
2777
      return result
2778

    
2779
    node_lvs = self.rpc.call_lv_list(nodes, [])
2780
    for node, node_res in node_lvs.items():
2781
      if node_res.offline:
2782
        continue
2783
      msg = node_res.fail_msg
2784
      if msg:
2785
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2786
        res_nodes[node] = msg
2787
        continue
2788

    
2789
      lvs = node_res.payload
2790
      for lv_name, (_, _, lv_online) in lvs.items():
2791
        inst = nv_dict.pop((node, lv_name), None)
2792
        if (not lv_online and inst is not None
2793
            and inst.name not in res_instances):
2794
          res_instances.append(inst.name)
2795

    
2796
    # any leftover items in nv_dict are missing LVs, let's arrange the
2797
    # data better
2798
    for key, inst in nv_dict.iteritems():
2799
      if inst.name not in res_missing:
2800
        res_missing[inst.name] = []
2801
      res_missing[inst.name].append(key)
2802

    
2803
    return result
2804

    
2805

    
2806
class LUClusterRepairDiskSizes(NoHooksLU):
2807
  """Verifies the cluster disks sizes.
2808

2809
  """
2810
  REQ_BGL = False
2811

    
2812
  def ExpandNames(self):
2813
    if self.op.instances:
2814
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
2815
      self.needed_locks = {
2816
        locking.LEVEL_NODE: [],
2817
        locking.LEVEL_INSTANCE: self.wanted_names,
2818
        }
2819
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2820
    else:
2821
      self.wanted_names = None
2822
      self.needed_locks = {
2823
        locking.LEVEL_NODE: locking.ALL_SET,
2824
        locking.LEVEL_INSTANCE: locking.ALL_SET,
2825
        }
2826
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2827

    
2828
  def DeclareLocks(self, level):
2829
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
2830
      self._LockInstancesNodes(primary_only=True)
2831

    
2832
  def CheckPrereq(self):
2833
    """Check prerequisites.
2834

2835
    This only checks the optional instance list against the existing names.
2836

2837
    """
2838
    if self.wanted_names is None:
2839
      self.wanted_names = self.glm.list_owned(locking.LEVEL_INSTANCE)
2840

    
2841
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2842
                             in self.wanted_names]
2843

    
2844
  def _EnsureChildSizes(self, disk):
2845
    """Ensure children of the disk have the needed disk size.
2846

2847
    This is valid mainly for DRBD8 and fixes an issue where the
2848
    children have a smaller disk size.
2849

2850
    @param disk: an L{ganeti.objects.Disk} object
2851

2852
    """
2853
    if disk.dev_type == constants.LD_DRBD8:
2854
      assert disk.children, "Empty children for DRBD8?"
2855
      fchild = disk.children[0]
2856
      mismatch = fchild.size < disk.size
2857
      if mismatch:
2858
        self.LogInfo("Child disk has size %d, parent %d, fixing",
2859
                     fchild.size, disk.size)
2860
        fchild.size = disk.size
2861

    
2862
      # and we recurse on this child only, not on the metadev
2863
      return self._EnsureChildSizes(fchild) or mismatch
2864
    else:
2865
      return False
2866

    
2867
  def Exec(self, feedback_fn):
2868
    """Verify the size of cluster disks.
2869

2870
    """
2871
    # TODO: check child disks too
2872
    # TODO: check differences in size between primary/secondary nodes
2873
    per_node_disks = {}
2874
    for instance in self.wanted_instances:
2875
      pnode = instance.primary_node
2876
      if pnode not in per_node_disks:
2877
        per_node_disks[pnode] = []
2878
      for idx, disk in enumerate(instance.disks):
2879
        per_node_disks[pnode].append((instance, idx, disk))
2880

    
2881
    changed = []
2882
    for node, dskl in per_node_disks.items():
2883
      newl = [v[2].Copy() for v in dskl]
2884
      for dsk in newl:
2885
        self.cfg.SetDiskID(dsk, node)
2886
      result = self.rpc.call_blockdev_getsize(node, newl)
2887
      if result.fail_msg:
2888
        self.LogWarning("Failure in blockdev_getsize call to node"
2889
                        " %s, ignoring", node)
2890
        continue
2891
      if len(result.payload) != len(dskl):
2892
        logging.warning("Invalid result from node %s: len(dksl)=%d,"
2893
                        " result.payload=%s", node, len(dskl), result.payload)
2894
        self.LogWarning("Invalid result from node %s, ignoring node results",
2895
                        node)
2896
        continue
2897
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
2898
        if size is None:
2899
          self.LogWarning("Disk %d of instance %s did not return size"
2900
                          " information, ignoring", idx, instance.name)
2901
          continue
2902
        if not isinstance(size, (int, long)):
2903
          self.LogWarning("Disk %d of instance %s did not return valid"
2904
                          " size information, ignoring", idx, instance.name)
2905
          continue
2906
        size = size >> 20
2907
        if size != disk.size:
2908
          self.LogInfo("Disk %d of instance %s has mismatched size,"
2909
                       " correcting: recorded %d, actual %d", idx,
2910
                       instance.name, disk.size, size)
2911
          disk.size = size
2912
          self.cfg.Update(instance, feedback_fn)
2913
          changed.append((instance.name, idx, size))
2914
        if self._EnsureChildSizes(disk):
2915
          self.cfg.Update(instance, feedback_fn)
2916
          changed.append((instance.name, idx, disk.size))
2917
    return changed
2918

    
2919

    
2920
class LUClusterRename(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    hostname = netutils.GetHostname(name=self.op.name,
                                    family=self.cfg.GetPrimaryIPFamily())

    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)
    if new_ip != old_ip:
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network" %
                                   new_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    clustername = self.op.name
    ip = self.ip

    # shutdown the master IP
    master = self.cfg.GetMasterNode()
    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetOnlineNodeList()
      try:
        node_list.remove(master)
      except ValueError:
        pass
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
    finally:
      result = self.rpc.call_node_start_master(master, False, False)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)

    return clustername


class LUClusterSetParams(LogicalUnit):
3002
  """Change the parameters of the cluster.
3003

3004
  """
3005
  HPATH = "cluster-modify"
3006
  HTYPE = constants.HTYPE_CLUSTER
3007
  REQ_BGL = False
3008

    
3009
  def CheckArguments(self):
3010
    """Check parameters
3011

3012
    """
3013
    if self.op.uid_pool:
3014
      uidpool.CheckUidPool(self.op.uid_pool)
3015

    
3016
    if self.op.add_uids:
3017
      uidpool.CheckUidPool(self.op.add_uids)
3018

    
3019
    if self.op.remove_uids:
3020
      uidpool.CheckUidPool(self.op.remove_uids)
3021

    
3022
  def ExpandNames(self):
3023
    # FIXME: in the future maybe other cluster params won't require checking on
3024
    # all nodes to be modified.
3025
    self.needed_locks = {
3026
      locking.LEVEL_NODE: locking.ALL_SET,
3027
    }
3028
    self.share_locks[locking.LEVEL_NODE] = 1
3029

    
3030
  def BuildHooksEnv(self):
3031
    """Build hooks env.
3032

3033
    """
3034
    return {
3035
      "OP_TARGET": self.cfg.GetClusterName(),
3036
      "NEW_VG_NAME": self.op.vg_name,
3037
      }
3038

    
3039
  def BuildHooksNodes(self):
3040
    """Build hooks nodes.
3041

3042
    """
3043
    mn = self.cfg.GetMasterNode()
3044
    return ([mn], [mn])
3045

    
3046
  def CheckPrereq(self):
3047
    """Check prerequisites.
3048

3049
    This checks whether the given params don't conflict and
3050
    if the given volume group is valid.
3051

3052
    """
3053
    if self.op.vg_name is not None and not self.op.vg_name:
3054
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3055
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3056
                                   " instances exist", errors.ECODE_INVAL)
3057

    
3058
    if self.op.drbd_helper is not None and not self.op.drbd_helper:
3059
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3060
        raise errors.OpPrereqError("Cannot disable drbd helper while"
3061
                                   " drbd-based instances exist",
3062
                                   errors.ECODE_INVAL)
3063

    
3064
    node_list = self.glm.list_owned(locking.LEVEL_NODE)
3065

    
3066
    # if vg_name not None, checks given volume group on all nodes
3067
    if self.op.vg_name:
3068
      vglist = self.rpc.call_vg_list(node_list)
3069
      for node in node_list:
3070
        msg = vglist[node].fail_msg
3071
        if msg:
3072
          # ignoring down node
3073
          self.LogWarning("Error while gathering data on node %s"
3074
                          " (ignoring node): %s", node, msg)
3075
          continue
3076
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3077
                                              self.op.vg_name,
3078
                                              constants.MIN_VG_SIZE)
3079
        if vgstatus:
3080
          raise errors.OpPrereqError("Error on node '%s': %s" %
3081
                                     (node, vgstatus), errors.ECODE_ENVIRON)
3082

    
3083
    if self.op.drbd_helper:
3084
      # checks given drbd helper on all nodes
3085
      helpers = self.rpc.call_drbd_helper(node_list)
3086
      for node in node_list:
3087
        ninfo = self.cfg.GetNodeInfo(node)
3088
        if ninfo.offline:
3089
          self.LogInfo("Not checking drbd helper on offline node %s", node)
3090
          continue
3091
        msg = helpers[node].fail_msg
3092
        if msg:
3093
          raise errors.OpPrereqError("Error checking drbd helper on node"
3094
                                     " '%s': %s" % (node, msg),
3095
                                     errors.ECODE_ENVIRON)
3096
        node_helper = helpers[node].payload
3097
        if node_helper != self.op.drbd_helper:
3098
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3099
                                     (node, node_helper), errors.ECODE_ENVIRON)
3100

    
3101
    self.cluster = cluster = self.cfg.GetClusterInfo()
3102
    # validate params changes
3103
    if self.op.beparams:
3104
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3105
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3106

    
3107
    if self.op.ndparams:
3108
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3109
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3110

    
3111
      # TODO: we need a more general way to handle resetting
3112
      # cluster-level parameters to default values
3113
      if self.new_ndparams["oob_program"] == "":
3114
        self.new_ndparams["oob_program"] = \
3115
            constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3116

    
3117
    if self.op.nicparams:
3118
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3119
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3120
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
3121
      nic_errors = []
3122

    
3123
      # check all instances for consistency
3124
      for instance in self.cfg.GetAllInstancesInfo().values():
3125
        for nic_idx, nic in enumerate(instance.nics):
3126
          params_copy = copy.deepcopy(nic.nicparams)
3127
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
3128

    
3129
          # check parameter syntax
3130
          try:
3131
            objects.NIC.CheckParameterSyntax(params_filled)
3132
          except errors.ConfigurationError, err:
3133
            nic_errors.append("Instance %s, nic/%d: %s" %
3134
                              (instance.name, nic_idx, err))
3135

    
3136
          # if we're moving instances to routed, check that they have an ip
3137
          target_mode = params_filled[constants.NIC_MODE]
3138
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3139
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3140
                              " address" % (instance.name, nic_idx))
3141
      if nic_errors:
3142
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3143
                                   "\n".join(nic_errors))
3144

    
3145
    # hypervisor list/parameters
3146
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3147
    if self.op.hvparams:
3148
      for hv_name, hv_dict in self.op.hvparams.items():
3149
        if hv_name not in self.new_hvparams:
3150
          self.new_hvparams[hv_name] = hv_dict
3151
        else:
3152
          self.new_hvparams[hv_name].update(hv_dict)
3153

    
3154
    # os hypervisor parameters
3155
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3156
    if self.op.os_hvp:
3157
      for os_name, hvs in self.op.os_hvp.items():
3158
        if os_name not in self.new_os_hvp:
3159
          self.new_os_hvp[os_name] = hvs
3160
        else:
3161
          for hv_name, hv_dict in hvs.items():
3162
            if hv_name not in self.new_os_hvp[os_name]:
3163
              self.new_os_hvp[os_name][hv_name] = hv_dict
3164
            else:
3165
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
3166

    
3167
    # os parameters
3168
    self.new_osp = objects.FillDict(cluster.osparams, {})
3169
    if self.op.osparams:
3170
      for os_name, osp in self.op.osparams.items():
3171
        if os_name not in self.new_osp:
3172
          self.new_osp[os_name] = {}
3173

    
3174
        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3175
                                                  use_none=True)
3176

    
3177
        if not self.new_osp[os_name]:
3178
          # we removed all parameters
3179
          del self.new_osp[os_name]
3180
        else:
3181
          # check the parameter validity (remote check)
3182
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3183
                         os_name, self.new_osp[os_name])
3184

    
3185
    # changes to the hypervisor list
3186
    if self.op.enabled_hypervisors is not None:
3187
      self.hv_list = self.op.enabled_hypervisors
3188
      for hv in self.hv_list:
3189
        # if the hypervisor doesn't already exist in the cluster
3190
        # hvparams, we initialize it to empty, and then (in both
3191
        # cases) we make sure to fill the defaults, as we might not
3192
        # have a complete defaults list if the hypervisor wasn't
3193
        # enabled before
3194
        if hv not in new_hvp:
3195
          new_hvp[hv] = {}
3196
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3197
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3198
    else:
3199
      self.hv_list = cluster.enabled_hypervisors
3200

    
3201
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
3202
      # either the enabled list has changed, or the parameters have, validate
3203
      for hv_name, hv_params in self.new_hvparams.items():
3204
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
3205
            (self.op.enabled_hypervisors and
3206
             hv_name in self.op.enabled_hypervisors)):
3207
          # either this is a new hypervisor, or its parameters have changed
3208
          hv_class = hypervisor.GetHypervisor(hv_name)
3209
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3210
          hv_class.CheckParameterSyntax(hv_params)
3211
          _CheckHVParams(self, node_list, hv_name, hv_params)
3212

    
3213
    if self.op.os_hvp:
3214
      # no need to check any newly-enabled hypervisors, since the
3215
      # defaults have already been checked in the above code-block
3216
      for os_name, os_hvp in self.new_os_hvp.items():
3217
        for hv_name, hv_params in os_hvp.items():
3218
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3219
          # we need to fill in the new os_hvp on top of the actual hv_p
3220
          cluster_defaults = self.new_hvparams.get(hv_name, {})
3221
          new_osp = objects.FillDict(cluster_defaults, hv_params)
3222
          hv_class = hypervisor.GetHypervisor(hv_name)
3223
          hv_class.CheckParameterSyntax(new_osp)
3224
          _CheckHVParams(self, node_list, hv_name, new_osp)
3225

    
3226
    if self.op.default_iallocator:
3227
      alloc_script = utils.FindFile(self.op.default_iallocator,
3228
                                    constants.IALLOCATOR_SEARCH_PATH,
3229
                                    os.path.isfile)
3230
      if alloc_script is None:
3231
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3232
                                   " specified" % self.op.default_iallocator,
3233
                                   errors.ECODE_INVAL)
3234

    
3235
  def Exec(self, feedback_fn):
3236
    """Change the parameters of the cluster.
3237

3238
    """
3239
    if self.op.vg_name is not None:
3240
      new_volume = self.op.vg_name
3241
      if not new_volume:
3242
        new_volume = None
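      # an empty vg_name in the opcode means "disable LVM storage"; it is
      # normalized to None before being compared with the stored value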
3243
      if new_volume != self.cfg.GetVGName():
3244
        self.cfg.SetVGName(new_volume)
3245
      else:
3246
        feedback_fn("Cluster LVM configuration already in desired"
3247
                    " state, not changing")
3248
    if self.op.drbd_helper is not None:
3249
      new_helper = self.op.drbd_helper
3250
      if not new_helper:
3251
        new_helper = None
3252
      if new_helper != self.cfg.GetDRBDHelper():
3253
        self.cfg.SetDRBDHelper(new_helper)
3254
      else:
3255
        feedback_fn("Cluster DRBD helper already in desired state,"
3256
                    " not changing")
3257
    if self.op.hvparams:
3258
      self.cluster.hvparams = self.new_hvparams
3259
    if self.op.os_hvp:
3260
      self.cluster.os_hvp = self.new_os_hvp
3261
    if self.op.enabled_hypervisors is not None:
3262
      self.cluster.hvparams = self.new_hvparams
3263
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3264
    if self.op.beparams:
3265
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3266
    if self.op.nicparams:
3267
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3268
    if self.op.osparams:
3269
      self.cluster.osparams = self.new_osp
3270
    if self.op.ndparams:
3271
      self.cluster.ndparams = self.new_ndparams
3272

    
3273
    if self.op.candidate_pool_size is not None:
3274
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
3275
      # we need to update the pool size here, otherwise the save will fail
3276
      _AdjustCandidatePool(self, [])
3277

    
3278
    if self.op.maintain_node_health is not None:
3279
      self.cluster.maintain_node_health = self.op.maintain_node_health
3280

    
3281
    if self.op.prealloc_wipe_disks is not None:
3282
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3283

    
3284
    if self.op.add_uids is not None:
3285
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3286

    
3287
    if self.op.remove_uids is not None:
3288
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3289

    
3290
    if self.op.uid_pool is not None:
3291
      self.cluster.uid_pool = self.op.uid_pool
3292

    
3293
    if self.op.default_iallocator is not None:
3294
      self.cluster.default_iallocator = self.op.default_iallocator
3295

    
3296
    if self.op.reserved_lvs is not None:
3297
      self.cluster.reserved_lvs = self.op.reserved_lvs
3298

    
3299
    def helper_os(aname, mods, desc):
3300
      desc += " OS list"
3301
      lst = getattr(self.cluster, aname)
3302
      for key, val in mods:
3303
        if key == constants.DDM_ADD:
3304
          if val in lst:
3305
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3306
          else:
3307
            lst.append(val)
3308
        elif key == constants.DDM_REMOVE:
3309
          if val in lst:
3310
            lst.remove(val)
3311
          else:
3312
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3313
        else:
3314
          raise errors.ProgrammerError("Invalid modification '%s'" % key)
3315

    
3316
    if self.op.hidden_os:
3317
      helper_os("hidden_os", self.op.hidden_os, "hidden")
3318

    
3319
    if self.op.blacklisted_os:
3320
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3321

    
3322
    if self.op.master_netdev:
3323
      master = self.cfg.GetMasterNode()
3324
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
3325
                  self.cluster.master_netdev)
3326
      result = self.rpc.call_node_stop_master(master, False)
3327
      result.Raise("Could not disable the master ip")
3328
      feedback_fn("Changing master_netdev from %s to %s" %
3329
                  (self.cluster.master_netdev, self.op.master_netdev))
3330
      self.cluster.master_netdev = self.op.master_netdev
3331

    
3332
    self.cfg.Update(self.cluster, feedback_fn)
3333

    
3334
    if self.op.master_netdev:
3335
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
3336
                  self.op.master_netdev)
3337
      result = self.rpc.call_node_start_master(master, False, False)
3338
      if result.fail_msg:
3339
        self.LogWarning("Could not re-enable the master ip on"
3340
                        " the master, please restart manually: %s",
3341
                        result.fail_msg)


def _UploadHelper(lu, nodes, fname):
  """Helper for uploading a file and showing warnings.

  """
  if os.path.exists(fname):
    result = lu.rpc.call_upload_file(nodes, fname)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        msg = ("Copy of file %s to node %s failed: %s" %
               (fname, to_node, msg))
        lu.proc.LogWarning(msg)


def _ComputeAncillaryFiles(cluster, redist):
  """Compute files external to Ganeti which need to be consistent.

  @type redist: boolean
  @param redist: Whether to include files which need to be redistributed

  """
  # Compute files for all nodes
  files_all = set([
    constants.SSH_KNOWN_HOSTS_FILE,
    constants.CONFD_HMAC_KEY,
    constants.CLUSTER_DOMAIN_SECRET_FILE,
    ])

  if not redist:
    files_all.update(constants.ALL_CERT_FILES)
    files_all.update(ssconf.SimpleStore().GetFileList())

  if cluster.modify_etc_hosts:
    files_all.add(constants.ETC_HOSTS)

  # Files which must either exist on all nodes or on none
  files_all_opt = set([
    constants.RAPI_USERS_FILE,
    ])

  # Files which should only be on master candidates
  files_mc = set()
  if not redist:
    files_mc.add(constants.CLUSTER_CONF_FILE)

  # Files which should only be on VM-capable nodes
  files_vm = set(filename
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())

  # Filenames must be unique
  assert (len(files_all | files_all_opt | files_mc | files_vm) ==
          sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
         "Found file listed in more than one file list"

  return (files_all, files_all_opt, files_mc, files_vm)


def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  # Gather target nodes
  cluster = lu.cfg.GetClusterInfo()
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())

  online_nodes = lu.cfg.GetOnlineNodeList()
  vm_nodes = lu.cfg.GetVmCapableNodeList()

  if additional_nodes is not None:
    online_nodes.extend(additional_nodes)
    if additional_vm:
      vm_nodes.extend(additional_nodes)

  # Never distribute to master node
  for nodelist in [online_nodes, vm_nodes]:
    if master_info.name in nodelist:
      nodelist.remove(master_info.name)

  # Gather file lists
  (files_all, files_all_opt, files_mc, files_vm) = \
    _ComputeAncillaryFiles(cluster, True)

  # Never re-distribute configuration file from here
  assert not (constants.CLUSTER_CONF_FILE in files_all or
              constants.CLUSTER_CONF_FILE in files_vm)
  assert not files_mc, "Master candidates not handled in this function"

  filemap = [
    (online_nodes, files_all),
    (online_nodes, files_all_opt),
    (vm_nodes, files_vm),
    ]

  # Upload the files
  for (node_list, files) in filemap:
    for fname in files:
      _UploadHelper(lu, node_list, fname)


class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)


def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  """
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                      node, disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded


def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result


class LUOobCommand(NoHooksLU):
3585
  """Logical unit for OOB handling.
3586

3587
  """
3588
  REG_BGL = False
3589
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
3590

    
3591
  def ExpandNames(self):
3592
    """Gather locks we need.
3593

3594
    """
3595
    if self.op.node_names:
3596
      self.op.node_names = _GetWantedNodes(self, self.op.node_names)
3597
      lock_names = self.op.node_names
3598
    else:
3599
      lock_names = locking.ALL_SET
3600

    
3601
    self.needed_locks = {
3602
      locking.LEVEL_NODE: lock_names,
3603
      }
3604

    
3605
  def CheckPrereq(self):
3606
    """Check prerequisites.
3607

3608
    This checks:
3609
     - the node exists in the configuration
3610
     - OOB is supported
3611

3612
    Any errors are signaled by raising errors.OpPrereqError.
3613

3614
    """
3615
    self.nodes = []
3616
    self.master_node = self.cfg.GetMasterNode()
3617

    
3618
    assert self.op.power_delay >= 0.0
3619

    
3620
    if self.op.node_names:
3621
      if (self.op.command in self._SKIP_MASTER and
3622
          self.master_node in self.op.node_names):
3623
        master_node_obj = self.cfg.GetNodeInfo(self.master_node)
3624
        master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
3625

    
3626
        if master_oob_handler:
3627
          additional_text = ("run '%s %s %s' if you want to operate on the"
3628
                             " master regardless") % (master_oob_handler,
3629
                                                      self.op.command,
3630
                                                      self.master_node)
3631
        else:
3632
          additional_text = "it does not support out-of-band operations"
3633

    
3634
        raise errors.OpPrereqError(("Operating on the master node %s is not"
3635
                                    " allowed for %s; %s") %
3636
                                   (self.master_node, self.op.command,
3637
                                    additional_text), errors.ECODE_INVAL)
3638
    else:
3639
      self.op.node_names = self.cfg.GetNodeList()
3640
      if self.op.command in self._SKIP_MASTER:
3641
        self.op.node_names.remove(self.master_node)
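        # the master node executes this very job, so power-off/power-cycle
        # never act on it implicitly; naming it explicitly is rejected above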
3642

    
3643
    if self.op.command in self._SKIP_MASTER:
3644
      assert self.master_node not in self.op.node_names
3645

    
3646
    for node_name in self.op.node_names:
3647
      node = self.cfg.GetNodeInfo(node_name)
3648

    
3649
      if node is None:
3650
        raise errors.OpPrereqError("Node %s not found" % node_name,
3651
                                   errors.ECODE_NOENT)
3652
      else:
3653
        self.nodes.append(node)
3654

    
3655
      if (not self.op.ignore_status and
3656
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
3657
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
3658
                                    " not marked offline") % node_name,
3659
                                   errors.ECODE_STATE)
3660

    
3661
  def Exec(self, feedback_fn):
3662
    """Execute OOB and return result if we expect any.
3663

3664
    """
3665
    master_node = self.master_node
3666
    ret = []
3667

    
3668
    for idx, node in enumerate(utils.NiceSort(self.nodes,
3669
                                              key=lambda node: node.name)):
3670
      node_entry = [(constants.RS_NORMAL, node.name)]
3671
      ret.append(node_entry)
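      # each entry in "ret" is a per-node row: it starts with the node name
      # and gets a (status, payload) tuple appended once the OOB call finishes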
3672

    
3673
      oob_program = _SupportsOob(self.cfg, node)
3674

    
3675
      if not oob_program:
3676
        node_entry.append((constants.RS_UNAVAIL, None))
3677
        continue
3678

    
3679
      logging.info("Executing out-of-band command '%s' using '%s' on %s",
3680
                   self.op.command, oob_program, node.name)
3681
      result = self.rpc.call_run_oob(master_node, oob_program,
3682
                                     self.op.command, node.name,
3683
                                     self.op.timeout)
3684

    
3685
      if result.fail_msg:
3686
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
3687
                        node.name, result.fail_msg)
3688
        node_entry.append((constants.RS_NODATA, None))
3689
      else:
3690
        try:
3691
          self._CheckPayload(result)
3692
        except errors.OpExecError, err:
3693
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
3694
                          node.name, err)
3695
          node_entry.append((constants.RS_NODATA, None))
3696
        else:
3697
          if self.op.command == constants.OOB_HEALTH:
3698
            # For health we should log important events
3699
            for item, status in result.payload:
3700
              if status in [constants.OOB_STATUS_WARNING,
3701
                            constants.OOB_STATUS_CRITICAL]:
3702
                self.LogWarning("Item '%s' on node '%s' has status '%s'",
3703
                                item, node.name, status)
3704

    
3705
          if self.op.command == constants.OOB_POWER_ON:
3706
            node.powered = True
3707
          elif self.op.command == constants.OOB_POWER_OFF:
3708
            node.powered = False
3709
          elif self.op.command == constants.OOB_POWER_STATUS:
3710
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
3711
            if powered != node.powered:
3712
              logging.warning(("Recorded power state (%s) of node '%s' does not"
3713
                               " match actual power state (%s)"), node.powered,
3714
                              node.name, powered)
3715

    
3716
          # For configuration changing commands we should update the node
3717
          if self.op.command in (constants.OOB_POWER_ON,
3718
                                 constants.OOB_POWER_OFF):
3719
            self.cfg.Update(node, feedback_fn)
3720

    
3721
          node_entry.append((constants.RS_NORMAL, result.payload))
3722

    
3723
          if (self.op.command == constants.OOB_POWER_ON and
3724
              idx < len(self.nodes) - 1):
3725
            time.sleep(self.op.power_delay)
3726

    
3727
    return ret
3728

    
3729
  def _CheckPayload(self, result):
3730
    """Checks if the payload is valid.
3731

3732
    @param result: RPC result
3733
    @raises errors.OpExecError: If payload is not valid
3734

3735
    """
3736
    errs = []
3737
    if self.op.command == constants.OOB_HEALTH:
3738
      if not isinstance(result.payload, list):
3739
        errs.append("command 'health' is expected to return a list but got %s" %
3740
                    type(result.payload))
3741
      else:
3742
        for item, status in result.payload:
3743
          if status not in constants.OOB_STATUSES:
3744
            errs.append("health item '%s' has invalid status '%s'" %
3745
                        (item, status))
3746

    
3747
    if self.op.command == constants.OOB_POWER_STATUS:
3748
      if not isinstance(result.payload, dict):
3749
        errs.append("power-status is expected to return a dict but got %s" %
3750
                    type(result.payload))
3751

    
3752
    if self.op.command in [
3753
        constants.OOB_POWER_ON,
3754
        constants.OOB_POWER_OFF,
3755
        constants.OOB_POWER_CYCLE,
3756
        ]:
3757
      if result.payload is not None:
3758
        errs.append("%s is expected to not return payload but got '%s'" %
3759
                    (self.op.command, result.payload))
3760

    
3761
    if errs:
3762
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
3763
                               utils.CommaJoin(errs))
3764

    
3765
class _OsQuery(_QueryBase):
3766
  FIELDS = query.OS_FIELDS
3767

    
3768
  def ExpandNames(self, lu):
3769
    # Lock all nodes in shared mode
3770
    # Temporary removal of locks, should be reverted later
3771
    # TODO: reintroduce locks when they are lighter-weight
3772
    lu.needed_locks = {}
3773
    #self.share_locks[locking.LEVEL_NODE] = 1
3774
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3775

    
3776
    # The following variables interact with _QueryBase._GetNames
3777
    if self.names:
3778
      self.wanted = self.names
3779
    else:
3780
      self.wanted = locking.ALL_SET
3781

    
3782
    self.do_locking = self.use_locking
3783

    
3784
  def DeclareLocks(self, lu, level):
3785
    pass
3786

    
3787
  @staticmethod
3788
  def _DiagnoseByOS(rlist):
3789
    """Remaps a per-node return list into an a per-os per-node dictionary
3790

3791
    @param rlist: a map with node names as keys and OS objects as values
3792

3793
    @rtype: dict
3794
    @return: a dictionary with osnames as keys and as value another
3795
        map, with nodes as keys and tuples of (path, status, diagnose,
3796
        variants, parameters, api_versions) as values, eg::
3797

3798
          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3799
                                     (/srv/..., False, "invalid api")],
3800
                           "node2": [(/srv/..., True, "", [], [])]}
3801
          }
3802

3803
    """
3804
    all_os = {}
3805
    # we build here the list of nodes that didn't fail the RPC (at RPC
3806
    # level), so that nodes with a non-responding node daemon don't
3807
    # make all OSes invalid
3808
    good_nodes = [node_name for node_name in rlist
3809
                  if not rlist[node_name].fail_msg]
3810
    for node_name, nr in rlist.items():
3811
      if nr.fail_msg or not nr.payload:
3812
        continue
3813
      for (name, path, status, diagnose, variants,
3814
           params, api_versions) in nr.payload:
3815
        if name not in all_os:
3816
          # build a list of nodes for this os containing empty lists
3817
          # for each node in node_list
3818
          all_os[name] = {}
3819
          for nname in good_nodes:
3820
            all_os[name][nname] = []
3821
        # convert params from [name, help] to (name, help)
3822
        params = [tuple(v) for v in params]
3823
        all_os[name][node_name].append((path, status, diagnose,
3824
                                        variants, params, api_versions))
3825
    return all_os
3826

    
3827
  def _GetQueryData(self, lu):
3828
    """Computes the list of nodes and their attributes.
3829

3830
    """
3831
    # Locking is not used
3832
    assert not (compat.any(lu.glm.is_owned(level)
3833
                           for level in locking.LEVELS
3834
                           if level != locking.LEVEL_CLUSTER) or
3835
                self.do_locking or self.use_locking)
3836

    
3837
    valid_nodes = [node.name
3838
                   for node in lu.cfg.GetAllNodesInfo().values()
3839
                   if not node.offline and node.vm_capable]
3840
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
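    # "pol" maps OS name -> {node name: [(path, status, diagnose, variants,
    # params, api_versions), ...]}, as documented in _DiagnoseByOS above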
3841
    cluster = lu.cfg.GetClusterInfo()
3842

    
3843
    data = {}
3844

    
3845
    for (os_name, os_data) in pol.items():
3846
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
3847
                          hidden=(os_name in cluster.hidden_os),
3848
                          blacklisted=(os_name in cluster.blacklisted_os))
3849

    
3850
      variants = set()
3851
      parameters = set()
3852
      api_versions = set()
3853

    
3854
      for idx, osl in enumerate(os_data.values()):
3855
        info.valid = bool(info.valid and osl and osl[0][1])
3856
        if not info.valid:
3857
          break
3858

    
3859
        (node_variants, node_params, node_api) = osl[0][3:6]
3860
        if idx == 0:
3861
          # First entry
3862
          variants.update(node_variants)
3863
          parameters.update(node_params)
3864
          api_versions.update(node_api)
3865
        else:
3866
          # Filter out inconsistent values
3867
          variants.intersection_update(node_variants)
3868
          parameters.intersection_update(node_params)
3869
          api_versions.intersection_update(node_api)
3870

    
3871
      info.variants = list(variants)
3872
      info.parameters = list(parameters)
3873
      info.api_versions = list(api_versions)
3874

    
3875
      data[os_name] = info
3876

    
3877
    # Prepare data in requested order
3878
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
3879
            if name in data]


class LUOsDiagnose(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  REQ_BGL = False

  @staticmethod
  def _BuildFilter(fields, names):
    """Builds a filter for querying OSes.

    """
    name_filter = qlang.MakeSimpleFilter("name", names)

    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
    # respective field is not requested
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
                     for fname in ["hidden", "blacklisted"]
                     if fname not in fields]
    if "valid" not in fields:
      status_filter.append([qlang.OP_TRUE, "valid"])

    if status_filter:
      status_filter.insert(0, qlang.OP_AND)
    else:
      status_filter = None

    if name_filter and status_filter:
      return [qlang.OP_AND, name_filter, status_filter]
    elif name_filter:
      return name_filter
    else:
      return status_filter

  def CheckArguments(self):
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
                       self.op.output_fields, False)

  def ExpandNames(self):
    self.oq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.oq.OldStyleQuery(self)


class LUNodeRemove(LogicalUnit):
3927
  """Logical unit for removing a node.
3928

3929
  """
3930
  HPATH = "node-remove"
3931
  HTYPE = constants.HTYPE_NODE
3932

    
3933
  def BuildHooksEnv(self):
3934
    """Build hooks env.
3935

3936
    This doesn't run on the target node in the pre phase as a failed
3937
    node would then be impossible to remove.
3938

3939
    """
3940
    return {
3941
      "OP_TARGET": self.op.node_name,
3942
      "NODE_NAME": self.op.node_name,
3943
      }
3944

    
3945
  def BuildHooksNodes(self):
3946
    """Build hooks nodes.
3947

3948
    """
3949
    all_nodes = self.cfg.GetNodeList()
3950
    try:
3951
      all_nodes.remove(self.op.node_name)
3952
    except ValueError:
3953
      logging.warning("Node '%s', which is about to be removed, was not found"
3954
                      " in the list of all nodes", self.op.node_name)
3955
    return (all_nodes, all_nodes)
3956

    
3957
  def CheckPrereq(self):
3958
    """Check prerequisites.
3959

3960
    This checks:
3961
     - the node exists in the configuration
3962
     - it does not have primary or secondary instances
3963
     - it's not the master
3964

3965
    Any errors are signaled by raising errors.OpPrereqError.
3966

3967
    """
3968
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3969
    node = self.cfg.GetNodeInfo(self.op.node_name)
3970
    assert node is not None
3971

    
3972
    instance_list = self.cfg.GetInstanceList()
3973

    
3974
    masternode = self.cfg.GetMasterNode()
3975
    if node.name == masternode:
3976
      raise errors.OpPrereqError("Node is the master node, failover to another"
3977
                                 " node is required", errors.ECODE_INVAL)
3978

    
3979
    for instance_name in instance_list:
3980
      instance = self.cfg.GetInstanceInfo(instance_name)
3981
      if node.name in instance.all_nodes:
3982
        raise errors.OpPrereqError("Instance %s is still running on the node,"
3983
                                   " please remove first" % instance_name,
3984
                                   errors.ECODE_INVAL)
3985
    self.op.node_name = node.name
3986
    self.node = node
3987

    
3988
  def Exec(self, feedback_fn):
3989
    """Removes the node from the cluster.
3990

3991
    """
3992
    node = self.node
3993
    logging.info("Stopping the node daemon and removing configs from node %s",
3994
                 node.name)
3995

    
3996
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3997

    
3998
    # Promote nodes to master candidate as needed
3999
    _AdjustCandidatePool(self, exceptions=[node.name])
4000
    self.context.RemoveNode(node.name)
4001

    
4002
    # Run post hooks on the node before it's removed
4003
    _RunPostHook(self, node.name)
4004

    
4005
    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4006
    msg = result.fail_msg
4007
    if msg:
4008
      self.LogWarning("Errors encountered on the remote node while leaving"
4009
                      " the cluster: %s", msg)
4010

    
4011
    # Remove node from our /etc/hosts
4012
    if self.cfg.GetClusterInfo().modify_etc_hosts:
4013
      master_node = self.cfg.GetMasterNode()
4014
      result = self.rpc.call_etc_hosts_modify(master_node,
4015
                                              constants.ETC_HOSTS_REMOVE,
4016
                                              node.name, None)
4017
      result.Raise("Can't update hosts file with new host data")
4018
      _RedistributeAncillaryFiles(self)
4019

    
4020

    
4021
class _NodeQuery(_QueryBase):
4022
  FIELDS = query.NODE_FIELDS
4023

    
4024
  def ExpandNames(self, lu):
4025
    lu.needed_locks = {}
4026
    lu.share_locks[locking.LEVEL_NODE] = 1
4027

    
4028
    if self.names:
4029
      self.wanted = _GetWantedNodes(lu, self.names)
4030
    else:
4031
      self.wanted = locking.ALL_SET
4032

    
4033
    self.do_locking = (self.use_locking and
4034
                       query.NQ_LIVE in self.requested_data)
4035

    
4036
    if self.do_locking:
4037
      # if we don't request only static fields, we need to lock the nodes
4038
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4039

    
4040
  def DeclareLocks(self, lu, level):
4041
    pass
4042

    
4043
  def _GetQueryData(self, lu):
4044
    """Computes the list of nodes and their attributes.
4045

4046
    """
4047
    all_info = lu.cfg.GetAllNodesInfo()
4048

    
4049
    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4050

    
4051
    # Gather data as requested
4052
    if query.NQ_LIVE in self.requested_data:
4053
      # filter out non-vm_capable nodes
4054
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4055

    
4056
      node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
4057
                                        lu.cfg.GetHypervisorType())
4058
      live_data = dict((name, nresult.payload)
4059
                       for (name, nresult) in node_data.items()
4060
                       if not nresult.fail_msg and nresult.payload)
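      # live_data keeps only nodes that answered the node_info RPC with a
      # payload; offline or failing nodes simply have no live fields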
4061
    else:
4062
      live_data = None
4063

    
4064
    if query.NQ_INST in self.requested_data:
4065
      node_to_primary = dict([(name, set()) for name in nodenames])
4066
      node_to_secondary = dict([(name, set()) for name in nodenames])
4067

    
4068
      inst_data = lu.cfg.GetAllInstancesInfo()
4069

    
4070
      for inst in inst_data.values():
4071
        if inst.primary_node in node_to_primary:
4072
          node_to_primary[inst.primary_node].add(inst.name)
4073
        for secnode in inst.secondary_nodes:
4074
          if secnode in node_to_secondary:
4075
            node_to_secondary[secnode].add(inst.name)
4076
    else:
4077
      node_to_primary = None
4078
      node_to_secondary = None
4079

    
4080
    if query.NQ_OOB in self.requested_data:
4081
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4082
                         for name, node in all_info.iteritems())
4083
    else:
4084
      oob_support = None
4085

    
4086
    if query.NQ_GROUP in self.requested_data:
4087
      groups = lu.cfg.GetAllNodeGroupsInfo()
4088
    else:
4089
      groups = {}
4090

    
4091
    return query.NodeQueryData([all_info[name] for name in nodenames],
4092
                               live_data, lu.cfg.GetMasterNode(),
4093
                               node_to_primary, node_to_secondary, groups,
4094
                               oob_support, lu.cfg.GetClusterInfo())


class LUNodeQuery(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
                         self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.nq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.nq.OldStyleQuery(self)


class LUNodeQueryvols(NoHooksLU):
4116
  """Logical unit for getting volumes on node(s).
4117

4118
  """
4119
  REQ_BGL = False
4120
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4121
  _FIELDS_STATIC = utils.FieldSet("node")
4122

    
4123
  def CheckArguments(self):
4124
    _CheckOutputFields(static=self._FIELDS_STATIC,
4125
                       dynamic=self._FIELDS_DYNAMIC,
4126
                       selected=self.op.output_fields)
4127

    
4128
  def ExpandNames(self):
4129
    self.needed_locks = {}
4130
    self.share_locks[locking.LEVEL_NODE] = 1
4131
    if not self.op.nodes:
4132
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4133
    else:
4134
      self.needed_locks[locking.LEVEL_NODE] = \
4135
        _GetWantedNodes(self, self.op.nodes)
4136

    
4137
  def Exec(self, feedback_fn):
4138
    """Computes the list of nodes and their attributes.
4139

4140
    """
4141
    nodenames = self.glm.list_owned(locking.LEVEL_NODE)
4142
    volumes = self.rpc.call_node_volumes(nodenames)
4143

    
4144
    ilist = [self.cfg.GetInstanceInfo(iname) for iname
4145
             in self.cfg.GetInstanceList()]
4146

    
4147
    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
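    # lv_by_node: instance object -> {node name: [LV names]}; used further
    # down to attribute each reported volume to the instance owning it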
4148

    
4149
    output = []
4150
    for node in nodenames:
4151
      nresult = volumes[node]
4152
      if nresult.offline:
4153
        continue
4154
      msg = nresult.fail_msg
4155
      if msg:
4156
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4157
        continue
4158

    
4159
      node_vols = nresult.payload[:]
4160
      node_vols.sort(key=lambda vol: vol['dev'])
4161

    
4162
      for vol in node_vols:
4163
        node_output = []
4164
        for field in self.op.output_fields:
4165
          if field == "node":
4166
            val = node
4167
          elif field == "phys":
4168
            val = vol['dev']
4169
          elif field == "vg":
4170
            val = vol['vg']
4171
          elif field == "name":
4172
            val = vol['name']
4173
          elif field == "size":
4174
            val = int(float(vol['size']))
4175
          elif field == "instance":
4176
            for inst in ilist:
4177
              if node not in lv_by_node[inst]:
4178
                continue
4179
              if vol['name'] in lv_by_node[inst][node]:
4180
                val = inst.name
4181
                break
4182
            else:
4183
              val = '-'
4184
          else:
4185
            raise errors.ParameterError(field)
4186
          node_output.append(str(val))
4187

    
4188
        output.append(node_output)
4189

    
4190
    return output
4191

    
4192

    
4193
class LUNodeQueryStorage(NoHooksLU):
4194
  """Logical unit for getting information on storage units on node(s).
4195

4196
  """
4197
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4198
  REQ_BGL = False
4199

    
4200
  def CheckArguments(self):
4201
    _CheckOutputFields(static=self._FIELDS_STATIC,
4202
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4203
                       selected=self.op.output_fields)
4204

    
4205
  def ExpandNames(self):
4206
    self.needed_locks = {}
4207
    self.share_locks[locking.LEVEL_NODE] = 1
4208

    
4209
    if self.op.nodes:
4210
      self.needed_locks[locking.LEVEL_NODE] = \
4211
        _GetWantedNodes(self, self.op.nodes)
4212
    else:
4213
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4214

    
4215
  def Exec(self, feedback_fn):
4216
    """Computes the list of nodes and their attributes.
4217

4218
    """
4219
    self.nodes = self.glm.list_owned(locking.LEVEL_NODE)
4220

    
4221
    # Always get name to sort by
4222
    if constants.SF_NAME in self.op.output_fields:
4223
      fields = self.op.output_fields[:]
4224
    else:
4225
      fields = [constants.SF_NAME] + self.op.output_fields
4226

    
4227
    # Never ask for node or type as it's only known to the LU
4228
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
4229
      while extra in fields:
4230
        fields.remove(extra)
4231

    
4232
    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4233
    name_idx = field_idx[constants.SF_NAME]
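    # the name column is always fetched (even if not requested) because the
    # per-node rows below are keyed and sorted by it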
4234

    
4235
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4236
    data = self.rpc.call_storage_list(self.nodes,
4237
                                      self.op.storage_type, st_args,
4238
                                      self.op.name, fields)
4239

    
4240
    result = []
4241

    
4242
    for node in utils.NiceSort(self.nodes):
4243
      nresult = data[node]
4244
      if nresult.offline:
4245
        continue
4246

    
4247
      msg = nresult.fail_msg
4248
      if msg:
4249
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4250
        continue
4251

    
4252
      rows = dict([(row[name_idx], row) for row in nresult.payload])
4253

    
4254
      for name in utils.NiceSort(rows.keys()):
4255
        row = rows[name]
4256

    
4257
        out = []
4258

    
4259
        for field in self.op.output_fields:
4260
          if field == constants.SF_NODE:
4261
            val = node
4262
          elif field == constants.SF_TYPE:
4263
            val = self.op.storage_type
4264
          elif field in field_idx:
4265
            val = row[field_idx[field]]
4266
          else:
4267
            raise errors.ParameterError(field)
4268

    
4269
          out.append(val)
4270

    
4271
        result.append(out)
4272

    
4273
    return result
4274

    
4275

    
4276
class _InstanceQuery(_QueryBase):
  FIELDS = query.INSTANCE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks[locking.LEVEL_INSTANCE] = 1
    lu.share_locks[locking.LEVEL_NODE] = 1

    if self.names:
      self.wanted = _GetWantedInstances(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.IQ_LIVE in self.requested_data)
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      lu.needed_locks[locking.LEVEL_NODE] = []
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, lu, level):
    if level == locking.LEVEL_NODE and self.do_locking:
      lu._LockInstancesNodes() # pylint: disable-msg=W0212

  def _GetQueryData(self, lu):
    """Computes the list of instances and their attributes.

    """
    cluster = lu.cfg.GetClusterInfo()
    all_info = lu.cfg.GetAllInstancesInfo()

    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)

    instance_list = [all_info[name] for name in instance_names]
    nodes = frozenset(itertools.chain(*(inst.all_nodes
                                        for inst in instance_list)))
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
    bad_nodes = []
    offline_nodes = []
    wrongnode_inst = set()

    # Gather data as requested
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
      live_data = {}
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          assert result.fail_msg
          offline_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        elif result.payload:
          for inst in result.payload:
            if inst in all_info:
              if all_info[inst].primary_node == name:
                live_data.update(result.payload)
              else:
                wrongnode_inst.add(inst)
            else:
              # orphan instance; we don't list it here as we don't
              # handle this case yet in the output of instance listing
              logging.warning("Orphan instance '%s' found on node %s",
                              inst, name)
        # else no instance is alive
    else:
      live_data = {}

    if query.IQ_DISKUSAGE in self.requested_data:
      disk_usage = dict((inst.name,
                         _ComputeDiskSize(inst.disk_template,
                                          [{constants.IDISK_SIZE: disk.size}
                                           for disk in inst.disks]))
                        for inst in instance_list)
    else:
      disk_usage = None

    if query.IQ_CONSOLE in self.requested_data:
      consinfo = {}
      for inst in instance_list:
        if inst.name in live_data:
          # Instance is running
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
        else:
          consinfo[inst.name] = None
      assert set(consinfo.keys()) == set(instance_names)
    else:
      consinfo = None

    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
                                   disk_usage, offline_nodes, bad_nodes,
                                   live_data, wrongnode_inst, consinfo)


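# Editor's note (not in the original source): _InstanceQuery above only pays
# for the data that was requested.  Live data is gathered via
# call_all_instances_info when IQ_LIVE or IQ_CONSOLE is asked for, disk
# usage is computed locally for IQ_DISKUSAGE, and console information is
# built for IQ_CONSOLE; a query for static fields only therefore issues no
# per-node RPCs at all.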
class LUQuery(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    qcls = _GetQueryImplementation(self.op.what)

    self.impl = qcls(self.op.filter, self.op.fields, False)

  def ExpandNames(self):
    self.impl.ExpandNames(self)

  def DeclareLocks(self, level):
    self.impl.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.impl.NewStyleQuery(self)


class LUQueryFields(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.qcls = _GetQueryImplementation(self.op.what)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)


class LUNodeModifyStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def Exec(self, feedback_fn):
    """Modifies a storage volume on a node.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


class LUNodeAdd(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _NFLAGS = ["master_capable", "vm_capable"]

  def CheckArguments(self):
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
    # validate/normalize the node name
    self.hostname = netutils.GetHostname(name=self.op.node_name,
                                         family=self.primary_ip_family)
    self.op.node_name = self.hostname.name

    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("Cannot readd the master node",
                                 errors.ECODE_STATE)

    if self.op.readd and self.op.group:
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
                                 " being readded", errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # Exclude added node
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
    post_nodes = pre_nodes + [self.op.node_name, ]

    return (pre_nodes, post_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) match the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg
    hostname = self.hostname
    node = hostname.name
    primary_ip = self.op.primary_ip = hostname.ip
    if self.op.secondary_ip is None:
      if self.primary_ip_family == netutils.IP6Address.family:
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
                                   " IPv4 address must be given as secondary",
                                   errors.ECODE_INVAL)
      self.op.secondary_ip = primary_ip

    secondary_ip = self.op.secondary_ip
    if not netutils.IP4Address.IsValid(secondary_ip):
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                 " address" % secondary_ip, errors.ECODE_INVAL)

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    self.changed_primary_ip = False

    for existing_node_name in node_list:
      existing_node = cfg.GetNodeInfo(existing_node_name)

      if self.op.readd and node == existing_node_name:
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # After this 'if' block, None is no longer a valid value for the
    # _capable op attributes
    if self.op.readd:
      old_node = self.cfg.GetNodeInfo(node)
      assert old_node is not None, "Can't retrieve locked node %s" % node
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, getattr(old_node, attr))
    else:
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, True)

    if self.op.readd and not self.op.vm_capable:
      pri, sec = cfg.GetNodeInstances(node)
      if pri or sec:
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
                                   " flag set to false, but it already holds"
                                   " instances" % node,
                                   errors.ECODE_STATE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no secondary ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a secondary ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                           source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to node daemon port",
                                   errors.ECODE_ENVIRON)

    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    if self.op.master_capable:
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
    else:
      self.master_candidate = False

    if self.op.readd:
      self.new_node = old_node
    else:
      node_group = cfg.LookupNodeGroup(self.op.group)
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False,
                                   group=node_group)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    # We are adding a new node, so we assume it's powered
    new_node.powered = True

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # copy the master/vm_capable flags
    for attr in self._NFLAGS:
      setattr(new_node, attr, getattr(self.op, attr))

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    if self.op.ndparams:
      new_node.ndparams = self.op.ndparams
    else:
      new_node.ndparams = {}

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_ADD,
                                              self.hostname.name,
                                              self.hostname.ip)
      result.Raise("Can't update hosts file with new host data")

    if new_node.secondary_ip != new_node.primary_ip:
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
                               False)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: [node],
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
                                  additional_vm=self.op.vm_capable)
      self.context.AddNode(new_node, self.proc.GetECId())


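# Editor's sketch (assumption, not part of the original file): LUNodeAdd is
# normally reached through its opcode rather than instantiated directly,
# roughly along these lines:
#
#   op = opcodes.OpNodeAdd(node_name="node4.example.com",
#                          readd=False, group="default")
#   # submitted via luxi/RAPI; mcpu.Processor dispatches it to LUNodeAdd
#
# The field names mirror the self.op attributes used above; the hostname
# and group value are made up purely for illustration.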
class LUNodeSetParams(LogicalUnit):
  """Modifies the parameters of a node.

  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
      to the node role (as _ROLE_*)
  @cvar _R2F: a dictionary from node role to tuples of flags
  @cvar _FLAGS: a list of attribute names corresponding to the flags

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
  _FLAGS = ["master_candidate", "drained", "offline"]

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
                self.op.master_capable, self.op.vm_capable,
                self.op.secondary_ip, self.op.ndparams]
    if all_mods.count(None) == len(all_mods):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we might be demoting from MC
    self.might_demote = (self.op.master_candidate == False or
                         self.op.offline == True or
                         self.op.drained == True or
                         self.op.master_capable == False)

    if self.op.secondary_ip:
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                   " address" % self.op.secondary_ip,
                                   errors.ECODE_INVAL)

    self.lock_all = self.op.auto_promote and self.might_demote
    self.lock_instances = self.op.secondary_ip is not None

  def ExpandNames(self):
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

    if self.lock_instances:
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET

  def DeclareLocks(self, level):
    # If we have locked all instances, before waiting to lock nodes, release
    # all the ones living on nodes unrelated to the current operation.
    if level == locking.LEVEL_NODE and self.lock_instances:
      self.affected_instances = []
      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
        instances_keep = []

        # Build list of instances to release
        for instance_name in self.glm.list_owned(locking.LEVEL_INSTANCE):
          instance = self.context.cfg.GetInstanceInfo(instance_name)
          if (instance.disk_template in constants.DTS_INT_MIRROR and
              self.op.node_name in instance.all_nodes):
            instances_keep.append(instance_name)
            self.affected_instances.append(instance)

        _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)

        assert (set(self.glm.list_owned(locking.LEVEL_INSTANCE)) ==
                set(instances_keep))

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via master-failover",
                                   errors.ECODE_INVAL)

    if self.op.master_candidate and not node.master_capable:
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
                                 " it a master candidate" % node.name,
                                 errors.ECODE_STATE)

    if self.op.vm_capable == False:
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
      if ipri or isec:
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
                                   " the vm_capable flag" % node.name,
                                   errors.ECODE_STATE)

    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto promote option to allow"
                                   " promotion", errors.ECODE_STATE)

    self.old_flags = old_flags = (node.master_candidate,
                                  node.drained, node.offline)
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
    self.old_role = old_role = self._F2R[old_flags]

    # Check for ineffective changes
    for attr in self._FLAGS:
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
        setattr(self.op, attr, None)

    # Past this point, any flag change to False means a transition
    # away from the respective state, as only real changes are kept

    # TODO: We might query the real power state if it supports OOB
    if _SupportsOob(self.cfg, node):
      if self.op.offline is False and not (node.powered or
                                           self.op.powered == True):
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
                                    " offline status can be reset") %
                                   self.op.node_name)
    elif self.op.powered is not None:
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
                                  " as it does not support out-of-band"
                                  " handling") % self.op.node_name)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.op.drained == False or self.op.offline == False or
        (self.op.master_capable and not node.master_capable)):
      if _DecideSelfPromotion(self):
        self.op.master_candidate = True
        self.LogInfo("Auto-promoting node to master candidate")

    # If we're no longer master capable, we'll demote ourselves from MC
    if self.op.master_capable == False and node.master_candidate:
      self.LogInfo("Demoting from master candidate")
      self.op.master_candidate = False

    # Compute new role
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
    if self.op.master_candidate:
      new_role = self._ROLE_CANDIDATE
    elif self.op.drained:
      new_role = self._ROLE_DRAINED
    elif self.op.offline:
      new_role = self._ROLE_OFFLINE
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # only) True flag
      new_role = self._ROLE_REGULAR
    else: # no new flags, nothing, keep old role
      new_role = old_role

    self.new_role = new_role

    if old_role == self._ROLE_OFFLINE and new_role != old_role:
      # Trying to transition out of offline status
      result = self.rpc.call_version([node.name])[node.name]
      if result.fail_msg:
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                   " to report its version: %s" %
                                   (node.name, result.fail_msg),
                                   errors.ECODE_STATE)
      else:
        self.LogWarning("Transitioning node from offline to online state"
                        " without using re-add. Please make sure the node"
                        " is healthy!")

    if self.op.secondary_ip:
      # Ok even without locking, because this can't be changed by any LU
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
      master_singlehomed = master.secondary_ip == master.primary_ip
      if master_singlehomed and self.op.secondary_ip:
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
                                   " homed cluster", errors.ECODE_INVAL)

      if node.offline:
        if self.affected_instances:
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
                                     " node has instances (%s) configured"
                                     " to use it" % self.affected_instances)
      else:
        # On online nodes, check that no instances are running, and that
        # the node has the new ip and we can reach it.
        for instance in self.affected_instances:
          _CheckInstanceDown(self, instance, "cannot change secondary ip")

        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
        if master.name != node.name:
          # check reachability from master secondary ip to new secondary ip
          if not netutils.TcpPing(self.op.secondary_ip,
                                  constants.DEFAULT_NODED_PORT,
                                  source=master.secondary_ip):
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                       " based ping to node daemon port",
                                       errors.ECODE_ENVIRON)

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node
    old_role = self.old_role
    new_role = self.new_role

    result = []

    if self.op.ndparams:
      node.ndparams = self.new_ndparams

    if self.op.powered is not None:
      node.powered = self.op.powered

    for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))

    if new_role != old_role:
      # Tell the node to demote itself, if no longer MC and not offline
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s", msg)

      new_flags = self._R2F[new_role]
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
        if of != nf:
          result.append((desc, str(nf)))
      (node.master_candidate, node.drained, node.offline) = new_flags

      # we locked all nodes, we adjust the CP before updating this node
      if self.lock_all:
        _AdjustCandidatePool(self, [node.name])

    if self.op.secondary_ip:
      node.secondary_ip = self.op.secondary_ip
      result.append(("secondary_ip", self.op.secondary_ip))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup if the mc
    # flag changed
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)

    return result


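# Editor's worked example (not in the original source) for the _F2R mapping
# used by LUNodeSetParams above: the flag tuple is (master_candidate,
# drained, offline), so (True, False, False) maps to _ROLE_CANDIDATE and
# (False, False, False) to _ROLE_REGULAR.  Unsetting the single True flag
# (e.g. passing offline=False for an offline node) therefore yields
# _ROLE_REGULAR as the new role computed in CheckPrereq.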
class LUNodePowercycle(NoHooksLU):
  """Powercycles a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload


class LUClusterQuery(NoHooksLU):
  """Query cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}

    # Filter just for enabled hypervisors
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = {}
      for hv_name, hv_params in hv_dict.items():
        if hv_name in cluster.enabled_hypervisors:
          os_hvp[os_name][hv_name] = hv_params

    # Convert ip_family to ip_version
    primary_ip_version = constants.IP4_VERSION
    if cluster.primary_ip_family == netutils.IP6Address.family:
      primary_ip_version = constants.IP6_VERSION

    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.enabled_hypervisors[0],
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "nicparams": cluster.nicparams,
      "ndparams": cluster.ndparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      "primary_ip_version": primary_ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
      "hidden_os": cluster.hidden_os,
      "blacklisted_os": cluster.blacklisted_os,
      }

    return result


class LUClusterConfigQuery(NoHooksLU):
  """Return configuration values.

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause", "volume_group_name")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Dump a representation of the cluster config to the standard output.

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      elif field == "volume_group_name":
        entry = self.cfg.GetVGName()
      else:
        raise errors.ParameterError(field)
      values.append(entry)
    return values


class LUInstanceActivateDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
              _AssembleInstanceDisks(self, self.instance,
                                     ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info


def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: False if the operation failed, otherwise a list of
      (host, instance_visible_name, node_visible_name)
      with the mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for idx, inst_disk in enumerate(disks):
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for idx, inst_disk in enumerate(disks):
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info


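# Editor's note (not part of the original module): callers of
# _AssembleInstanceDisks above typically use it as
#
#   disks_ok, dev_info = _AssembleInstanceDisks(self, instance)
#
# where dev_info is a list of (primary_node, iv_name, device_path) tuples;
# the two-pass order (secondaries first, then the primary) narrows, but
# does not close, the DRBD primary/handshake race described in the
# comments above.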
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                           ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")


class LUInstanceDeactivateDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    """
    instance = self.instance
    if self.op.force:
      _ShutdownInstanceDisks(self, instance)
    else:
      _SafeShutdownInstanceDisks(self, instance)


def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks if an instance is running, before calling
  _ShutdownInstanceDisks.

  """
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)


def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on

  """
  if disks is None:
    return instance.disks
  else:
    if not set(disks).issubset(instance.disks):
      raise errors.ProgrammerError("Can only act on disks belonging to the"
                                   " target instance")
    return disks


def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If ignore_primary is true, errors on the primary node are
  ignored.

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if ((node == instance.primary_node and not ignore_primary) or
            (node != instance.primary_node and not result.offline)):
          all_result = False
  return all_result


def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  free_mem = nodeinfo[node].payload.get('memory_free', None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)


def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Checks if nodes have enough free disk space in all the VGs.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type req_sizes: C{dict}
  @param req_sizes: the hash of vg and corresponding amount of disk in
      MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  for vg, req_size in req_sizes.items():
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)


def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
  """Checks if nodes have enough free disk space in the specified VG.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    vg_free = info.payload.get("vg_free", None)
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node"
                                 " %s for vg %s, result was '%s'" %
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s"
                                 " vg %s: required %d MiB, available %d MiB" %
                                 (node, vg, requested, vg_free),
                                 errors.ECODE_NORES)


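# Editor's worked example (assumed values, not in the original source):
# _CheckNodesFreeDiskPerVG above fans out one _CheckNodesFreeDiskOnVG call
# per volume group, e.g. req_sizes = {"xenvg": 2048} checks that every node
# in nodenames reports at least 2048 MiB of "vg_free" for the "xenvg" VG,
# raising OpPrereqError with ECODE_NORES otherwise.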
class LUInstanceStartup(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    # extra beparams
    if self.op.beparams:
      # fill the beparams dict
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra hvparams
    if self.op.hvparams:
      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = cluster.FillHV(instance)
      filled_hvp.update(self.op.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)

    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")

      if self.op.hvparams or self.op.beparams:
        self.proc.LogWarning("Overridden parameters are ignored")
    else:
      _CheckNodeOnline(self, instance.primary_node)

      bep = self.cfg.GetClusterInfo().FillBE(instance)

      # check bridges existence
      _CheckInstanceBridgesExist(self, instance)

      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
      remote_info.Raise("Error checking node %s" % instance.primary_node,
                        prereq=True, ecode=errors.ECODE_ENVIRON)
      if not remote_info.payload: # not running already
        _CheckNodeFreeMemory(self, instance.primary_node,
                             "starting instance %s" % instance.name,
                             bep[constants.BE_MEMORY], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    """
    instance = self.instance
    force = self.op.force

    if not self.op.no_remember:
      self.cfg.MarkInstanceUp(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as started")
    else:
      node_current = instance.primary_node

      _StartInstanceDisks(self, instance, force)

      result = self.rpc.call_instance_start(node_current, instance,
                                            self.op.hvparams, self.op.beparams)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance: %s" % msg)


class LUInstanceReboot(LogicalUnit):
5605
  """Reboot an instance.
5606

5607
  """
5608
  HPATH = "instance-reboot"
5609
  HTYPE = constants.HTYPE_INSTANCE
5610
  REQ_BGL = False
5611

    
5612
  def ExpandNames(self):
5613
    self._ExpandAndLockInstance()
5614

    
5615
  def BuildHooksEnv(self):
5616
    """Build hooks env.
5617

5618
    This runs on master, primary and secondary nodes of the instance.
5619

5620
    """
5621
    env = {
5622
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
5623
      "REBOOT_TYPE": self.op.reboot_type,
5624
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5625
      }
5626

    
5627
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5628

    
5629
    return env
5630

    
5631
  def BuildHooksNodes(self):
5632
    """Build hooks nodes.
5633

5634
    """
5635
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5636
    return (nl, nl)
5637

    
5638
  def CheckPrereq(self):
5639
    """Check prerequisites.
5640

5641
    This checks that the instance is in the cluster.
5642

5643
    """
5644
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5645
    assert self.instance is not None, \
5646
      "Cannot retrieve locked instance %s" % self.op.instance_name
5647

    
5648
    _CheckNodeOnline(self, instance.primary_node)
5649

    
5650
    # check bridges existence
5651
    _CheckInstanceBridgesExist(self, instance)
5652

    
5653
  def Exec(self, feedback_fn):
5654
    """Reboot the instance.
5655

5656
    """
5657
    instance = self.instance
5658
    ignore_secondaries = self.op.ignore_secondaries
5659
    reboot_type = self.op.reboot_type
5660

    
5661
    remote_info = self.rpc.call_instance_info(instance.primary_node,
5662
                                              instance.name,
5663
                                              instance.hypervisor)
5664
    remote_info.Raise("Error checking node %s" % instance.primary_node)
5665
    instance_running = bool(remote_info.payload)
5666

    
5667
    node_current = instance.primary_node
5668

    
5669
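    # soft/hard reboots are delegated to the hypervisor while the instance
    # keeps its block devices; a full reboot (or rebooting a stopped
    # instance) is emulated below as shutdown, disk (re)activation and start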
    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
5670
                                            constants.INSTANCE_REBOOT_HARD]:
5671
      for disk in instance.disks:
5672
        self.cfg.SetDiskID(disk, node_current)
5673
      result = self.rpc.call_instance_reboot(node_current, instance,
5674
                                             reboot_type,
5675
                                             self.op.shutdown_timeout)
5676
      result.Raise("Could not reboot instance")
5677
    else:
5678
      if instance_running:
5679
        result = self.rpc.call_instance_shutdown(node_current, instance,
5680
                                                 self.op.shutdown_timeout)
5681
        result.Raise("Could not shutdown instance for full reboot")
5682
        _ShutdownInstanceDisks(self, instance)
5683
      else:
5684
        self.LogInfo("Instance %s was already stopped, starting now",
5685
                     instance.name)
5686
      _StartInstanceDisks(self, instance, ignore_secondaries)
5687
      result = self.rpc.call_instance_start(node_current, instance, None, None)
5688
      msg = result.fail_msg
5689
      if msg:
5690
        _ShutdownInstanceDisks(self, instance)
5691
        raise errors.OpExecError("Could not start instance for"
5692
                                 " full reboot: %s" % msg)
5693

    
5694
    self.cfg.MarkInstanceUp(instance.name)
5695

    
5696

    
5697
class LUInstanceShutdown(LogicalUnit):
5698
  """Shutdown an instance.
5699

5700
  """
5701
  HPATH = "instance-stop"
5702
  HTYPE = constants.HTYPE_INSTANCE
5703
  REQ_BGL = False
5704

    
5705
  def ExpandNames(self):
5706
    self._ExpandAndLockInstance()
5707

    
5708
  def BuildHooksEnv(self):
5709
    """Build hooks env.
5710

5711
    This runs on master, primary and secondary nodes of the instance.
5712

5713
    """
5714
    env = _BuildInstanceHookEnvByObject(self, self.instance)
5715
    env["TIMEOUT"] = self.op.timeout
5716
    return env
5717

    
5718
  def BuildHooksNodes(self):
5719
    """Build hooks nodes.
5720

5721
    """
5722
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5723
    return (nl, nl)
5724

    
5725
  def CheckPrereq(self):
5726
    """Check prerequisites.
5727

5728
    This checks that the instance is in the cluster.
5729

5730
    """
5731
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5732
    assert self.instance is not None, \
5733
      "Cannot retrieve locked instance %s" % self.op.instance_name
5734

    
5735
    self.primary_offline = \
5736
      self.cfg.GetNodeInfo(self.instance.primary_node).offline
5737

    
5738
    if self.primary_offline and self.op.ignore_offline_nodes:
5739
      self.proc.LogWarning("Ignoring offline primary node")
5740
    else:
5741
      _CheckNodeOnline(self, self.instance.primary_node)
5742

    
5743
  def Exec(self, feedback_fn):
5744
    """Shutdown the instance.
5745

5746
    """
5747
    instance = self.instance
5748
    node_current = instance.primary_node
5749
    timeout = self.op.timeout
5750

    
5751
    if not self.op.no_remember:
5752
      self.cfg.MarkInstanceDown(instance.name)
5753

    
5754
    if self.primary_offline:
5755
      assert self.op.ignore_offline_nodes
5756
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
5757
    else:
5758
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
5759
      msg = result.fail_msg
5760
      if msg:
5761
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)
5762

    
5763
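      # release the block devices even if the hypervisor-level shutdown
      # reported a problem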
      _ShutdownInstanceDisks(self, instance)
5764

    
5765

    
5766
class LUInstanceReinstall(LogicalUnit):
5767
  """Reinstall an instance.
5768

5769
  """
5770
  HPATH = "instance-reinstall"
5771
  HTYPE = constants.HTYPE_INSTANCE
5772
  REQ_BGL = False
5773

    
5774
  def ExpandNames(self):
5775
    self._ExpandAndLockInstance()
5776

    
5777
  def BuildHooksEnv(self):
5778
    """Build hooks env.
5779

5780
    This runs on master, primary and secondary nodes of the instance.
5781

5782
    """
5783
    return _BuildInstanceHookEnvByObject(self, self.instance)
5784

    
5785
  def BuildHooksNodes(self):
5786
    """Build hooks nodes.
5787

5788
    """
5789
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5790
    return (nl, nl)
5791

    
5792
  def CheckPrereq(self):
5793
    """Check prerequisites.
5794

5795
    This checks that the instance is in the cluster and is not running.
5796

5797
    """
5798
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5799
    assert instance is not None, \
5800
      "Cannot retrieve locked instance %s" % self.op.instance_name
5801
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
5802
                     " offline, cannot reinstall")
5803
    for node in instance.secondary_nodes:
5804
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
5805
                       " cannot reinstall")
5806

    
5807
    if instance.disk_template == constants.DT_DISKLESS:
5808
      raise errors.OpPrereqError("Instance '%s' has no disks" %
5809
                                 self.op.instance_name,
5810
                                 errors.ECODE_INVAL)
5811
    _CheckInstanceDown(self, instance, "cannot reinstall")
5812

    
5813
    if self.op.os_type is not None:
5814
      # OS verification
5815
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
5816
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
5817
      instance_os = self.op.os_type
5818
    else:
5819
      instance_os = instance.os
5820

    
5821
    nodelist = list(instance.all_nodes)
5822

    
5823
    if self.op.osparams:
5824
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
5825
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
5826
      self.os_inst = i_osdict # the new dict (without defaults)
5827
    else:
5828
      self.os_inst = None
5829

    
5830
    self.instance = instance
5831

    
5832
  def Exec(self, feedback_fn):
5833
    """Reinstall the instance.
5834

5835
    """
5836
    inst = self.instance
5837

    
5838
    if self.op.os_type is not None:
5839
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
5840
      inst.os = self.op.os_type
5841
      # Write to configuration
5842
      self.cfg.Update(inst, feedback_fn)
5843

    
5844
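    # the OS create scripts need access to the instance's disks, so
    # activate them on the primary node for the duration of the reinstall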
    _StartInstanceDisks(self, inst, None)
5845
    try:
5846
      feedback_fn("Running the instance OS create scripts...")
5847
      # FIXME: pass debug option from opcode to backend
5848
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
5849
                                             self.op.debug_level,
5850
                                             osparams=self.os_inst)
5851
      result.Raise("Could not install OS for instance %s on node %s" %
5852
                   (inst.name, inst.primary_node))
5853
    finally:
5854
      _ShutdownInstanceDisks(self, inst)
5855

    
5856

    
5857
class LUInstanceRecreateDisks(LogicalUnit):
5858
  """Recreate an instance's missing disks.
5859

5860
  """
5861
  HPATH = "instance-recreate-disks"
5862
  HTYPE = constants.HTYPE_INSTANCE
5863
  REQ_BGL = False
5864

    
5865
  def CheckArguments(self):
5866
    # normalise the disk list
5867
    self.op.disks = sorted(frozenset(self.op.disks))
5868

    
5869
  def ExpandNames(self):
5870
    self._ExpandAndLockInstance()
5871
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5872
    if self.op.nodes:
5873
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
5874
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
5875
    else:
5876
      self.needed_locks[locking.LEVEL_NODE] = []
5877

    
5878
  def DeclareLocks(self, level):
5879
    if level == locking.LEVEL_NODE:
5880
      # if we replace the nodes, we only need to lock the old primary,
5881
      # otherwise we need to lock all nodes for disk re-creation
5882
      primary_only = bool(self.op.nodes)
5883
      self._LockInstancesNodes(primary_only=primary_only)
5884

    
5885
  def BuildHooksEnv(self):
5886
    """Build hooks env.
5887

5888
    This runs on master, primary and secondary nodes of the instance.
5889

5890
    """
5891
    return _BuildInstanceHookEnvByObject(self, self.instance)
5892

    
5893
  def BuildHooksNodes(self):
5894
    """Build hooks nodes.
5895

5896
    """
5897
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5898
    return (nl, nl)
5899

    
5900
  def CheckPrereq(self):
5901
    """Check prerequisites.
5902

5903
    This checks that the instance is in the cluster and is not running.
5904

5905
    """
5906
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5907
    assert instance is not None, \
5908
      "Cannot retrieve locked instance %s" % self.op.instance_name
5909
    if self.op.nodes:
5910
      if len(self.op.nodes) != len(instance.all_nodes):
5911
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
5912
                                   " %d replacement nodes were specified" %
5913
                                   (instance.name, len(instance.all_nodes),
5914
                                    len(self.op.nodes)),
5915
                                   errors.ECODE_INVAL)
5916
      assert instance.disk_template != constants.DT_DRBD8 or \
5917
          len(self.op.nodes) == 2
5918
      assert instance.disk_template != constants.DT_PLAIN or \
5919
          len(self.op.nodes) == 1
5920
      primary_node = self.op.nodes[0]
5921
    else:
5922
      primary_node = instance.primary_node
5923
    _CheckNodeOnline(self, primary_node)
5924

    
5925
    if instance.disk_template == constants.DT_DISKLESS:
5926
      raise errors.OpPrereqError("Instance '%s' has no disks" %
5927
                                 self.op.instance_name, errors.ECODE_INVAL)
5928
    # if we replace nodes *and* the old primary is offline, we don't
5929
    # check that the instance is down
5930
    assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
5931
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
5932
    if not (self.op.nodes and old_pnode.offline):
5933
      _CheckInstanceDown(self, instance, "cannot recreate disks")
5934

    
5935
    if not self.op.disks:
5936
      self.op.disks = range(len(instance.disks))
5937
    else:
5938
      for idx in self.op.disks:
5939
        if idx >= len(instance.disks):
5940
          raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
5941
                                     errors.ECODE_INVAL)
5942
    if self.op.disks != range(len(instance.disks)) and self.op.nodes:
5943
      raise errors.OpPrereqError("Can't recreate disks partially and"
5944
                                 " change the nodes at the same time",
5945
                                 errors.ECODE_INVAL)
5946
    self.instance = instance
5947

    
5948
  def Exec(self, feedback_fn):
5949
    """Recreate the disks.
5950

5951
    """
5952
    # change primary node, if needed
5953
    if self.op.nodes:
5954
      self.instance.primary_node = self.op.nodes[0]
5955
      self.LogWarning("Changing the instance's nodes, you will have to"
5956
                      " remove any disks left on the older nodes manually")
5957

    
5958
    to_skip = []
5959
    for idx, disk in enumerate(self.instance.disks):
5960
      if idx not in self.op.disks: # disk idx has not been passed in
5961
        to_skip.append(idx)
5962
        continue
5963
      # update secondaries for disks, if needed
5964
      if self.op.nodes:
5965
        if disk.dev_type == constants.LD_DRBD8:
5966
          # need to update the nodes
5967
          assert len(self.op.nodes) == 2
5968
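          # the first two elements of a DRBD8 logical_id are the node names;
          # point them at the new primary/secondary pair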
          logical_id = list(disk.logical_id)
5969
          logical_id[0] = self.op.nodes[0]
5970
          logical_id[1] = self.op.nodes[1]
5971
          disk.logical_id = tuple(logical_id)
5972

    
5973
    if self.op.nodes:
5974
      self.cfg.Update(self.instance, feedback_fn)
5975

    
5976
    _CreateDisks(self, self.instance, to_skip=to_skip)
5977

    
5978

    
5979
class LUInstanceRename(LogicalUnit):
5980
  """Rename an instance.
5981

5982
  """
5983
  HPATH = "instance-rename"
5984
  HTYPE = constants.HTYPE_INSTANCE
5985

    
5986
  def CheckArguments(self):
5987
    """Check arguments.
5988

5989
    """
5990
    if self.op.ip_check and not self.op.name_check:
5991
      # TODO: make the ip check more flexible and not depend on the name check
5992
      raise errors.OpPrereqError("IP address check requires a name check",
5993
                                 errors.ECODE_INVAL)
5994

    
5995
  def BuildHooksEnv(self):
5996
    """Build hooks env.
5997

5998
    This runs on master, primary and secondary nodes of the instance.
5999

6000
    """
6001
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6002
    env["INSTANCE_NEW_NAME"] = self.op.new_name
6003
    return env
6004

    
6005
  def BuildHooksNodes(self):
6006
    """Build hooks nodes.
6007

6008
    """
6009
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6010
    return (nl, nl)
6011

    
6012
  def CheckPrereq(self):
6013
    """Check prerequisites.
6014

6015
    This checks that the instance is in the cluster and is not running.
6016

6017
    """
6018
    self.op.instance_name = _ExpandInstanceName(self.cfg,
6019
                                                self.op.instance_name)
6020
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6021
    assert instance is not None
6022
    _CheckNodeOnline(self, instance.primary_node)
6023
    _CheckInstanceDown(self, instance, "cannot rename")
6024
    self.instance = instance
6025

    
6026
    new_name = self.op.new_name
6027
    if self.op.name_check:
6028
      hostname = netutils.GetHostname(name=new_name)
6029
      if hostname != new_name:
6030
        self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6031
                     hostname.name)
6032
      if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6033
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6034
                                    " same as given hostname '%s'") %
6035
                                    (hostname.name, self.op.new_name),
6036
                                    errors.ECODE_INVAL)
6037
      new_name = self.op.new_name = hostname.name
6038
      if (self.op.ip_check and
6039
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6040
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
6041
                                   (hostname.ip, new_name),
6042
                                   errors.ECODE_NOTUNIQUE)
6043

    
6044
    instance_list = self.cfg.GetInstanceList()
6045
    if new_name in instance_list and new_name != instance.name:
6046
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6047
                                 new_name, errors.ECODE_EXISTS)
6048

    
6049
  def Exec(self, feedback_fn):
6050
    """Rename the instance.
6051

6052
    """
6053
    inst = self.instance
6054
    old_name = inst.name
6055

    
6056
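    # file-based disks live in a directory named after the instance, so the
    # rename also has to move that directory on the primary node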
    rename_file_storage = False
6057
    if (inst.disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE) and
6058
        self.op.new_name != inst.name):
6059
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6060
      rename_file_storage = True
6061

    
6062
    self.cfg.RenameInstance(inst.name, self.op.new_name)
6063
    # Change the instance lock. This is definitely safe while we hold the BGL.
6064
    # Otherwise the new lock would have to be added in acquired mode.
6065
    assert self.REQ_BGL
6066
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6067
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6068

    
6069
    # re-read the instance from the configuration after rename
6070
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
6071

    
6072
    if rename_file_storage:
6073
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6074
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6075
                                                     old_file_storage_dir,
6076
                                                     new_file_storage_dir)
6077
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
6078
                   " (but the instance has been renamed in Ganeti)" %
6079
                   (inst.primary_node, old_file_storage_dir,
6080
                    new_file_storage_dir))
6081

    
6082
    _StartInstanceDisks(self, inst, None)
6083
    try:
6084
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6085
                                                 old_name, self.op.debug_level)
6086
      msg = result.fail_msg
6087
      if msg:
6088
        msg = ("Could not run OS rename script for instance %s on node %s"
6089
               " (but the instance has been renamed in Ganeti): %s" %
6090
               (inst.name, inst.primary_node, msg))
6091
        self.proc.LogWarning(msg)
6092
    finally:
6093
      _ShutdownInstanceDisks(self, inst)
6094

    
6095
    return inst.name
6096

    
6097

    
6098
class LUInstanceRemove(LogicalUnit):
6099
  """Remove an instance.
6100

6101
  """
6102
  HPATH = "instance-remove"
6103
  HTYPE = constants.HTYPE_INSTANCE
6104
  REQ_BGL = False
6105

    
6106
  def ExpandNames(self):
6107
    self._ExpandAndLockInstance()
6108
    self.needed_locks[locking.LEVEL_NODE] = []
6109
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6110

    
6111
  def DeclareLocks(self, level):
6112
    if level == locking.LEVEL_NODE:
6113
      self._LockInstancesNodes()
6114

    
6115
  def BuildHooksEnv(self):
6116
    """Build hooks env.
6117

6118
    This runs on master, primary and secondary nodes of the instance.
6119

6120
    """
6121
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6122
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6123
    return env
6124

    
6125
  def BuildHooksNodes(self):
6126
    """Build hooks nodes.
6127

6128
    """
6129
    nl = [self.cfg.GetMasterNode()]
6130
    nl_post = list(self.instance.all_nodes) + nl
6131
    return (nl, nl_post)
6132

    
6133
  def CheckPrereq(self):
6134
    """Check prerequisites.
6135

6136
    This checks that the instance is in the cluster.
6137

6138
    """
6139
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6140
    assert self.instance is not None, \
6141
      "Cannot retrieve locked instance %s" % self.op.instance_name
6142

    
6143
  def Exec(self, feedback_fn):
6144
    """Remove the instance.
6145

6146
    """
6147
    instance = self.instance
6148
    logging.info("Shutting down instance %s on node %s",
6149
                 instance.name, instance.primary_node)
6150

    
6151
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6152
                                             self.op.shutdown_timeout)
6153
    msg = result.fail_msg
6154
    if msg:
6155
      if self.op.ignore_failures:
6156
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
6157
      else:
6158
        raise errors.OpExecError("Could not shutdown instance %s on"
6159
                                 " node %s: %s" %
6160
                                 (instance.name, instance.primary_node, msg))
6161

    
6162
    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6163

    
6164

    
6165
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6166
  """Utility function to remove an instance.
6167

6168
  """
6169
  logging.info("Removing block devices for instance %s", instance.name)
6170

    
6171
  if not _RemoveDisks(lu, instance):
6172
    if not ignore_failures:
6173
      raise errors.OpExecError("Can't remove instance's disks")
6174
    feedback_fn("Warning: can't remove instance's disks")
6175

    
6176
  logging.info("Removing instance %s out of cluster config", instance.name)
6177

    
6178
  lu.cfg.RemoveInstance(instance.name)
6179

    
6180
  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6181
    "Instance lock removal conflict"
6182

    
6183
  # Remove lock for the instance
6184
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6185

    
6186

    
6187
class LUInstanceQuery(NoHooksLU):
6188
  """Logical unit for querying instances.
6189

6190
  """
6191
  # pylint: disable-msg=W0142
6192
  REQ_BGL = False
6193

    
6194
  def CheckArguments(self):
6195
    self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6196
                             self.op.output_fields, self.op.use_locking)
6197

    
6198
  def ExpandNames(self):
6199
    self.iq.ExpandNames(self)
6200

    
6201
  def DeclareLocks(self, level):
6202
    self.iq.DeclareLocks(self, level)
6203

    
6204
  def Exec(self, feedback_fn):
6205
    return self.iq.OldStyleQuery(self)
6206

    
6207

    
6208
class LUInstanceFailover(LogicalUnit):
6209
  """Failover an instance.
6210

6211
  """
6212
  HPATH = "instance-failover"
6213
  HTYPE = constants.HTYPE_INSTANCE
6214
  REQ_BGL = False
6215

    
6216
  def CheckArguments(self):
6217
    """Check the arguments.
6218

6219
    """
6220
    self.iallocator = getattr(self.op, "iallocator", None)
6221
    self.target_node = getattr(self.op, "target_node", None)
6222

    
6223
  def ExpandNames(self):
6224
    self._ExpandAndLockInstance()
6225

    
6226
    if self.op.target_node is not None:
6227
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6228

    
6229
    self.needed_locks[locking.LEVEL_NODE] = []
6230
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6231

    
6232
    ignore_consistency = self.op.ignore_consistency
6233
    shutdown_timeout = self.op.shutdown_timeout
6234
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6235
                                       cleanup=False,
6236
                                       failover=True,
6237
                                       ignore_consistency=ignore_consistency,
6238
                                       shutdown_timeout=shutdown_timeout)
6239
    self.tasklets = [self._migrater]
6240

    
6241
  def DeclareLocks(self, level):
6242
    if level == locking.LEVEL_NODE:
6243
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6244
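      # externally mirrored disks can be failed over to any node; without an
      # explicit target we have to lock all nodes so the iallocator can pick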
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6245
        if self.op.target_node is None:
6246
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6247
        else:
6248
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6249
                                                   self.op.target_node]
6250
        del self.recalculate_locks[locking.LEVEL_NODE]
6251
      else:
6252
        self._LockInstancesNodes()
6253

    
6254
  def BuildHooksEnv(self):
6255
    """Build hooks env.
6256

6257
    This runs on master, primary and secondary nodes of the instance.
6258

6259
    """
6260
    instance = self._migrater.instance
6261
    source_node = instance.primary_node
6262
    target_node = self.op.target_node
6263
    env = {
6264
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6265
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6266
      "OLD_PRIMARY": source_node,
6267
      "NEW_PRIMARY": target_node,
6268
      }
6269

    
6270
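    # with internally mirrored (DRBD) disks the old primary becomes the new
    # secondary; externally mirrored templates have no secondary at all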
    if instance.disk_template in constants.DTS_INT_MIRROR:
6271
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6272
      env["NEW_SECONDARY"] = source_node
6273
    else:
6274
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6275

    
6276
    env.update(_BuildInstanceHookEnvByObject(self, instance))
6277

    
6278
    return env
6279

    
6280
  def BuildHooksNodes(self):
6281
    """Build hooks nodes.
6282

6283
    """
6284
    instance = self._migrater.instance
6285
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6286
    return (nl, nl + [instance.primary_node])
6287

    
6288

    
6289
class LUInstanceMigrate(LogicalUnit):
6290
  """Migrate an instance.
6291

6292
  This is migration without shutting the instance down, compared to failover,
6293
  which is done with shutdown.
6294

6295
  """
6296
  HPATH = "instance-migrate"
6297
  HTYPE = constants.HTYPE_INSTANCE
6298
  REQ_BGL = False
6299

    
6300
  def ExpandNames(self):
6301
    self._ExpandAndLockInstance()
6302

    
6303
    if self.op.target_node is not None:
6304
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6305

    
6306
    self.needed_locks[locking.LEVEL_NODE] = []
6307
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6308

    
6309
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6310
                                       cleanup=self.op.cleanup,
6311
                                       failover=False,
6312
                                       fallback=self.op.allow_failover)
6313
    self.tasklets = [self._migrater]
6314

    
6315
  def DeclareLocks(self, level):
6316
    if level == locking.LEVEL_NODE:
6317
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6318
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6319
        if self.op.target_node is None:
6320
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6321
        else:
6322
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6323
                                                   self.op.target_node]
6324
        del self.recalculate_locks[locking.LEVEL_NODE]
6325
      else:
6326
        self._LockInstancesNodes()
6327

    
6328
  def BuildHooksEnv(self):
6329
    """Build hooks env.
6330

6331
    This runs on master, primary and secondary nodes of the instance.
6332

6333
    """
6334
    instance = self._migrater.instance
6335
    source_node = instance.primary_node
6336
    target_node = self.op.target_node
6337
    env = _BuildInstanceHookEnvByObject(self, instance)
6338
    env.update({
6339
      "MIGRATE_LIVE": self._migrater.live,
6340
      "MIGRATE_CLEANUP": self.op.cleanup,
6341
      "OLD_PRIMARY": source_node,
6342
      "NEW_PRIMARY": target_node,
6343
      })
6344

    
6345
    if instance.disk_template in constants.DTS_INT_MIRROR:
6346
      env["OLD_SECONDARY"] = target_node
6347
      env["NEW_SECONDARY"] = source_node
6348
    else:
6349
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6350

    
6351
    return env
6352

    
6353
  def BuildHooksNodes(self):
6354
    """Build hooks nodes.
6355

6356
    """
6357
    instance = self._migrater.instance
6358
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6359
    return (nl, nl + [instance.primary_node])
6360

    
6361

    
6362
class LUInstanceMove(LogicalUnit):
6363
  """Move an instance by data-copying.
6364

6365
  """
6366
  HPATH = "instance-move"
6367
  HTYPE = constants.HTYPE_INSTANCE
6368
  REQ_BGL = False
6369

    
6370
  def ExpandNames(self):
6371
    self._ExpandAndLockInstance()
6372
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6373
    self.op.target_node = target_node
6374
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
6375
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6376

    
6377
  def DeclareLocks(self, level):
6378
    if level == locking.LEVEL_NODE:
6379
      self._LockInstancesNodes(primary_only=True)
6380

    
6381
  def BuildHooksEnv(self):
6382
    """Build hooks env.
6383

6384
    This runs on master, primary and secondary nodes of the instance.
6385

6386
    """
6387
    env = {
6388
      "TARGET_NODE": self.op.target_node,
6389
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6390
      }
6391
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6392
    return env
6393

    
6394
  def BuildHooksNodes(self):
6395
    """Build hooks nodes.
6396

6397
    """
6398
    nl = [
6399
      self.cfg.GetMasterNode(),
6400
      self.instance.primary_node,
6401
      self.op.target_node,
6402
      ]
6403
    return (nl, nl)
6404

    
6405
  def CheckPrereq(self):
6406
    """Check prerequisites.
6407

6408
    This checks that the instance is in the cluster.
6409

6410
    """
6411
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6412
    assert self.instance is not None, \
6413
      "Cannot retrieve locked instance %s" % self.op.instance_name
6414

    
6415
    node = self.cfg.GetNodeInfo(self.op.target_node)
6416
    assert node is not None, \
6417
      "Cannot retrieve locked node %s" % self.op.target_node
6418

    
6419
    self.target_node = target_node = node.name
6420

    
6421
    if target_node == instance.primary_node:
6422
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
6423
                                 (instance.name, target_node),
6424
                                 errors.ECODE_STATE)
6425

    
6426
    bep = self.cfg.GetClusterInfo().FillBE(instance)
6427

    
6428
    for idx, dsk in enumerate(instance.disks):
6429
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6430
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6431
                                   " cannot copy" % idx, errors.ECODE_STATE)
6432

    
6433
    _CheckNodeOnline(self, target_node)
6434
    _CheckNodeNotDrained(self, target_node)
6435
    _CheckNodeVmCapable(self, target_node)
6436

    
6437
    if instance.admin_up:
6438
      # check memory requirements on the target node
6439
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6440
                           instance.name, bep[constants.BE_MEMORY],
6441
                           instance.hypervisor)
6442
    else:
6443
      self.LogInfo("Not checking memory on the secondary node as"
6444
                   " instance will not be started")
6445

    
6446
    # check bridge existence
6447
    _CheckInstanceBridgesExist(self, instance, node=target_node)
6448

    
6449
  def Exec(self, feedback_fn):
6450
    """Move an instance.
6451

6452
    The move is done by shutting it down on its present node, copying
6453
    the data over (slow) and starting it on the new node.
6454

6455
    """
6456
    instance = self.instance
6457

    
6458
    source_node = instance.primary_node
6459
    target_node = self.target_node
6460

    
6461
    self.LogInfo("Shutting down instance %s on source node %s",
6462
                 instance.name, source_node)
6463

    
6464
    result = self.rpc.call_instance_shutdown(source_node, instance,
6465
                                             self.op.shutdown_timeout)
6466
    msg = result.fail_msg
6467
    if msg:
6468
      if self.op.ignore_consistency:
6469
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
6470
                             " Proceeding anyway. Please make sure node"
6471
                             " %s is down. Error details: %s",
6472
                             instance.name, source_node, source_node, msg)
6473
      else:
6474
        raise errors.OpExecError("Could not shutdown instance %s on"
6475
                                 " node %s: %s" %
6476
                                 (instance.name, source_node, msg))
6477

    
6478
    # create the target disks
6479
    try:
6480
      _CreateDisks(self, instance, target_node=target_node)
6481
    except errors.OpExecError:
6482
      self.LogWarning("Device creation failed, reverting...")
6483
      try:
6484
        _RemoveDisks(self, instance, target_node=target_node)
6485
      finally:
6486
        self.cfg.ReleaseDRBDMinors(instance.name)
6487
        raise
6488

    
6489
    cluster_name = self.cfg.GetClusterInfo().cluster_name
6490

    
6491
    errs = []
6492
    # activate, get path, copy the data over
6493
    for idx, disk in enumerate(instance.disks):
6494
      self.LogInfo("Copying data for disk %d", idx)
6495
      result = self.rpc.call_blockdev_assemble(target_node, disk,
6496
                                               instance.name, True, idx)
6497
      if result.fail_msg:
6498
        self.LogWarning("Can't assemble newly created disk %d: %s",
6499
                        idx, result.fail_msg)
6500
        errs.append(result.fail_msg)
6501
        break
6502
      dev_path = result.payload
6503
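      # stream the disk contents from the source node into the newly
      # assembled block device on the target node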
      result = self.rpc.call_blockdev_export(source_node, disk,
6504
                                             target_node, dev_path,
6505
                                             cluster_name)
6506
      if result.fail_msg:
6507
        self.LogWarning("Can't copy data over for disk %d: %s",
6508
                        idx, result.fail_msg)
6509
        errs.append(result.fail_msg)
6510
        break
6511

    
6512
    if errs:
6513
      self.LogWarning("Some disks failed to copy, aborting")
6514
      try:
6515
        _RemoveDisks(self, instance, target_node=target_node)
6516
      finally:
6517
        self.cfg.ReleaseDRBDMinors(instance.name)
6518
        raise errors.OpExecError("Errors during disk copy: %s" %
6519
                                 (",".join(errs),))
6520

    
6521
    instance.primary_node = target_node
6522
    self.cfg.Update(instance, feedback_fn)
6523

    
6524
    self.LogInfo("Removing the disks on the original node")
6525
    _RemoveDisks(self, instance, target_node=source_node)
6526

    
6527
    # Only start the instance if it's marked as up
6528
    if instance.admin_up:
6529
      self.LogInfo("Starting instance %s on node %s",
6530
                   instance.name, target_node)
6531

    
6532
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
6533
                                           ignore_secondaries=True)
6534
      if not disks_ok:
6535
        _ShutdownInstanceDisks(self, instance)
6536
        raise errors.OpExecError("Can't activate the instance's disks")
6537

    
6538
      result = self.rpc.call_instance_start(target_node, instance, None, None)
6539
      msg = result.fail_msg
6540
      if msg:
6541
        _ShutdownInstanceDisks(self, instance)
6542
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6543
                                 (instance.name, target_node, msg))
6544

    
6545

    
6546
class LUNodeMigrate(LogicalUnit):
6547
  """Migrate all instances from a node.
6548

6549
  """
6550
  HPATH = "node-migrate"
6551
  HTYPE = constants.HTYPE_NODE
6552
  REQ_BGL = False
6553

    
6554
  def CheckArguments(self):
6555
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
6556

    
6557
  def ExpandNames(self):
6558
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6559

    
6560
    self.needed_locks = {}
6561

    
6562
    # Create tasklets for migrating instances for all instances on this node
6563
    names = []
6564
    tasklets = []
6565

    
6566
    self.lock_all_nodes = False
6567

    
6568
    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
6569
      logging.debug("Migrating instance %s", inst.name)
6570
      names.append(inst.name)
6571

    
6572
      tasklets.append(TLMigrateInstance(self, inst.name, cleanup=False))
6573

    
6574
      if inst.disk_template in constants.DTS_EXT_MIRROR:
6575
        # We need to lock all nodes, as the iallocator will choose the
6576
        # destination nodes afterwards
6577
        self.lock_all_nodes = True
6578

    
6579
    self.tasklets = tasklets
6580

    
6581
    # Declare node locks
6582
    if self.lock_all_nodes:
6583
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6584
    else:
6585
      self.needed_locks[locking.LEVEL_NODE] = [self.op.node_name]
6586
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6587

    
6588
    # Declare instance locks
6589
    self.needed_locks[locking.LEVEL_INSTANCE] = names
6590

    
6591
  def DeclareLocks(self, level):
6592
    if level == locking.LEVEL_NODE and not self.lock_all_nodes:
6593
      self._LockInstancesNodes()
6594

    
6595
  def BuildHooksEnv(self):
6596
    """Build hooks env.
6597

6598
    This runs on the master, the primary and all the secondaries.
6599

6600
    """
6601
    return {
6602
      "NODE_NAME": self.op.node_name,
6603
      }
6604

    
6605
  def BuildHooksNodes(self):
6606
    """Build hooks nodes.
6607

6608
    """
6609
    nl = [self.cfg.GetMasterNode()]
6610
    return (nl, nl)
6611

    
6612

    
6613
class TLMigrateInstance(Tasklet):
6614
  """Tasklet class for instance migration.
6615

6616
  @type live: boolean
6617
  @ivar live: whether the migration will be done live or non-live;
6618
      this variable is initialized only after CheckPrereq has run
6619
  @type cleanup: boolean
6620
  @ivar cleanup: Whether we are cleaning up from a failed migration
6621
  @type iallocator: string
6622
  @ivar iallocator: The iallocator used to determine target_node
6623
  @type target_node: string
6624
  @ivar target_node: If given, the target_node to reallocate the instance to
6625
  @type failover: boolean
6626
  @ivar failover: Whether operation results in failover or migration
6627
  @type fallback: boolean
6628
  @ivar fallback: Whether fallback to failover is allowed if migration is not
6629
                  possible
6630
  @type ignore_consistency: boolean
6631
  @ivar ignore_consistency: Whether to ignore consistency between source
6632
                            and target node
6633
  @type shutdown_timeout: int
6634
  @ivar shutdown_timeout: In case of failover, the timeout for the shutdown
6635

6636
  """
6637
  def __init__(self, lu, instance_name, cleanup=False,
6638
               failover=False, fallback=False,
6639
               ignore_consistency=False,
6640
               shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
6641
    """Initializes this class.
6642

6643
    """
6644
    Tasklet.__init__(self, lu)
6645

    
6646
    # Parameters
6647
    self.instance_name = instance_name
6648
    self.cleanup = cleanup
6649
    self.live = False # will be overridden later
6650
    self.failover = failover
6651
    self.fallback = fallback
6652
    self.ignore_consistency = ignore_consistency
6653
    self.shutdown_timeout = shutdown_timeout
6654

    
6655
  def CheckPrereq(self):
6656
    """Check prerequisites.
6657

6658
    This checks that the instance is in the cluster.
6659

6660
    """
6661
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6662
    instance = self.cfg.GetInstanceInfo(instance_name)
6663
    assert instance is not None
6664
    self.instance = instance
6665

    
6666
    if (not self.cleanup and not instance.admin_up and not self.failover and
6667
        self.fallback):
6668
      self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
6669
                      " to failover")
6670
      self.failover = True
6671

    
6672
    if instance.disk_template not in constants.DTS_MIRRORED:
6673
      if self.failover:
6674
        text = "failovers"
6675
      else:
6676
        text = "migrations"
6677
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
6678
                                 " %s" % (instance.disk_template, text),
6679
                                 errors.ECODE_STATE)
6680

    
6681
    if instance.disk_template in constants.DTS_EXT_MIRROR:
6682
      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
6683

    
6684
      if self.lu.op.iallocator:
6685
        self._RunAllocator()
6686
      else:
6687
        # We set self.target_node as it is required by
6688
        # BuildHooksEnv
6689
        self.target_node = self.lu.op.target_node
6690

    
6691
      # self.target_node is already populated, either directly or by the
6692
      # iallocator run
6693
      target_node = self.target_node
6694
      if self.target_node == instance.primary_node:
6695
        raise errors.OpPrereqError("Cannot migrate instance %s"
6696
                                   " to its primary (%s)" %
6697
                                   (instance.name, instance.primary_node),
                                   errors.ECODE_STATE)
6698

    
6699
      if len(self.lu.tasklets) == 1:
6700
        # It is safe to release locks only when we're the only tasklet
6701
        # in the LU
6702
        _ReleaseLocks(self.lu, locking.LEVEL_NODE,
6703
                      keep=[instance.primary_node, self.target_node])
6704

    
6705
    else:
6706
      secondary_nodes = instance.secondary_nodes
6707
      if not secondary_nodes:
6708
        raise errors.ConfigurationError("No secondary node but using"
6709
                                        " %s disk template" %
6710
                                        instance.disk_template)
6711
      target_node = secondary_nodes[0]
6712
      if self.lu.op.iallocator or (self.lu.op.target_node and
6713
                                   self.lu.op.target_node != target_node):
6714
        if self.failover:
6715
          text = "failed over"
6716
        else:
6717
          text = "migrated"
6718
        raise errors.OpPrereqError("Instances with disk template %s cannot"
6719
                                   " be %s to arbitrary nodes"
6720
                                   " (neither an iallocator nor a target"
6721
                                   " node can be passed)" %
6722
                                   (instance.disk_template, text),
6723
                                   errors.ECODE_INVAL)
6724

    
6725
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
6726

    
6727
    # check memory requirements on the target node
6728
    if not self.failover or instance.admin_up:
6729
      _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
6730
                           instance.name, i_be[constants.BE_MEMORY],
6731
                           instance.hypervisor)
6732
    else:
6733
      self.lu.LogInfo("Not checking memory on the secondary node as"
6734
                      " instance will not be started")
6735

    
6736
    # check bridge existance
6737
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
6738

    
6739
    if not self.cleanup:
6740
      _CheckNodeNotDrained(self.lu, target_node)
6741
      if not self.failover:
6742
        result = self.rpc.call_instance_migratable(instance.primary_node,
6743
                                                   instance)
6744
        if result.fail_msg and self.fallback:
6745
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
6746
                          " failover")
6747
          self.failover = True
6748
        else:
6749
          result.Raise("Can't migrate, please use failover",
6750
                       prereq=True, ecode=errors.ECODE_STATE)
6751

    
6752
    assert not (self.failover and self.cleanup)
6753

    
6754
    if not self.failover:
6755
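      # the boolean 'live' and the 'mode' parameter are mutually exclusive;
      # everything is normalized to 'mode' below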
      if self.lu.op.live is not None and self.lu.op.mode is not None:
6756
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
6757
                                   " parameters are accepted",
6758
                                   errors.ECODE_INVAL)
6759
      if self.lu.op.live is not None:
6760
        if self.lu.op.live:
6761
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
6762
        else:
6763
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
6764
        # reset the 'live' parameter to None so that repeated
6765
        # invocations of CheckPrereq do not raise an exception
6766
        self.lu.op.live = None
6767
      elif self.lu.op.mode is None:
6768
        # read the default value from the hypervisor
6769
        i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
6770
                                                skip_globals=False)
6771
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
6772

    
6773
      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
6774
    else:
6775
      # Failover is never live
6776
      self.live = False
6777

    
6778
  def _RunAllocator(self):
6779
    """Run the allocator based on input opcode.
6780

6781
    """
6782
    ial = IAllocator(self.cfg, self.rpc,
6783
                     mode=constants.IALLOCATOR_MODE_RELOC,
6784
                     name=self.instance_name,
6785
                     # TODO See why hail breaks with a single node below
6786
                     relocate_from=[self.instance.primary_node,
6787
                                    self.instance.primary_node],
6788
                     )
6789

    
6790
    ial.Run(self.lu.op.iallocator)
6791

    
6792
    if not ial.success:
6793
      raise errors.OpPrereqError("Can't compute nodes using"
6794
                                 " iallocator '%s': %s" %
6795
                                 (self.lu.op.iallocator, ial.info),
6796
                                 errors.ECODE_NORES)
6797
    if len(ial.result) != ial.required_nodes:
6798
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6799
                                 " of nodes (%s), required %s" %
6800
                                 (self.lu.op.iallocator, len(ial.result),
6801
                                  ial.required_nodes), errors.ECODE_FAULT)
6802
    self.target_node = ial.result[0]
6803
    self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6804
                    self.instance_name, self.lu.op.iallocator,
6805
                    utils.CommaJoin(ial.result))
6806

    
6807
  def _WaitUntilSync(self):
6808
    """Poll with custom rpc for disk sync.
6809

6810
    This uses our own step-based rpc call.
6811

6812
    """
6813
    self.feedback_fn("* wait until resync is done")
6814
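    # poll every two seconds until every node reports its disks as fully
    # synchronized, showing the minimum progress across all nodes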
    all_done = False
6815
    while not all_done:
6816
      all_done = True
6817
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
6818
                                            self.nodes_ip,
6819
                                            self.instance.disks)
6820
      min_percent = 100
6821
      for node, nres in result.items():
6822
        nres.Raise("Cannot resync disks on node %s" % node)
6823
        node_done, node_percent = nres.payload
6824
        all_done = all_done and node_done
6825
        if node_percent is not None:
6826
          min_percent = min(min_percent, node_percent)
6827
      if not all_done:
6828
        if min_percent < 100:
6829
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
6830
        time.sleep(2)
6831

    
6832
  def _EnsureSecondary(self, node):
6833
    """Demote a node to secondary.
6834

6835
    """
6836
    self.feedback_fn("* switching node %s to secondary mode" % node)
6837

    
6838
    for dev in self.instance.disks:
6839
      self.cfg.SetDiskID(dev, node)
6840

    
6841
    result = self.rpc.call_blockdev_close(node, self.instance.name,
6842
                                          self.instance.disks)
6843
    result.Raise("Cannot change disk to secondary on node %s" % node)
6844

    
6845
  def _GoStandalone(self):
6846
    """Disconnect from the network.
6847

6848
    """
6849
    self.feedback_fn("* changing into standalone mode")
6850
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
6851
                                               self.instance.disks)
6852
    for node, nres in result.items():
6853
      nres.Raise("Cannot disconnect disks node %s" % node)
6854

    
6855
  def _GoReconnect(self, multimaster):
6856
    """Reconnect to the network.
6857

6858
    """
6859
    if multimaster:
6860
      msg = "dual-master"
6861
    else:
6862
      msg = "single-master"
6863
    self.feedback_fn("* changing disks into %s mode" % msg)
6864
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
6865
                                           self.instance.disks,
6866
                                           self.instance.name, multimaster)
6867
    for node, nres in result.items():
6868
      nres.Raise("Cannot change disks config on node %s" % node)
6869

    
6870
  def _ExecCleanup(self):
6871
    """Try to cleanup after a failed migration.
6872

6873
    The cleanup is done by:
6874
      - check that the instance is running only on one node
6875
        (and update the config if needed)
6876
      - change disks on its secondary node to secondary
6877
      - wait until disks are fully synchronized
6878
      - disconnect from the network
6879
      - change disks into single-master mode
6880
      - wait again until disks are fully synchronized
6881

6882
    """
6883
    instance = self.instance
6884
    target_node = self.target_node
6885
    source_node = self.source_node
6886

    
6887
    # check running on only one node
6888
    self.feedback_fn("* checking where the instance actually runs"
6889
                     " (if this hangs, the hypervisor might be in"
6890
                     " a bad state)")
6891
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6892
    for node, result in ins_l.items():
6893
      result.Raise("Can't contact node %s" % node)
6894

    
6895
    runningon_source = instance.name in ins_l[source_node].payload
6896
    runningon_target = instance.name in ins_l[target_node].payload
6897

    
6898
    if runningon_source and runningon_target:
6899
      raise errors.OpExecError("Instance seems to be running on two nodes,"
6900
                               " or the hypervisor is confused; you will have"
6901
                               " to ensure manually that it runs only on one"
6902
                               " and restart this operation")
6903

    
6904
    if not (runningon_source or runningon_target):
6905
      raise errors.OpExecError("Instance does not seem to be running at all;"
6906
                               " in this case it's safer to repair by"
6907
                               " running 'gnt-instance stop' to ensure disk"
6908
                               " shutdown, and then restarting it")
6909

    
6910
    if runningon_target:
6911
      # the migration has actually succeeded, we need to update the config
6912
      self.feedback_fn("* instance running on secondary node (%s),"
6913
                       " updating config" % target_node)
6914
      instance.primary_node = target_node
6915
      self.cfg.Update(instance, self.feedback_fn)
6916
      demoted_node = source_node
6917
    else:
6918
      self.feedback_fn("* instance confirmed to be running on its"
6919
                       " primary node (%s)" % source_node)
6920
      demoted_node = target_node
6921

    
6922
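    # for internally mirrored (DRBD) disks, demote the node that is not
    # running the instance and re-establish a clean single-master link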
    if instance.disk_template in constants.DTS_INT_MIRROR:
6923
      self._EnsureSecondary(demoted_node)
6924
      try:
6925
        self._WaitUntilSync()
6926
      except errors.OpExecError:
6927
        # we ignore errors here, since if the device is standalone, it
6928
        # won't be able to sync
6929
        pass
6930
      self._GoStandalone()
6931
      self._GoReconnect(False)
6932
      self._WaitUntilSync()
6933

    
6934
    self.feedback_fn("* done")
6935

    
6936
  def _RevertDiskStatus(self):
6937
    """Try to revert the disk status after a failed migration.
6938

6939
    """
6940
    target_node = self.target_node
6941
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
6942
      return
6943

    
6944
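    # put the target node back into the secondary role and reconnect the
    # disks in single-master mode so the source node keeps the instance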
    try:
6945
      self._EnsureSecondary(target_node)
6946
      self._GoStandalone()
6947
      self._GoReconnect(False)
6948
      self._WaitUntilSync()
6949
    except errors.OpExecError, err:
6950
      self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
6951
                         " please try to recover the instance manually;"
6952
                         " error '%s'" % str(err))
6953

    
6954
  def _AbortMigration(self):
6955
    """Call the hypervisor code to abort a started migration.
6956

6957
    """
6958
    instance = self.instance
6959
    target_node = self.target_node
6960
    migration_info = self.migration_info
6961

    
6962
    abort_result = self.rpc.call_finalize_migration(target_node,
6963
                                                    instance,
6964
                                                    migration_info,
6965
                                                    False)
6966
    abort_msg = abort_result.fail_msg
6967
    if abort_msg:
6968
      logging.error("Aborting migration failed on target node %s: %s",
6969
                    target_node, abort_msg)
6970
      # Don't raise an exception here, as we still have to try to revert the
6971
      # disk status, even if this step failed.
6972

    
6973
  def _ExecMigration(self):
6974
    """Migrate an instance.
6975

6976
    The migrate is done by:
6977
      - change the disks into dual-master mode
6978
      - wait until disks are fully synchronized again
6979
      - migrate the instance
6980
      - change disks on the new secondary node (the old primary) to secondary
6981
      - wait until disks are fully synchronized
6982
      - change disks into single-master mode
6983

6984
    """
6985
    instance = self.instance
6986
    target_node = self.target_node
6987
    source_node = self.source_node
6988

    
6989
    self.feedback_fn("* checking disk consistency between source and target")
6990
    for dev in instance.disks:
6991
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6992
        raise errors.OpExecError("Disk %s is degraded or not fully"
6993
                                 " synchronized on target node,"
6994
                                 " aborting migration" % dev.iv_name)
6995

    
6996
    # First get the migration information from the remote node
6997
    result = self.rpc.call_migration_info(source_node, instance)
6998
    msg = result.fail_msg
6999
    if msg:
7000
      log_err = ("Failed fetching source migration information from %s: %s" %
7001
                 (source_node, msg))
7002
      logging.error(log_err)
7003
      raise errors.OpExecError(log_err)
7004

    
7005
    self.migration_info = migration_info = result.payload
7006

    
7007
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7008
      # Then switch the disks to master/master mode
7009
      self._EnsureSecondary(target_node)
7010
      self._GoStandalone()
7011
      self._GoReconnect(True)
7012
      self._WaitUntilSync()
7013

    
7014
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
7015
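    # give the target node a chance to prepare for the incoming instance
    # before the actual migration is started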
    result = self.rpc.call_accept_instance(target_node,
7016
                                           instance,
7017
                                           migration_info,
7018
                                           self.nodes_ip[target_node])
7019

    
7020
    msg = result.fail_msg
7021
    if msg:
7022
      logging.error("Instance pre-migration failed, trying to revert"
7023
                    " disk status: %s", msg)
7024
      self.feedback_fn("Pre-migration failed, aborting")
7025
      self._AbortMigration()
7026
      self._RevertDiskStatus()
7027
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7028
                               (instance.name, msg))
7029

    
7030
    self.feedback_fn("* migrating instance to %s" % target_node)
7031
    result = self.rpc.call_instance_migrate(source_node, instance,
7032
                                            self.nodes_ip[target_node],
7033
                                            self.live)
7034
    msg = result.fail_msg
7035
    if msg:
7036
      logging.error("Instance migration failed, trying to revert"
7037
                    " disk status: %s", msg)
7038
      self.feedback_fn("Migration failed, aborting")
7039
      self._AbortMigration()
7040
      self._RevertDiskStatus()
7041
      raise errors.OpExecError("Could not migrate instance %s: %s" %
7042
                               (instance.name, msg))
7043

    
7044
    instance.primary_node = target_node
7045
    # distribute new instance config to the other nodes
7046
    self.cfg.Update(instance, self.feedback_fn)
7047

    
7048
    result = self.rpc.call_finalize_migration(target_node,
7049
                                              instance,
7050
                                              migration_info,
7051
                                              True)
7052
    msg = result.fail_msg
7053
    if msg:
7054
      logging.error("Instance migration succeeded, but finalization failed:"
7055
                    " %s", msg)
7056
      raise errors.OpExecError("Could not finalize instance migration: %s" %
7057
                               msg)
7058

    
7059
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7060
      self._EnsureSecondary(source_node)
7061
      self._WaitUntilSync()
7062
      self._GoStandalone()
7063
      self._GoReconnect(False)
7064
      self._WaitUntilSync()
7065

    
7066
    self.feedback_fn("* done")
7067

    
7068
  def _ExecFailover(self):
7069
    """Failover an instance.
7070

7071
    The failover is done by shutting it down on its present node and
7072
    starting it on the secondary.
7073

7074
    """
7075
    instance = self.instance
7076
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7077

    
7078
    source_node = instance.primary_node
7079
    target_node = self.target_node
7080

    
7081
    if instance.admin_up:
7082
      self.feedback_fn("* checking disk consistency between source and target")
7083
      for dev in instance.disks:
7084
        # for drbd, these are drbd over lvm
7085
        if not _CheckDiskConsistency(self, dev, target_node, False):
7086
          if not self.ignore_consistency:
7087
            raise errors.OpExecError("Disk %s is degraded on target node,"
7088
                                     " aborting failover" % dev.iv_name)
7089
    else:
7090
      self.feedback_fn("* not checking disk consistency as instance is not"
7091
                       " running")
7092

    
7093
    self.feedback_fn("* shutting down instance on source node")
7094
    logging.info("Shutting down instance %s on node %s",
7095
                 instance.name, source_node)
7096

    
7097
    result = self.rpc.call_instance_shutdown(source_node, instance,
7098
                                             self.shutdown_timeout)
7099
    msg = result.fail_msg
7100
    if msg:
7101
      if self.ignore_consistency or primary_node.offline:
7102
        self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7103
                           " proceeding anyway; please make sure node"
7104
                           " %s is down; error details: %s",
7105
                           instance.name, source_node, source_node, msg)
7106
      else:
7107
        raise errors.OpExecError("Could not shutdown instance %s on"
7108
                                 " node %s: %s" %
7109
                                 (instance.name, source_node, msg))
7110

    
7111
    self.feedback_fn("* deactivating the instance's disks on source node")
7112
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
7113
      raise errors.OpExecError("Can't shut down the instance's disks.")
7114

    
7115
    instance.primary_node = target_node
7116
    # distribute new instance config to the other nodes
7117
    self.cfg.Update(instance, self.feedback_fn)
7118

    
7119
    # Only start the instance if it's marked as up
7120
    if instance.admin_up:
7121
      self.feedback_fn("* activating the instance's disks on target node")
7122
      logging.info("Starting instance %s on node %s",
7123
                   instance.name, target_node)
7124

    
7125
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
7126
                                           ignore_secondaries=True)
7127
      if not disks_ok:
7128
        _ShutdownInstanceDisks(self, instance)
7129
        raise errors.OpExecError("Can't activate the instance's disks")
7130

    
7131
      self.feedback_fn("* starting the instance on the target node")
7132
      result = self.rpc.call_instance_start(target_node, instance, None, None)
7133
      msg = result.fail_msg
7134
      if msg:
7135
        _ShutdownInstanceDisks(self, instance)
7136
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7137
                                 (instance.name, target_node, msg))
7138

    
7139
  def Exec(self, feedback_fn):
7140
    """Perform the migration.
7141

7142
    """
7143
    self.feedback_fn = feedback_fn
7144
    self.source_node = self.instance.primary_node
7145

    
7146
    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7147
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
7148
      self.target_node = self.instance.secondary_nodes[0]
7149
      # Otherwise self.target_node has been populated either
7150
      # directly, or through an iallocator.
7151

    
7152
    self.all_nodes = [self.source_node, self.target_node]
7153
    self.nodes_ip = {
7154
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
7155
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
7156
      }
7157

    
7158
    if self.failover:
7159
      feedback_fn("Failover instance %s" % self.instance.name)
7160
      self._ExecFailover()
7161
    else:
7162
      feedback_fn("Migrating instance %s" % self.instance.name)
7163

    
7164
      if self.cleanup:
7165
        return self._ExecCleanup()
7166
      else:
7167
        return self._ExecMigration()
7168

    
7169

    
7170
def _CreateBlockDev(lu, node, instance, device, force_create,
7171
                    info, force_open):
7172
  """Create a tree of block devices on a given node.
7173

7174
  If this device type has to be created on secondaries, create it and
7175
  all its children.
7176

7177
  If not, just recurse to children keeping the same 'force' value.
7178

7179
  @param lu: the lu on whose behalf we execute
7180
  @param node: the node on which to create the device
7181
  @type instance: L{objects.Instance}
7182
  @param instance: the instance which owns the device
7183
  @type device: L{objects.Disk}
7184
  @param device: the device to create
7185
  @type force_create: boolean
7186
  @param force_create: whether to force creation of this device; this
7187
      will be change to True whenever we find a device which has
7188
      CreateOnSecondary() attribute
7189
  @param info: the extra 'metadata' we should attach to the device
7190
      (this will be represented as a LVM tag)
7191
  @type force_open: boolean
7192
  @param force_open: this parameter will be passes to the
7193
      L{backend.BlockdevCreate} function where it specifies
7194
      whether we run on primary or not, and it affects both
7195
      the child assembly and the device own Open() execution
7196

7197
  """
7198
  if device.CreateOnSecondary():
7199
    force_create = True
7200

    
7201
  if device.children:
7202
    for child in device.children:
7203
      _CreateBlockDev(lu, node, instance, child, force_create,
7204
                      info, force_open)
7205

    
7206
  if not force_create:
7207
    return
7208

    
7209
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
7210

    
7211

    
7212
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7213
  """Create a single block device on a given node.
7214

7215
  This will not recurse over children of the device, so they must be
7216
  created in advance.
7217

7218
  @param lu: the lu on whose behalf we execute
7219
  @param node: the node on which to create the device
7220
  @type instance: L{objects.Instance}
7221
  @param instance: the instance which owns the device
7222
  @type device: L{objects.Disk}
7223
  @param device: the device to create
7224
  @param info: the extra 'metadata' we should attach to the device
7225
      (this will be represented as a LVM tag)
7226
  @type force_open: boolean
7227
  @param force_open: this parameter will be passes to the
7228
      L{backend.BlockdevCreate} function where it specifies
7229
      whether we run on primary or not, and it affects both
7230
      the child assembly and the device own Open() execution
7231

7232
  """
7233
  lu.cfg.SetDiskID(device, node)
7234
  result = lu.rpc.call_blockdev_create(node, device, device.size,
7235
                                       instance.name, force_open, info)
7236
  result.Raise("Can't create block device %s on"
7237
               " node %s for instance %s" % (device, node, instance.name))
7238
  if device.physical_id is None:
7239
    device.physical_id = result.payload
7240

    
7241

    
7242
def _GenerateUniqueNames(lu, exts):
7243
  """Generate a suitable LV name.
7244

7245
  This will generate a logical volume name for the given instance.
7246

7247
  """
7248
  results = []
7249
  for val in exts:
7250
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
7251
    results.append("%s%s" % (new_id, val))
7252
  return results
7253

    
7254

    
7255
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
7256
                         iv_name, p_minor, s_minor):
7257
  """Generate a drbd8 device complete with its children.
7258

7259
  """
7260
  assert len(vgnames) == len(names) == 2
7261
  port = lu.cfg.AllocatePort()
7262
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
7263
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7264
                          logical_id=(vgnames[0], names[0]))
7265
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7266
                          logical_id=(vgnames[1], names[1]))
7267
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
7268
                          logical_id=(primary, secondary, port,
7269
                                      p_minor, s_minor,
7270
                                      shared_secret),
7271
                          children=[dev_data, dev_meta],
7272
                          iv_name=iv_name)
7273
  return drbd_dev
7274

    
7275

    
7276
def _GenerateDiskTemplate(lu, template_name,
7277
                          instance_name, primary_node,
7278
                          secondary_nodes, disk_info,
7279
                          file_storage_dir, file_driver,
7280
                          base_index, feedback_fn):
7281
  """Generate the entire disk layout for a given template type.
7282

7283
  """
7284
  #TODO: compute space requirements
7285

    
7286
  vgname = lu.cfg.GetVGName()
7287
  disk_count = len(disk_info)
7288
  disks = []
7289
  if template_name == constants.DT_DISKLESS:
7290
    pass
7291
  elif template_name == constants.DT_PLAIN:
7292
    if len(secondary_nodes) != 0:
7293
      raise errors.ProgrammerError("Wrong template configuration")
7294

    
7295
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7296
                                      for i in range(disk_count)])
7297
    for idx, disk in enumerate(disk_info):
7298
      disk_index = idx + base_index
7299
      vg = disk.get(constants.IDISK_VG, vgname)
7300
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7301
      disk_dev = objects.Disk(dev_type=constants.LD_LV,
7302
                              size=disk[constants.IDISK_SIZE],
7303
                              logical_id=(vg, names[idx]),
7304
                              iv_name="disk/%d" % disk_index,
7305
                              mode=disk[constants.IDISK_MODE])
7306
      disks.append(disk_dev)
7307
  elif template_name == constants.DT_DRBD8:
7308
    if len(secondary_nodes) != 1:
7309
      raise errors.ProgrammerError("Wrong template configuration")
7310
    remote_node = secondary_nodes[0]
7311
    minors = lu.cfg.AllocateDRBDMinor(
7312
      [primary_node, remote_node] * len(disk_info), instance_name)
7313

    
7314
    names = []
7315
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7316
                                               for i in range(disk_count)]):
7317
      names.append(lv_prefix + "_data")
7318
      names.append(lv_prefix + "_meta")
7319
    for idx, disk in enumerate(disk_info):
7320
      disk_index = idx + base_index
7321
      data_vg = disk.get(constants.IDISK_VG, vgname)
7322
      meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7323
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7324
                                      disk[constants.IDISK_SIZE],
7325
                                      [data_vg, meta_vg],
7326
                                      names[idx * 2:idx * 2 + 2],
7327
                                      "disk/%d" % disk_index,
7328
                                      minors[idx * 2], minors[idx * 2 + 1])
7329
      disk_dev.mode = disk[constants.IDISK_MODE]
7330
      disks.append(disk_dev)
7331
  elif template_name == constants.DT_FILE:
7332
    if len(secondary_nodes) != 0:
7333
      raise errors.ProgrammerError("Wrong template configuration")
7334

    
7335
    opcodes.RequireFileStorage()
7336

    
7337
    for idx, disk in enumerate(disk_info):
7338
      disk_index = idx + base_index
7339
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7340
                              size=disk[constants.IDISK_SIZE],
7341
                              iv_name="disk/%d" % disk_index,
7342
                              logical_id=(file_driver,
7343
                                          "%s/disk%d" % (file_storage_dir,
7344
                                                         disk_index)),
7345
                              mode=disk[constants.IDISK_MODE])
7346
      disks.append(disk_dev)
7347
  elif template_name == constants.DT_SHARED_FILE:
7348
    if len(secondary_nodes) != 0:
7349
      raise errors.ProgrammerError("Wrong template configuration")
7350

    
7351
    opcodes.RequireSharedFileStorage()
7352

    
7353
    for idx, disk in enumerate(disk_info):
7354
      disk_index = idx + base_index
7355
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7356
                              size=disk[constants.IDISK_SIZE],
7357
                              iv_name="disk/%d" % disk_index,
7358
                              logical_id=(file_driver,
7359
                                          "%s/disk%d" % (file_storage_dir,
7360
                                                         disk_index)),
7361
                              mode=disk[constants.IDISK_MODE])
7362
      disks.append(disk_dev)
7363
  elif template_name == constants.DT_BLOCK:
7364
    if len(secondary_nodes) != 0:
7365
      raise errors.ProgrammerError("Wrong template configuration")
7366

    
7367
    for idx, disk in enumerate(disk_info):
7368
      disk_index = idx + base_index
7369
      disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7370
                              size=disk[constants.IDISK_SIZE],
7371
                              logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7372
                                          disk[constants.IDISK_ADOPT]),
7373
                              iv_name="disk/%d" % disk_index,
7374
                              mode=disk[constants.IDISK_MODE])
7375
      disks.append(disk_dev)
7376

    
7377
  else:
7378
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
7379
  return disks
7380

    
7381

    
7382
def _GetInstanceInfoText(instance):
7383
  """Compute that text that should be added to the disk's metadata.
7384

7385
  """
7386
  return "originstname+%s" % instance.name
7387

    
7388

    
7389
def _CalcEta(time_taken, written, total_size):
7390
  """Calculates the ETA based on size written and total size.
7391

7392
  @param time_taken: The time taken so far
7393
  @param written: amount written so far
7394
  @param total_size: The total size of data to be written
7395
  @return: The remaining time in seconds
7396

7397
  """
7398
  avg_time = time_taken / float(written)
7399
  return (total_size - written) * avg_time
7400

    
7401

    
7402
def _WipeDisks(lu, instance):
7403
  """Wipes instance disks.
7404

7405
  @type lu: L{LogicalUnit}
7406
  @param lu: the logical unit on whose behalf we execute
7407
  @type instance: L{objects.Instance}
7408
  @param instance: the instance whose disks we should create
7409
  @return: the success of the wipe
7410

7411
  """
7412
  node = instance.primary_node
7413

    
7414
  for device in instance.disks:
7415
    lu.cfg.SetDiskID(device, node)
7416

    
7417
  logging.info("Pause sync of instance %s disks", instance.name)
7418
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7419

    
7420
  for idx, success in enumerate(result.payload):
7421
    if not success:
7422
      logging.warn("pause-sync of instance %s for disks %d failed",
7423
                   instance.name, idx)
7424

    
7425
  try:
7426
    for idx, device in enumerate(instance.disks):
7427
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
7428
      # MAX_WIPE_CHUNK at max
7429
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
7430
                            constants.MIN_WIPE_CHUNK_PERCENT)
7431
      # we _must_ make this an int, otherwise rounding errors will
7432
      # occur
7433
      wipe_chunk_size = int(wipe_chunk_size)
7434

    
7435
      lu.LogInfo("* Wiping disk %d", idx)
7436
      logging.info("Wiping disk %d for instance %s, node %s using"
7437
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)
7438

    
7439
      offset = 0
7440
      size = device.size
7441
      last_output = 0
7442
      start_time = time.time()
7443

    
7444
      while offset < size:
7445
        wipe_size = min(wipe_chunk_size, size - offset)
7446
        logging.debug("Wiping disk %d, offset %s, chunk %s",
7447
                      idx, offset, wipe_size)
7448
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
7449
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
7450
                     (idx, offset, wipe_size))
7451
        now = time.time()
7452
        offset += wipe_size
7453
        if now - last_output >= 60:
7454
          eta = _CalcEta(now - start_time, offset, size)
7455
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
7456
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
7457
          last_output = now
7458
  finally:
7459
    logging.info("Resume sync of instance %s disks", instance.name)
7460

    
7461
    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
7462

    
7463
    for idx, success in enumerate(result.payload):
7464
      if not success:
7465
        lu.LogWarning("Resume sync of disk %d failed, please have a"
7466
                      " look at the status and troubleshoot the issue", idx)
7467
        logging.warn("resume-sync of instance %s for disks %d failed",
7468
                     instance.name, idx)
7469

    
7470

    
7471
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
7472
  """Create all disks for an instance.
7473

7474
  This abstracts away some work from AddInstance.
7475

7476
  @type lu: L{LogicalUnit}
7477
  @param lu: the logical unit on whose behalf we execute
7478
  @type instance: L{objects.Instance}
7479
  @param instance: the instance whose disks we should create
7480
  @type to_skip: list
7481
  @param to_skip: list of indices to skip
7482
  @type target_node: string
7483
  @param target_node: if passed, overrides the target node for creation
7484
  @rtype: boolean
7485
  @return: the success of the creation
7486

7487
  """
7488
  info = _GetInstanceInfoText(instance)
7489
  if target_node is None:
7490
    pnode = instance.primary_node
7491
    all_nodes = instance.all_nodes
7492
  else:
7493
    pnode = target_node
7494
    all_nodes = [pnode]
7495

    
7496
  if instance.disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE):
7497
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7498
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
7499

    
7500
    result.Raise("Failed to create directory '%s' on"
7501
                 " node %s" % (file_storage_dir, pnode))
7502

    
7503
  # Note: this needs to be kept in sync with adding of disks in
7504
  # LUInstanceSetParams
7505
  for idx, device in enumerate(instance.disks):
7506
    if to_skip and idx in to_skip:
7507
      continue
7508
    logging.info("Creating volume %s for instance %s",
7509
                 device.iv_name, instance.name)
7510
    #HARDCODE
7511
    for node in all_nodes:
7512
      f_create = node == pnode
7513
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
7514

    
7515

    
7516
def _RemoveDisks(lu, instance, target_node=None):
7517
  """Remove all disks for an instance.
7518

7519
  This abstracts away some work from `AddInstance()` and
7520
  `RemoveInstance()`. Note that in case some of the devices couldn't
7521
  be removed, the removal will continue with the other ones (compare
7522
  with `_CreateDisks()`).
7523

7524
  @type lu: L{LogicalUnit}
7525
  @param lu: the logical unit on whose behalf we execute
7526
  @type instance: L{objects.Instance}
7527
  @param instance: the instance whose disks we should remove
7528
  @type target_node: string
7529
  @param target_node: used to override the node on which to remove the disks
7530
  @rtype: boolean
7531
  @return: the success of the removal
7532

7533
  """
7534
  logging.info("Removing block devices for instance %s", instance.name)
7535

    
7536
  all_result = True
7537
  for device in instance.disks:
7538
    if target_node:
7539
      edata = [(target_node, device)]
7540
    else:
7541
      edata = device.ComputeNodeTree(instance.primary_node)
7542
    for node, disk in edata:
7543
      lu.cfg.SetDiskID(disk, node)
7544
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
7545
      if msg:
7546
        lu.LogWarning("Could not remove block device %s on node %s,"
7547
                      " continuing anyway: %s", device.iv_name, node, msg)
7548
        all_result = False
7549

    
7550
  if instance.disk_template == constants.DT_FILE:
7551
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7552
    if target_node:
7553
      tgt = target_node
7554
    else:
7555
      tgt = instance.primary_node
7556
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
7557
    if result.fail_msg:
7558
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
7559
                    file_storage_dir, instance.primary_node, result.fail_msg)
7560
      all_result = False
7561

    
7562
  return all_result
7563

    
7564

    
7565
def _ComputeDiskSizePerVG(disk_template, disks):
7566
  """Compute disk size requirements in the volume group
7567

7568
  """
7569
  def _compute(disks, payload):
7570
    """Universal algorithm.
7571

7572
    """
7573
    vgs = {}
7574
    for disk in disks:
7575
      vgs[disk[constants.IDISK_VG]] = \
7576
        vgs.get(constants.IDISK_VG, 0) + disk[constants.IDISK_SIZE] + payload
7577

    
7578
    return vgs
7579

    
7580
  # Required free disk space as a function of disk and swap space
7581
  req_size_dict = {
7582
    constants.DT_DISKLESS: {},
7583
    constants.DT_PLAIN: _compute(disks, 0),
7584
    # 128 MB are added for drbd metadata for each disk
7585
    constants.DT_DRBD8: _compute(disks, 128),
7586
    constants.DT_FILE: {},
7587
    constants.DT_SHARED_FILE: {},
7588
  }
7589

    
7590
  if disk_template not in req_size_dict:
7591
    raise errors.ProgrammerError("Disk template '%s' size requirement"
7592
                                 " is unknown" %  disk_template)
7593

    
7594
  return req_size_dict[disk_template]
7595

    
7596

    
7597
def _ComputeDiskSize(disk_template, disks):
7598
  """Compute disk size requirements in the volume group
7599

7600
  """
7601
  # Required free disk space as a function of disk and swap space
7602
  req_size_dict = {
7603
    constants.DT_DISKLESS: None,
7604
    constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
7605
    # 128 MB are added for drbd metadata for each disk
7606
    constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
7607
    constants.DT_FILE: None,
7608
    constants.DT_SHARED_FILE: 0,
7609
    constants.DT_BLOCK: 0,
7610
  }
7611

    
7612
  if disk_template not in req_size_dict:
7613
    raise errors.ProgrammerError("Disk template '%s' size requirement"
7614
                                 " is unknown" %  disk_template)
7615

    
7616
  return req_size_dict[disk_template]
7617

    
7618

    
7619
def _FilterVmNodes(lu, nodenames):
7620
  """Filters out non-vm_capable nodes from a list.
7621

7622
  @type lu: L{LogicalUnit}
7623
  @param lu: the logical unit for which we check
7624
  @type nodenames: list
7625
  @param nodenames: the list of nodes on which we should check
7626
  @rtype: list
7627
  @return: the list of vm-capable nodes
7628

7629
  """
7630
  vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
7631
  return [name for name in nodenames if name not in vm_nodes]
7632

    
7633

    
7634
def _CheckHVParams(lu, nodenames, hvname, hvparams):
7635
  """Hypervisor parameter validation.
7636

7637
  This function abstract the hypervisor parameter validation to be
7638
  used in both instance create and instance modify.
7639

7640
  @type lu: L{LogicalUnit}
7641
  @param lu: the logical unit for which we check
7642
  @type nodenames: list
7643
  @param nodenames: the list of nodes on which we should check
7644
  @type hvname: string
7645
  @param hvname: the name of the hypervisor we should use
7646
  @type hvparams: dict
7647
  @param hvparams: the parameters which we need to check
7648
  @raise errors.OpPrereqError: if the parameters are not valid
7649

7650
  """
7651
  nodenames = _FilterVmNodes(lu, nodenames)
7652
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
7653
                                                  hvname,
7654
                                                  hvparams)
7655
  for node in nodenames:
7656
    info = hvinfo[node]
7657
    if info.offline:
7658
      continue
7659
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
7660

    
7661

    
7662
def _CheckOSParams(lu, required, nodenames, osname, osparams):
7663
  """OS parameters validation.
7664

7665
  @type lu: L{LogicalUnit}
7666
  @param lu: the logical unit for which we check
7667
  @type required: boolean
7668
  @param required: whether the validation should fail if the OS is not
7669
      found
7670
  @type nodenames: list
7671
  @param nodenames: the list of nodes on which we should check
7672
  @type osname: string
7673
  @param osname: the name of the hypervisor we should use
7674
  @type osparams: dict
7675
  @param osparams: the parameters which we need to check
7676
  @raise errors.OpPrereqError: if the parameters are not valid
7677

7678
  """
7679
  nodenames = _FilterVmNodes(lu, nodenames)
7680
  result = lu.rpc.call_os_validate(required, nodenames, osname,
7681
                                   [constants.OS_VALIDATE_PARAMETERS],
7682
                                   osparams)
7683
  for node, nres in result.items():
7684
    # we don't check for offline cases since this should be run only
7685
    # against the master node and/or an instance's nodes
7686
    nres.Raise("OS Parameters validation failed on node %s" % node)
7687
    if not nres.payload:
7688
      lu.LogInfo("OS %s not found on node %s, validation skipped",
7689
                 osname, node)
7690

    
7691

    
7692
class LUInstanceCreate(LogicalUnit):
7693
  """Create an instance.
7694

7695
  """
7696
  HPATH = "instance-add"
7697
  HTYPE = constants.HTYPE_INSTANCE
7698
  REQ_BGL = False
7699

    
7700
  def CheckArguments(self):
7701
    """Check arguments.
7702

7703
    """
7704
    # do not require name_check to ease forward/backward compatibility
7705
    # for tools
7706
    if self.op.no_install and self.op.start:
7707
      self.LogInfo("No-installation mode selected, disabling startup")
7708
      self.op.start = False
7709
    # validate/normalize the instance name
7710
    self.op.instance_name = \
7711
      netutils.Hostname.GetNormalizedName(self.op.instance_name)
7712

    
7713
    if self.op.ip_check and not self.op.name_check:
7714
      # TODO: make the ip check more flexible and not depend on the name check
7715
      raise errors.OpPrereqError("Cannot do IP address check without a name"
7716
                                 " check", errors.ECODE_INVAL)
7717

    
7718
    # check nics' parameter names
7719
    for nic in self.op.nics:
7720
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
7721

    
7722
    # check disks. parameter names and consistent adopt/no-adopt strategy
7723
    has_adopt = has_no_adopt = False
7724
    for disk in self.op.disks:
7725
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
7726
      if constants.IDISK_ADOPT in disk:
7727
        has_adopt = True
7728
      else:
7729
        has_no_adopt = True
7730
    if has_adopt and has_no_adopt:
7731
      raise errors.OpPrereqError("Either all disks are adopted or none is",
7732
                                 errors.ECODE_INVAL)
7733
    if has_adopt:
7734
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
7735
        raise errors.OpPrereqError("Disk adoption is not supported for the"
7736
                                   " '%s' disk template" %
7737
                                   self.op.disk_template,
7738
                                   errors.ECODE_INVAL)
7739
      if self.op.iallocator is not None:
7740
        raise errors.OpPrereqError("Disk adoption not allowed with an"
7741
                                   " iallocator script", errors.ECODE_INVAL)
7742
      if self.op.mode == constants.INSTANCE_IMPORT:
7743
        raise errors.OpPrereqError("Disk adoption not allowed for"
7744
                                   " instance import", errors.ECODE_INVAL)
7745
    else:
7746
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
7747
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
7748
                                   " but no 'adopt' parameter given" %
7749
                                   self.op.disk_template,
7750
                                   errors.ECODE_INVAL)
7751

    
7752
    self.adopt_disks = has_adopt
7753

    
7754
    # instance name verification
7755
    if self.op.name_check:
7756
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
7757
      self.op.instance_name = self.hostname1.name
7758
      # used in CheckPrereq for ip ping check
7759
      self.check_ip = self.hostname1.ip
7760
    else:
7761
      self.check_ip = None
7762

    
7763
    # file storage checks
7764
    if (self.op.file_driver and
7765
        not self.op.file_driver in constants.FILE_DRIVER):
7766
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
7767
                                 self.op.file_driver, errors.ECODE_INVAL)
7768

    
7769
    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
7770
      raise errors.OpPrereqError("File storage directory path not absolute",
7771
                                 errors.ECODE_INVAL)
7772

    
7773
    ### Node/iallocator related checks
7774
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")
7775

    
7776
    if self.op.pnode is not None:
7777
      if self.op.disk_template in constants.DTS_INT_MIRROR:
7778
        if self.op.snode is None:
7779
          raise errors.OpPrereqError("The networked disk templates need"
7780
                                     " a mirror node", errors.ECODE_INVAL)
7781
      elif self.op.snode:
7782
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
7783
                        " template")
7784
        self.op.snode = None
7785

    
7786
    self._cds = _GetClusterDomainSecret()
7787

    
7788
    if self.op.mode == constants.INSTANCE_IMPORT:
7789
      # On import force_variant must be True, because if we forced it at
7790
      # initial install, our only chance when importing it back is that it
7791
      # works again!
7792
      self.op.force_variant = True
7793

    
7794
      if self.op.no_install:
7795
        self.LogInfo("No-installation mode has no effect during import")
7796

    
7797
    elif self.op.mode == constants.INSTANCE_CREATE:
7798
      if self.op.os_type is None:
7799
        raise errors.OpPrereqError("No guest OS specified",
7800
                                   errors.ECODE_INVAL)
7801
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
7802
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
7803
                                   " installation" % self.op.os_type,
7804
                                   errors.ECODE_STATE)
7805
      if self.op.disk_template is None:
7806
        raise errors.OpPrereqError("No disk template specified",
7807
                                   errors.ECODE_INVAL)
7808

    
7809
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7810
      # Check handshake to ensure both clusters have the same domain secret
7811
      src_handshake = self.op.source_handshake
7812
      if not src_handshake:
7813
        raise errors.OpPrereqError("Missing source handshake",
7814
                                   errors.ECODE_INVAL)
7815

    
7816
      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
7817
                                                           src_handshake)
7818
      if errmsg:
7819
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
7820
                                   errors.ECODE_INVAL)
7821

    
7822
      # Load and check source CA
7823
      self.source_x509_ca_pem = self.op.source_x509_ca
7824
      if not self.source_x509_ca_pem:
7825
        raise errors.OpPrereqError("Missing source X509 CA",
7826
                                   errors.ECODE_INVAL)
7827

    
7828
      try:
7829
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
7830
                                                    self._cds)
7831
      except OpenSSL.crypto.Error, err:
7832
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
7833
                                   (err, ), errors.ECODE_INVAL)
7834

    
7835
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
7836
      if errcode is not None:
7837
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
7838
                                   errors.ECODE_INVAL)
7839

    
7840
      self.source_x509_ca = cert
7841

    
7842
      src_instance_name = self.op.source_instance_name
7843
      if not src_instance_name:
7844
        raise errors.OpPrereqError("Missing source instance name",
7845
                                   errors.ECODE_INVAL)
7846

    
7847
      self.source_instance_name = \
7848
          netutils.GetHostname(name=src_instance_name).name
7849

    
7850
    else:
7851
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
7852
                                 self.op.mode, errors.ECODE_INVAL)
7853

    
7854
  def ExpandNames(self):
7855
    """ExpandNames for CreateInstance.
7856

7857
    Figure out the right locks for instance creation.
7858

7859
    """
7860
    self.needed_locks = {}
7861

    
7862
    instance_name = self.op.instance_name
7863
    # this is just a preventive check, but someone might still add this
7864
    # instance in the meantime, and creation will fail at lock-add time
7865
    if instance_name in self.cfg.GetInstanceList():
7866
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7867
                                 instance_name, errors.ECODE_EXISTS)
7868

    
7869
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
7870

    
7871
    if self.op.iallocator:
7872
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7873
    else:
7874
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
7875
      nodelist = [self.op.pnode]
7876
      if self.op.snode is not None:
7877
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
7878
        nodelist.append(self.op.snode)
7879
      self.needed_locks[locking.LEVEL_NODE] = nodelist
7880

    
7881
    # in case of import lock the source node too
7882
    if self.op.mode == constants.INSTANCE_IMPORT:
7883
      src_node = self.op.src_node
7884
      src_path = self.op.src_path
7885

    
7886
      if src_path is None:
7887
        self.op.src_path = src_path = self.op.instance_name
7888

    
7889
      if src_node is None:
7890
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7891
        self.op.src_node = None
7892
        if os.path.isabs(src_path):
7893
          raise errors.OpPrereqError("Importing an instance from an absolute"
7894
                                     " path requires a source node option",
7895
                                     errors.ECODE_INVAL)
7896
      else:
7897
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
7898
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
7899
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
7900
        if not os.path.isabs(src_path):
7901
          self.op.src_path = src_path = \
7902
            utils.PathJoin(constants.EXPORT_DIR, src_path)
7903

    
7904
  def _RunAllocator(self):
7905
    """Run the allocator based on input opcode.
7906

7907
    """
7908
    nics = [n.ToDict() for n in self.nics]
7909
    ial = IAllocator(self.cfg, self.rpc,
7910
                     mode=constants.IALLOCATOR_MODE_ALLOC,
7911
                     name=self.op.instance_name,
7912
                     disk_template=self.op.disk_template,
7913
                     tags=[],
7914
                     os=self.op.os_type,
7915
                     vcpus=self.be_full[constants.BE_VCPUS],
7916
                     mem_size=self.be_full[constants.BE_MEMORY],
7917
                     disks=self.disks,
7918
                     nics=nics,
7919
                     hypervisor=self.op.hypervisor,
7920
                     )
7921

    
7922
    ial.Run(self.op.iallocator)
7923

    
7924
    if not ial.success:
7925
      raise errors.OpPrereqError("Can't compute nodes using"
7926
                                 " iallocator '%s': %s" %
7927
                                 (self.op.iallocator, ial.info),
7928
                                 errors.ECODE_NORES)
7929
    if len(ial.result) != ial.required_nodes:
7930
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7931
                                 " of nodes (%s), required %s" %
7932
                                 (self.op.iallocator, len(ial.result),
7933
                                  ial.required_nodes), errors.ECODE_FAULT)
7934
    self.op.pnode = ial.result[0]
7935
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7936
                 self.op.instance_name, self.op.iallocator,
7937
                 utils.CommaJoin(ial.result))
7938
    if ial.required_nodes == 2:
7939
      self.op.snode = ial.result[1]
7940

    
7941
  def BuildHooksEnv(self):
7942
    """Build hooks env.
7943

7944
    This runs on master, primary and secondary nodes of the instance.
7945

7946
    """
7947
    env = {
7948
      "ADD_MODE": self.op.mode,
7949
      }
7950
    if self.op.mode == constants.INSTANCE_IMPORT:
7951
      env["SRC_NODE"] = self.op.src_node
7952
      env["SRC_PATH"] = self.op.src_path
7953
      env["SRC_IMAGES"] = self.src_images
7954

    
7955
    env.update(_BuildInstanceHookEnv(
7956
      name=self.op.instance_name,
7957
      primary_node=self.op.pnode,
7958
      secondary_nodes=self.secondaries,
7959
      status=self.op.start,
7960
      os_type=self.op.os_type,
7961
      memory=self.be_full[constants.BE_MEMORY],
7962
      vcpus=self.be_full[constants.BE_VCPUS],
7963
      nics=_NICListToTuple(self, self.nics),
7964
      disk_template=self.op.disk_template,
7965
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
7966
             for d in self.disks],
7967
      bep=self.be_full,
7968
      hvp=self.hv_full,
7969
      hypervisor_name=self.op.hypervisor,
7970
    ))
7971

    
7972
    return env
7973

    
7974
  def BuildHooksNodes(self):
7975
    """Build hooks nodes.
7976

7977
    """
7978
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
7979
    return nl, nl
7980

    
7981
  def _ReadExportInfo(self):
7982
    """Reads the export information from disk.
7983

7984
    It will override the opcode source node and path with the actual
7985
    information, if these two were not specified before.
7986

7987
    @return: the export information
7988

7989
    """
7990
    assert self.op.mode == constants.INSTANCE_IMPORT
7991

    
7992
    src_node = self.op.src_node
7993
    src_path = self.op.src_path
7994

    
7995
    if src_node is None:
7996
      locked_nodes = self.glm.list_owned(locking.LEVEL_NODE)
7997
      exp_list = self.rpc.call_export_list(locked_nodes)
7998
      found = False
7999
      for node in exp_list:
8000
        if exp_list[node].fail_msg:
8001
          continue
8002
        if src_path in exp_list[node].payload:
8003
          found = True
8004
          self.op.src_node = src_node = node
8005
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8006
                                                       src_path)
8007
          break
8008
      if not found:
8009
        raise errors.OpPrereqError("No export found for relative path %s" %
8010
                                    src_path, errors.ECODE_INVAL)
8011

    
8012
    _CheckNodeOnline(self, src_node)
8013
    result = self.rpc.call_export_info(src_node, src_path)
8014
    result.Raise("No export or invalid export found in dir %s" % src_path)
8015

    
8016
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8017
    if not export_info.has_section(constants.INISECT_EXP):
8018
      raise errors.ProgrammerError("Corrupted export config",
8019
                                   errors.ECODE_ENVIRON)
8020

    
8021
    ei_version = export_info.get(constants.INISECT_EXP, "version")
8022
    if (int(ei_version) != constants.EXPORT_VERSION):
8023
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8024
                                 (ei_version, constants.EXPORT_VERSION),
8025
                                 errors.ECODE_ENVIRON)
8026
    return export_info
8027

    
8028
  def _ReadExportParams(self, einfo):
8029
    """Use export parameters as defaults.
8030

8031
    In case the opcode doesn't specify (as in override) some instance
8032
    parameters, then try to use them from the export information, if
8033
    that declares them.
8034

8035
    """
8036
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8037

    
8038
    if self.op.disk_template is None:
8039
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
8040
        self.op.disk_template = einfo.get(constants.INISECT_INS,
8041
                                          "disk_template")
8042
      else:
8043
        raise errors.OpPrereqError("No disk template specified and the export"
8044
                                   " is missing the disk_template information",
8045
                                   errors.ECODE_INVAL)
8046

    
8047
    if not self.op.disks:
8048
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
8049
        disks = []
8050
        # TODO: import the disk iv_name too
8051
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
8052
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8053
          disks.append({constants.IDISK_SIZE: disk_sz})
8054
        self.op.disks = disks
8055
      else:
8056
        raise errors.OpPrereqError("No disk info specified and the export"
8057
                                   " is missing the disk information",
8058
                                   errors.ECODE_INVAL)
8059

    
8060
    if (not self.op.nics and
8061
        einfo.has_option(constants.INISECT_INS, "nic_count")):
8062
      nics = []
8063
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
8064
        ndict = {}
8065
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8066
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
8067
          ndict[name] = v
8068
        nics.append(ndict)
8069
      self.op.nics = nics
8070

    
8071
    if (self.op.hypervisor is None and
8072
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
8073
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8074
    if einfo.has_section(constants.INISECT_HYP):
8075
      # use the export parameters but do not override the ones
8076
      # specified by the user
8077
      for name, value in einfo.items(constants.INISECT_HYP):
8078
        if name not in self.op.hvparams:
8079
          self.op.hvparams[name] = value
8080

    
8081
    if einfo.has_section(constants.INISECT_BEP):
8082
      # use the parameters, without overriding
8083
      for name, value in einfo.items(constants.INISECT_BEP):
8084
        if name not in self.op.beparams:
8085
          self.op.beparams[name] = value
8086
    else:
8087
      # try to read the parameters old style, from the main section
8088
      for name in constants.BES_PARAMETERS:
8089
        if (name not in self.op.beparams and
8090
            einfo.has_option(constants.INISECT_INS, name)):
8091
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8092

    
8093
    if einfo.has_section(constants.INISECT_OSP):
8094
      # use the parameters, without overriding
8095
      for name, value in einfo.items(constants.INISECT_OSP):
8096
        if name not in self.op.osparams:
8097
          self.op.osparams[name] = value
8098

    
8099
  def _RevertToDefaults(self, cluster):
8100
    """Revert the instance parameters to the default values.
8101

8102
    """
8103
    # hvparams
8104
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
8105
    for name in self.op.hvparams.keys():
8106
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
8107
        del self.op.hvparams[name]
8108
    # beparams
8109
    be_defs = cluster.SimpleFillBE({})
8110
    for name in self.op.beparams.keys():
8111
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
8112
        del self.op.beparams[name]
8113
    # nic params
8114
    nic_defs = cluster.SimpleFillNIC({})
8115
    for nic in self.op.nics:
8116
      for name in constants.NICS_PARAMETERS:
8117
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
8118
          del nic[name]
8119
    # osparams
8120
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
8121
    for name in self.op.osparams.keys():
8122
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
8123
        del self.op.osparams[name]
8124

    
8125
  def CheckPrereq(self):
8126
    """Check prerequisites.
8127

8128
    """
8129
    if self.op.mode == constants.INSTANCE_IMPORT:
8130
      export_info = self._ReadExportInfo()
8131
      self._ReadExportParams(export_info)
8132

    
8133
    if (not self.cfg.GetVGName() and
8134
        self.op.disk_template not in constants.DTS_NOT_LVM):
8135
      raise errors.OpPrereqError("Cluster does not support lvm-based"
8136
                                 " instances", errors.ECODE_STATE)
8137

    
8138
    if self.op.hypervisor is None:
8139
      self.op.hypervisor = self.cfg.GetHypervisorType()
8140

    
8141
    cluster = self.cfg.GetClusterInfo()
8142
    enabled_hvs = cluster.enabled_hypervisors
8143
    if self.op.hypervisor not in enabled_hvs:
8144
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8145
                                 " cluster (%s)" % (self.op.hypervisor,
8146
                                  ",".join(enabled_hvs)),
8147
                                 errors.ECODE_STATE)
8148

    
8149
    # check hypervisor parameter syntax (locally)
8150
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8151
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8152
                                      self.op.hvparams)
8153
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8154
    hv_type.CheckParameterSyntax(filled_hvp)
8155
    self.hv_full = filled_hvp
8156
    # check that we don't specify global parameters on an instance
8157
    _CheckGlobalHvParams(self.op.hvparams)
8158

    
8159
    # fill and remember the beparams dict
8160
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
8161
    self.be_full = cluster.SimpleFillBE(self.op.beparams)
8162

    
8163
    # build os parameters
8164
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
8165

    
8166
    # now that hvp/bep are in final format, let's reset to defaults,
8167
    # if told to do so
8168
    if self.op.identify_defaults:
8169
      self._RevertToDefaults(cluster)
8170

    
8171
    # NIC buildup
8172
    self.nics = []
8173
    for idx, nic in enumerate(self.op.nics):
8174
      nic_mode_req = nic.get(constants.INIC_MODE, None)
8175
      nic_mode = nic_mode_req
8176
      if nic_mode is None:
8177
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
8178

    
8179
      # in routed mode, for the first nic, the default ip is 'auto'
8180
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
8181
        default_ip_mode = constants.VALUE_AUTO
8182
      else:
8183
        default_ip_mode = constants.VALUE_NONE
8184

    
8185
      # ip validity checks
8186
      ip = nic.get(constants.INIC_IP, default_ip_mode)
8187
      if ip is None or ip.lower() == constants.VALUE_NONE:
8188
        nic_ip = None
8189
      elif ip.lower() == constants.VALUE_AUTO:
8190
        if not self.op.name_check:
8191
          raise errors.OpPrereqError("IP address set to auto but name checks"
8192
                                     " have been skipped",
8193
                                     errors.ECODE_INVAL)
8194
        nic_ip = self.hostname1.ip
8195
      else:
8196
        if not netutils.IPAddress.IsValid(ip):
8197
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
8198
                                     errors.ECODE_INVAL)
8199
        nic_ip = ip
8200

    
8201
      # TODO: check the ip address for uniqueness
8202
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
8203
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
8204
                                   errors.ECODE_INVAL)
8205

    
8206
      # MAC address verification
8207
      mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
8208
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8209
        mac = utils.NormalizeAndValidateMac(mac)
8210

    
8211
        try:
8212
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
8213
        except errors.ReservationError:
8214
          raise errors.OpPrereqError("MAC address %s already in use"
8215
                                     " in cluster" % mac,
8216
                                     errors.ECODE_NOTUNIQUE)
8217

    
8218
      #  Build nic parameters
8219
      link = nic.get(constants.INIC_LINK, None)
8220
      nicparams = {}
8221
      if nic_mode_req:
8222
        nicparams[constants.NIC_MODE] = nic_mode_req
8223
      if link:
8224
        nicparams[constants.NIC_LINK] = link
8225

    
8226
      check_params = cluster.SimpleFillNIC(nicparams)
8227
      objects.NIC.CheckParameterSyntax(check_params)
8228
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
8229

    
8230
    # disk checks/pre-build
8231
    default_vg = self.cfg.GetVGName()
8232
    self.disks = []
8233
    for disk in self.op.disks:
8234
      mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
8235
      if mode not in constants.DISK_ACCESS_SET:
8236
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
8237
                                   mode, errors.ECODE_INVAL)
8238
      size = disk.get(constants.IDISK_SIZE, None)
8239
      if size is None:
8240
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
8241
      try:
8242
        size = int(size)
8243
      except (TypeError, ValueError):
8244
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
8245
                                   errors.ECODE_INVAL)
8246

    
8247
      data_vg = disk.get(constants.IDISK_VG, default_vg)
8248
      new_disk = {
8249
        constants.IDISK_SIZE: size,
8250
        constants.IDISK_MODE: mode,
8251
        constants.IDISK_VG: data_vg,
8252
        constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
8253
        }
8254
      if constants.IDISK_ADOPT in disk:
8255
        new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
8256
      self.disks.append(new_disk)
8257

    
8258
    if self.op.mode == constants.INSTANCE_IMPORT:
8259

    
8260
      # Check that the new instance doesn't have less disks than the export
8261
      instance_disks = len(self.disks)
8262
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
8263
      if instance_disks < export_disks:
8264
        raise errors.OpPrereqError("Not enough disks to import."
8265
                                   " (instance: %d, export: %d)" %
8266
                                   (instance_disks, export_disks),
8267
                                   errors.ECODE_INVAL)
8268

    
8269
      disk_images = []
8270
      for idx in range(export_disks):
8271
        option = 'disk%d_dump' % idx
8272
        if export_info.has_option(constants.INISECT_INS, option):
8273
          # FIXME: are the old os-es, disk sizes, etc. useful?
8274
          export_name = export_info.get(constants.INISECT_INS, option)
8275
          image = utils.PathJoin(self.op.src_path, export_name)
8276
          disk_images.append(image)
8277
        else:
8278
          disk_images.append(False)
8279

    
8280
      self.src_images = disk_images
8281

    
8282
      old_name = export_info.get(constants.INISECT_INS, 'name')
8283
      try:
8284
        exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
8285
      except (TypeError, ValueError), err:
8286
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
8287
                                   " an integer: %s" % str(err),
8288
                                   errors.ECODE_STATE)
8289
      if self.op.instance_name == old_name:
8290
        for idx, nic in enumerate(self.nics):
8291
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
8292
            nic_mac_ini = 'nic%d_mac' % idx
8293
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8294

    
8295
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8296

    
8297
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
8298
    if self.op.ip_check:
8299
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8300
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
8301
                                   (self.check_ip, self.op.instance_name),
8302
                                   errors.ECODE_NOTUNIQUE)
8303

    
8304
    #### mac address generation
8305
    # By generating here the mac address both the allocator and the hooks get
8306
    # the real final mac address rather than the 'auto' or 'generate' value.
8307
    # There is a race condition between the generation and the instance object
8308
    # creation, which means that we know the mac is valid now, but we're not
8309
    # sure it will be when we actually add the instance. If things go bad
8310
    # adding the instance will abort because of a duplicate mac, and the
8311
    # creation job will fail.
8312
    for nic in self.nics:
8313
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8314
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8315

    
8316
    #### allocator run
8317

    
8318
    if self.op.iallocator is not None:
8319
      self._RunAllocator()
8320

    
8321
    #### node related checks
8322

    
8323
    # check primary node
8324
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8325
    assert self.pnode is not None, \
8326
      "Cannot retrieve locked node %s" % self.op.pnode
8327
    if pnode.offline:
8328
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8329
                                 pnode.name, errors.ECODE_STATE)
8330
    if pnode.drained:
8331
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8332
                                 pnode.name, errors.ECODE_STATE)
8333
    if not pnode.vm_capable:
8334
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8335
                                 " '%s'" % pnode.name, errors.ECODE_STATE)
8336

    
8337
    self.secondaries = []
8338

    
8339
    # mirror node verification
8340
    if self.op.disk_template in constants.DTS_INT_MIRROR:
8341
      if self.op.snode == pnode.name:
8342
        raise errors.OpPrereqError("The secondary node cannot be the"
8343
                                   " primary node", errors.ECODE_INVAL)
8344
      _CheckNodeOnline(self, self.op.snode)
8345
      _CheckNodeNotDrained(self, self.op.snode)
8346
      _CheckNodeVmCapable(self, self.op.snode)
8347
      self.secondaries.append(self.op.snode)
8348

    
8349
    nodenames = [pnode.name] + self.secondaries
8350

    
8351
    if not self.adopt_disks:
8352
      # Check lv size requirements, if not adopting
8353
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8354
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8355

    
8356
    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8357
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8358
                                disk[constants.IDISK_ADOPT])
8359
                     for disk in self.disks])
8360
      if len(all_lvs) != len(self.disks):
8361
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
8362
                                   errors.ECODE_INVAL)
8363
      for lv_name in all_lvs:
8364
        try:
8365
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
8366
          # to ReserveLV uses the same syntax
8367
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8368
        except errors.ReservationError:
8369
          raise errors.OpPrereqError("LV named %s used by another instance" %
8370
                                     lv_name, errors.ECODE_NOTUNIQUE)
8371

    
8372
      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8373
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8374

    
8375
      node_lvs = self.rpc.call_lv_list([pnode.name],
8376
                                       vg_names.payload.keys())[pnode.name]
8377
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8378
      node_lvs = node_lvs.payload
8379

    
8380
      delta = all_lvs.difference(node_lvs.keys())
8381
      if delta:
8382
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
8383
                                   utils.CommaJoin(delta),
8384
                                   errors.ECODE_INVAL)
8385
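      # node_lvs maps "vg/lv" names to attribute tuples: index 0 is the size
      # (used further down) and index 2 is the online/active flag checked
      # here, since volumes that are in use cannot be adopted.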
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
8386
      if online_lvs:
8387
        raise errors.OpPrereqError("Online logical volumes found, cannot"
8388
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
8389
                                   errors.ECODE_STATE)
8390
      # update the size of disk based on what is found
8391
      for dsk in self.disks:
8392
        dsk[constants.IDISK_SIZE] = \
8393
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
8394
                                        dsk[constants.IDISK_ADOPT])][0]))
8395

    
8396
    elif self.op.disk_template == constants.DT_BLOCK:
8397
      # Normalize and de-duplicate device paths
8398
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
8399
                       for disk in self.disks])
8400
      if len(all_disks) != len(self.disks):
8401
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
8402
                                   errors.ECODE_INVAL)
8403
      baddisks = [d for d in all_disks
8404
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
8405
      if baddisks:
8406
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
8407
                                   " cannot be adopted" %
8408
                                   (", ".join(baddisks),
8409
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
8410
                                   errors.ECODE_INVAL)
8411

    
8412
      node_disks = self.rpc.call_bdev_sizes([pnode.name],
8413
                                            list(all_disks))[pnode.name]
8414
      node_disks.Raise("Cannot get block device information from node %s" %
8415
                       pnode.name)
8416
      node_disks = node_disks.payload
8417
      delta = all_disks.difference(node_disks.keys())
8418
      if delta:
8419
        raise errors.OpPrereqError("Missing block device(s): %s" %
8420
                                   utils.CommaJoin(delta),
8421
                                   errors.ECODE_INVAL)
8422
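      # node_disks maps each device path to the size reported by the node
      # (assumed to be in MiB, matching IDISK_SIZE); record it on the spec.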
      for dsk in self.disks:
8423
        dsk[constants.IDISK_SIZE] = \
8424
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
8425

    
8426
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
8427

    
8428
    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
8429
    # check OS parameters (remotely)
8430
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
8431

    
8432
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
8433

    
8434
    # memory check on primary node
8435
    if self.op.start:
8436
      _CheckNodeFreeMemory(self, self.pnode.name,
8437
                           "creating instance %s" % self.op.instance_name,
8438
                           self.be_full[constants.BE_MEMORY],
8439
                           self.op.hypervisor)
8440

    
8441
    self.dry_run_result = list(nodenames)
8442

    
8443
  def Exec(self, feedback_fn):
8444
    """Create and add the instance to the cluster.
8445

8446
    """
8447
    instance = self.op.instance_name
8448
    pnode_name = self.pnode.name
8449

    
8450
    ht_kind = self.op.hypervisor
8451
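    # Hypervisors in HTS_REQ_PORT need a cluster-wide unique network port
    # (typically for the remote console, e.g. VNC), reserved here.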
    if ht_kind in constants.HTS_REQ_PORT:
8452
      network_port = self.cfg.AllocatePort()
8453
    else:
8454
      network_port = None
8455

    
8456
    if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
8457
      # this is needed because os.path.join does not accept None arguments
8458
      if self.op.file_storage_dir is None:
8459
        string_file_storage_dir = ""
8460
      else:
8461
        string_file_storage_dir = self.op.file_storage_dir
8462

    
8463
      # build the full file storage dir path
8464
      if self.op.disk_template == constants.DT_SHARED_FILE:
8465
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
8466
      else:
8467
        get_fsd_fn = self.cfg.GetFileStorageDir
8468

    
8469
      file_storage_dir = utils.PathJoin(get_fsd_fn(),
8470
                                        string_file_storage_dir, instance)
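      # Illustrative result (paths hypothetical):
      #   /srv/ganeti/file-storage/<file_storage_dir>/<instance name>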
8471
    else:
8472
      file_storage_dir = ""
8473

    
8474
    disks = _GenerateDiskTemplate(self,
8475
                                  self.op.disk_template,
8476
                                  instance, pnode_name,
8477
                                  self.secondaries,
8478
                                  self.disks,
8479
                                  file_storage_dir,
8480
                                  self.op.file_driver,
8481
                                  0,
8482
                                  feedback_fn)
8483

    
8484
    iobj = objects.Instance(name=instance, os=self.op.os_type,
8485
                            primary_node=pnode_name,
8486
                            nics=self.nics, disks=disks,
8487
                            disk_template=self.op.disk_template,
8488
                            admin_up=False,
8489
                            network_port=network_port,
8490
                            beparams=self.op.beparams,
8491
                            hvparams=self.op.hvparams,
8492
                            hypervisor=self.op.hypervisor,
8493
                            osparams=self.op.osparams,
8494
                            )
8495

    
8496
    if self.adopt_disks:
8497
      if self.op.disk_template == constants.DT_PLAIN:
8498
        # rename LVs to the newly-generated names; we need to construct
8499
        # 'fake' LV disks with the old data, plus the new unique_id
8500
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
8501
        rename_to = []
8502
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
8503
          rename_to.append(t_dsk.logical_id)
8504
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
8505
          self.cfg.SetDiskID(t_dsk, pnode_name)
8506
        result = self.rpc.call_blockdev_rename(pnode_name,
8507
                                               zip(tmp_disks, rename_to))
8508
        result.Raise("Failed to rename adoped LVs")
8509
    else:
8510
      feedback_fn("* creating instance disks...")
8511
      try:
8512
        _CreateDisks(self, iobj)
8513
      except errors.OpExecError:
8514
        self.LogWarning("Device creation failed, reverting...")
8515
        try:
8516
          _RemoveDisks(self, iobj)
8517
        finally:
8518
          self.cfg.ReleaseDRBDMinors(instance)
8519
          raise
8520

    
8521
    feedback_fn("adding instance %s to cluster config" % instance)
8522

    
8523
    self.cfg.AddInstance(iobj, self.proc.GetECId())
8524

    
8525
    # Declare that we don't want to remove the instance lock anymore, as we've
8526
    # added the instance to the config
8527
    del self.remove_locks[locking.LEVEL_INSTANCE]
8528

    
8529
    if self.op.mode == constants.INSTANCE_IMPORT:
8530
      # Release unused nodes
8531
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
8532
    else:
8533
      # Release all nodes
8534
      _ReleaseLocks(self, locking.LEVEL_NODE)
8535

    
8536
    disk_abort = False
8537
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
8538
      feedback_fn("* wiping instance disks...")
8539
      try:
8540
        _WipeDisks(self, iobj)
8541
      except errors.OpExecError, err:
8542
        logging.exception("Wiping disks failed")
8543
        self.LogWarning("Wiping instance disks failed (%s)", err)
8544
        disk_abort = True
8545

    
8546
    if disk_abort:
8547
      # Something is already wrong with the disks, don't do anything else
8548
      pass
8549
    elif self.op.wait_for_sync:
8550
      disk_abort = not _WaitForSync(self, iobj)
8551
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
8552
      # make sure the disks are not degraded (still sync-ing is ok)
8553
      time.sleep(15)
8554
      feedback_fn("* checking mirrors status")
8555
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
8556
    else:
8557
      disk_abort = False
8558

    
8559
    if disk_abort:
8560
      _RemoveDisks(self, iobj)
8561
      self.cfg.RemoveInstance(iobj.name)
8562
      # Make sure the instance lock gets removed
8563
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
8564
      raise errors.OpExecError("There are some degraded disks for"
8565
                               " this instance")
8566

    
8567
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
8568
      if self.op.mode == constants.INSTANCE_CREATE:
8569
        if not self.op.no_install:
8570
          feedback_fn("* running the instance OS create scripts...")
8571
          # FIXME: pass debug option from opcode to backend
8572
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
8573
                                                 self.op.debug_level)
8574
          result.Raise("Could not add os for instance %s"
8575
                       " on node %s" % (instance, pnode_name))
8576

    
8577
      elif self.op.mode == constants.INSTANCE_IMPORT:
8578
        feedback_fn("* running the instance OS import scripts...")
8579

    
8580
        transfers = []
8581

    
8582
        for idx, image in enumerate(self.src_images):
8583
          if not image:
8584
            continue
8585

    
8586
          # FIXME: pass debug option from opcode to backend
8587
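          # One DiskTransfer per dump file: the source is the local dump
          # (IEIO_FILE) and the destination is the import script run against
          # the matching instance disk (IEIO_SCRIPT).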
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
8588
                                             constants.IEIO_FILE, (image, ),
8589
                                             constants.IEIO_SCRIPT,
8590
                                             (iobj.disks[idx], idx),
8591
                                             None)
8592
          transfers.append(dt)
8593

    
8594
        import_result = \
8595
          masterd.instance.TransferInstanceData(self, feedback_fn,
8596
                                                self.op.src_node, pnode_name,
8597
                                                self.pnode.secondary_ip,
8598
                                                iobj, transfers)
8599
        if not compat.all(import_result):
8600
          self.LogWarning("Some disks for instance %s on node %s were not"
8601
                          " imported successfully" % (instance, pnode_name))
8602

    
8603
      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8604
        feedback_fn("* preparing remote import...")
8605
        # The source cluster will stop the instance before attempting to make a
8606
        # connection. In some cases stopping an instance can take a long time,
8607
        # hence the shutdown timeout is added to the connection timeout.
8608
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
8609
                           self.op.source_shutdown_timeout)
8610
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
8611

    
8612
        assert iobj.primary_node == self.pnode.name
8613
        disk_results = \
8614
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
8615
                                        self.source_x509_ca,
8616
                                        self._cds, timeouts)
8617
        if not compat.all(disk_results):
8618
          # TODO: Should the instance still be started, even if some disks
8619
          # failed to import (valid for local imports, too)?
8620
          self.LogWarning("Some disks for instance %s on node %s were not"
8621
                          " imported successfully" % (instance, pnode_name))
8622

    
8623
        # Run rename script on newly imported instance
8624
        assert iobj.name == instance
8625
        feedback_fn("Running rename script for %s" % instance)
8626
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
8627
                                                   self.source_instance_name,
8628
                                                   self.op.debug_level)
8629
        if result.fail_msg:
8630
          self.LogWarning("Failed to run rename script for %s on node"
8631
                          " %s: %s" % (instance, pnode_name, result.fail_msg))
8632

    
8633
      else:
8634
        # also checked in the prereq part
8635
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
8636
                                     % self.op.mode)
8637

    
8638
    if self.op.start:
8639
      iobj.admin_up = True
8640
      self.cfg.Update(iobj, feedback_fn)
8641
      logging.info("Starting instance %s on node %s", instance, pnode_name)
8642
      feedback_fn("* starting instance...")
8643
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
8644
      result.Raise("Could not start instance")
8645

    
8646
    return list(iobj.all_nodes)
8647

    
8648

    
8649
class LUInstanceConsole(NoHooksLU):
8650
  """Connect to an instance's console.
8651

8652
  This is somewhat special in that it returns the command line that
8653
  you need to run on the master node in order to connect to the
8654
  console.
8655

8656
  """
8657
  REQ_BGL = False
8658

    
8659
  def ExpandNames(self):
8660
    self._ExpandAndLockInstance()
8661

    
8662
  def CheckPrereq(self):
8663
    """Check prerequisites.
8664

8665
    This checks that the instance is in the cluster.
8666

8667
    """
8668
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8669
    assert self.instance is not None, \
8670
      "Cannot retrieve locked instance %s" % self.op.instance_name
8671
    _CheckNodeOnline(self, self.instance.primary_node)
8672

    
8673
  def Exec(self, feedback_fn):
8674
    """Connect to the console of an instance
8675

8676
    """
8677
    instance = self.instance
8678
    node = instance.primary_node
8679

    
8680
    node_insts = self.rpc.call_instance_list([node],
8681
                                             [instance.hypervisor])[node]
8682
    node_insts.Raise("Can't get node information from %s" % node)
8683

    
8684
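    # The payload lists the instances currently running on the node for the
    # given hypervisor; absence means the instance is not running there.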
    if instance.name not in node_insts.payload:
8685
      if instance.admin_up:
8686
        state = constants.INSTST_ERRORDOWN
8687
      else:
8688
        state = constants.INSTST_ADMINDOWN
8689
      raise errors.OpExecError("Instance %s is not running (state %s)" %
8690
                               (instance.name, state))
8691

    
8692
    logging.debug("Connecting to console of %s on %s", instance.name, node)
8693

    
8694
    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
8695

    
8696

    
8697
def _GetInstanceConsole(cluster, instance):
8698
  """Returns console information for an instance.
8699

8700
  @type cluster: L{objects.Cluster}
8701
  @type instance: L{objects.Instance}
8702
  @rtype: dict
8703

8704
  """
8705
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
8706
  # beparams and hvparams are passed separately, to avoid editing the
8707
  # instance and then saving the defaults in the instance itself.
8708
  hvparams = cluster.FillHV(instance)
8709
  beparams = cluster.FillBE(instance)
8710
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)
8711

    
8712
  assert console.instance == instance.name
8713
  assert console.Validate()
8714

    
8715
  return console.ToDict()
8716

    
8717

    
8718
class LUInstanceReplaceDisks(LogicalUnit):
8719
  """Replace the disks of an instance.
8720

8721
  """
8722
  HPATH = "mirrors-replace"
8723
  HTYPE = constants.HTYPE_INSTANCE
8724
  REQ_BGL = False
8725

    
8726
  def CheckArguments(self):
8727
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
8728
                                  self.op.iallocator)
8729

    
8730
  def ExpandNames(self):
8731
    self._ExpandAndLockInstance()
8732

    
8733
    assert locking.LEVEL_NODE not in self.needed_locks
8734
    assert locking.LEVEL_NODEGROUP not in self.needed_locks
8735

    
8736
    assert self.op.iallocator is None or self.op.remote_node is None, \
8737
      "Conflicting options"
8738

    
8739
    if self.op.remote_node is not None:
8740
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8741

    
8742
      # Warning: do not remove the locking of the new secondary here
8743
      # unless DRBD8.AddChildren is changed to work in parallel;
8744
      # currently it doesn't since parallel invocations of
8745
      # FindUnusedMinor will conflict
8746
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
8747
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
8748
    else:
8749
      self.needed_locks[locking.LEVEL_NODE] = []
8750
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8751

    
8752
      if self.op.iallocator is not None:
8753
        # iallocator will select a new node in the same group
8754
        self.needed_locks[locking.LEVEL_NODEGROUP] = []
8755

    
8756
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
8757
                                   self.op.iallocator, self.op.remote_node,
8758
                                   self.op.disks, False, self.op.early_release)
8759

    
8760
    self.tasklets = [self.replacer]
8761

    
8762
  def DeclareLocks(self, level):
8763
    if level == locking.LEVEL_NODEGROUP:
8764
      assert self.op.remote_node is None
8765
      assert self.op.iallocator is not None
8766
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
8767

    
8768
      self.share_locks[locking.LEVEL_NODEGROUP] = 1
8769
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
8770
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)
8771

    
8772
    elif level == locking.LEVEL_NODE:
8773
      if self.op.iallocator is not None:
8774
        assert self.op.remote_node is None
8775
        assert not self.needed_locks[locking.LEVEL_NODE]
8776

    
8777
        # Lock member nodes of all locked groups
8778
        self.needed_locks[locking.LEVEL_NODE] = [node_name
8779
          for group_uuid in self.glm.list_owned(locking.LEVEL_NODEGROUP)
8780
          for node_name in self.cfg.GetNodeGroup(group_uuid).members]
8781
      else:
8782
        self._LockInstancesNodes()
8783

    
8784
  def BuildHooksEnv(self):
8785
    """Build hooks env.
8786

8787
    This runs on the master, the primary and all the secondaries.
8788

8789
    """
8790
    instance = self.replacer.instance
8791
    env = {
8792
      "MODE": self.op.mode,
8793
      "NEW_SECONDARY": self.op.remote_node,
8794
      "OLD_SECONDARY": instance.secondary_nodes[0],
8795
      }
8796
    env.update(_BuildInstanceHookEnvByObject(self, instance))
8797
    return env
8798

    
8799
  def BuildHooksNodes(self):
8800
    """Build hooks nodes.
8801

8802
    """
8803
    instance = self.replacer.instance
8804
    nl = [
8805
      self.cfg.GetMasterNode(),
8806
      instance.primary_node,
8807
      ]
8808
    if self.op.remote_node is not None:
8809
      nl.append(self.op.remote_node)
8810
    return nl, nl
8811

    
8812
  def CheckPrereq(self):
8813
    """Check prerequisites.
8814

8815
    """
8816
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
8817
            self.op.iallocator is None)
8818

    
8819
    owned_groups = self.glm.list_owned(locking.LEVEL_NODEGROUP)
8820
    if owned_groups:
8821
      groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
8822
      if owned_groups != groups:
8823
        raise errors.OpExecError("Node groups used by instance '%s' changed"
8824
                                 " since lock was acquired, current list is %r,"
8825
                                 " used to be '%s'" %
8826
                                 (self.op.instance_name,
8827
                                  utils.CommaJoin(groups),
8828
                                  utils.CommaJoin(owned_groups)))
8829

    
8830
    return LogicalUnit.CheckPrereq(self)
8831

    
8832

    
8833
class TLReplaceDisks(Tasklet):
8834
  """Replaces disks for an instance.
8835

8836
  Note: Locking is not within the scope of this class.
8837

8838
  """
8839
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
8840
               disks, delay_iallocator, early_release):
8841
    """Initializes this class.
8842

8843
    """
8844
    Tasklet.__init__(self, lu)
8845

    
8846
    # Parameters
8847
    self.instance_name = instance_name
8848
    self.mode = mode
8849
    self.iallocator_name = iallocator_name
8850
    self.remote_node = remote_node
8851
    self.disks = disks
8852
    self.delay_iallocator = delay_iallocator
8853
    self.early_release = early_release
8854

    
8855
    # Runtime data
8856
    self.instance = None
8857
    self.new_node = None
8858
    self.target_node = None
8859
    self.other_node = None
8860
    self.remote_node_info = None
8861
    self.node_secondary_ip = None
8862

    
8863
  @staticmethod
8864
  def CheckArguments(mode, remote_node, iallocator):
8865
    """Helper function for users of this class.
8866

8867
    """
8868
    # check for valid parameter combination
8869
    if mode == constants.REPLACE_DISK_CHG:
8870
      if remote_node is None and iallocator is None:
8871
        raise errors.OpPrereqError("When changing the secondary either an"
8872
                                   " iallocator script must be used or the"
8873
                                   " new node given", errors.ECODE_INVAL)
8874

    
8875
      if remote_node is not None and iallocator is not None:
8876
        raise errors.OpPrereqError("Give either the iallocator or the new"
8877
                                   " secondary, not both", errors.ECODE_INVAL)
8878

    
8879
    elif remote_node is not None or iallocator is not None:
8880
      # Not replacing the secondary
8881
      raise errors.OpPrereqError("The iallocator and new node options can"
8882
                                 " only be used when changing the"
8883
                                 " secondary node", errors.ECODE_INVAL)
8884

    
8885
  @staticmethod
8886
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
8887
    """Compute a new secondary node using an IAllocator.
8888

8889
    """
8890
    ial = IAllocator(lu.cfg, lu.rpc,
8891
                     mode=constants.IALLOCATOR_MODE_RELOC,
8892
                     name=instance_name,
8893
                     relocate_from=relocate_from)
8894

    
8895
    ial.Run(iallocator_name)
8896

    
8897
    if not ial.success:
8898
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
8899
                                 " %s" % (iallocator_name, ial.info),
8900
                                 errors.ECODE_NORES)
8901

    
8902
    if len(ial.result) != ial.required_nodes:
8903
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8904
                                 " of nodes (%s), required %s" %
8905
                                 (iallocator_name,
8906
                                  len(ial.result), ial.required_nodes),
8907
                                 errors.ECODE_FAULT)
8908

    
8909
    remote_node_name = ial.result[0]
8910

    
8911
    lu.LogInfo("Selected new secondary for instance '%s': %s",
8912
               instance_name, remote_node_name)
8913

    
8914
    return remote_node_name
8915

    
8916
  def _FindFaultyDisks(self, node_name):
8917
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
8918
                                    node_name, True)
8919

    
8920
  def _CheckDisksActivated(self, instance):
8921
    """Checks if the instance disks are activated.
8922

8923
    @param instance: The instance to check disks
8924
    @return: True if they are activated, False otherwise
8925

8926
    """
8927
    nodes = instance.all_nodes
8928

    
8929
    for idx, dev in enumerate(instance.disks):
8930
      for node in nodes:
8931
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
8932
        self.cfg.SetDiskID(dev, node)
8933

    
8934
        result = self.rpc.call_blockdev_find(node, dev)
8935

    
8936
        if result.offline:
8937
          continue
8938
        elif result.fail_msg or not result.payload:
8939
          return False
8940

    
8941
    return True
8942

    
8943
  def CheckPrereq(self):
8944
    """Check prerequisites.
8945

8946
    This checks that the instance is in the cluster.
8947

8948
    """
8949
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
8950
    assert instance is not None, \
8951
      "Cannot retrieve locked instance %s" % self.instance_name
8952

    
8953
    if instance.disk_template != constants.DT_DRBD8:
8954
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
8955
                                 " instances", errors.ECODE_INVAL)
8956

    
8957
    if len(instance.secondary_nodes) != 1:
8958
      raise errors.OpPrereqError("The instance has a strange layout,"
8959
                                 " expected one secondary but found %d" %
8960
                                 len(instance.secondary_nodes),
8961
                                 errors.ECODE_FAULT)
8962

    
8963
    if not self.delay_iallocator:
8964
      self._CheckPrereq2()
8965

    
8966
  def _CheckPrereq2(self):
8967
    """Check prerequisites, second part.
8968

8969
    This function should always be part of CheckPrereq. It was separated and is
8970
    now called from Exec because during node evacuation iallocator was only
8971
    called with an unmodified cluster model, not taking planned changes into
8972
    account.
8973

8974
    """
8975
    instance = self.instance
8976
    secondary_node = instance.secondary_nodes[0]
8977

    
8978
    if self.iallocator_name is None:
8979
      remote_node = self.remote_node
8980
    else:
8981
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
8982
                                       instance.name, instance.secondary_nodes)
8983

    
8984
    if remote_node is None:
8985
      self.remote_node_info = None
8986
    else:
8987
      assert remote_node in self.lu.glm.list_owned(locking.LEVEL_NODE), \
8988
             "Remote node '%s' is not locked" % remote_node
8989

    
8990
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
8991
      assert self.remote_node_info is not None, \
8992
        "Cannot retrieve locked node %s" % remote_node
8993

    
8994
    if remote_node == self.instance.primary_node:
8995
      raise errors.OpPrereqError("The specified node is the primary node of"
8996
                                 " the instance", errors.ECODE_INVAL)
8997

    
8998
    if remote_node == secondary_node:
8999
      raise errors.OpPrereqError("The specified node is already the"
9000
                                 " secondary node of the instance",
9001
                                 errors.ECODE_INVAL)
9002

    
9003
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
9004
                                    constants.REPLACE_DISK_CHG):
9005
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
9006
                                 errors.ECODE_INVAL)
9007

    
9008
    if self.mode == constants.REPLACE_DISK_AUTO:
9009
      if not self._CheckDisksActivated(instance):
9010
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
9011
                                   " first" % self.instance_name,
9012
                                   errors.ECODE_STATE)
9013
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
9014
      faulty_secondary = self._FindFaultyDisks(secondary_node)
9015

    
9016
      if faulty_primary and faulty_secondary:
9017
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
9018
                                   " one node and can not be repaired"
9019
                                   " automatically" % self.instance_name,
9020
                                   errors.ECODE_STATE)
9021

    
9022
      if faulty_primary:
9023
        self.disks = faulty_primary
9024
        self.target_node = instance.primary_node
9025
        self.other_node = secondary_node
9026
        check_nodes = [self.target_node, self.other_node]
9027
      elif faulty_secondary:
9028
        self.disks = faulty_secondary
9029
        self.target_node = secondary_node
9030
        self.other_node = instance.primary_node
9031
        check_nodes = [self.target_node, self.other_node]
9032
      else:
9033
        self.disks = []
9034
        check_nodes = []
9035

    
9036
    else:
9037
      # Non-automatic modes
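      # In all of these modes, target_node is the node whose old storage is
      # removed at the end, while other_node is the peer that must stay
      # consistent while the replacement runs.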
9038
      if self.mode == constants.REPLACE_DISK_PRI:
9039
        self.target_node = instance.primary_node
9040
        self.other_node = secondary_node
9041
        check_nodes = [self.target_node, self.other_node]
9042

    
9043
      elif self.mode == constants.REPLACE_DISK_SEC:
9044
        self.target_node = secondary_node
9045
        self.other_node = instance.primary_node
9046
        check_nodes = [self.target_node, self.other_node]
9047

    
9048
      elif self.mode == constants.REPLACE_DISK_CHG:
9049
        self.new_node = remote_node
9050
        self.other_node = instance.primary_node
9051
        self.target_node = secondary_node
9052
        check_nodes = [self.new_node, self.other_node]
9053

    
9054
        _CheckNodeNotDrained(self.lu, remote_node)
9055
        _CheckNodeVmCapable(self.lu, remote_node)
9056

    
9057
        old_node_info = self.cfg.GetNodeInfo(secondary_node)
9058
        assert old_node_info is not None
9059
        if old_node_info.offline and not self.early_release:
9060
          # doesn't make sense to delay the release
9061
          self.early_release = True
9062
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
9063
                          " early-release mode", secondary_node)
9064

    
9065
      else:
9066
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
9067
                                     self.mode)
9068

    
9069
      # If not specified all disks should be replaced
9070
      if not self.disks:
9071
        self.disks = range(len(self.instance.disks))
9072

    
9073
    for node in check_nodes:
9074
      _CheckNodeOnline(self.lu, node)
9075

    
9076
    touched_nodes = frozenset(node_name for node_name in [self.new_node,
9077
                                                          self.other_node,
9078
                                                          self.target_node]
9079
                              if node_name is not None)
9080

    
9081
    # Release unneeded node locks
9082
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
9083

    
9084
    # Release any owned node group
9085
    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
9086
      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
9087

    
9088
    # Check whether disks are valid
9089
    for disk_idx in self.disks:
9090
      instance.FindDisk(disk_idx)
9091

    
9092
    # Get secondary node IP addresses
9093
    self.node_secondary_ip = \
9094
      dict((node_name, self.cfg.GetNodeInfo(node_name).secondary_ip)
9095
           for node_name in touched_nodes)
9096

    
9097
  def Exec(self, feedback_fn):
9098
    """Execute disk replacement.
9099

9100
    This dispatches the disk replacement to the appropriate handler.
9101

9102
    """
9103
    if self.delay_iallocator:
9104
      self._CheckPrereq2()
9105

    
9106
    if __debug__:
9107
      # Verify owned locks before starting operation
9108
      owned_locks = self.lu.glm.list_owned(locking.LEVEL_NODE)
9109
      assert set(owned_locks) == set(self.node_secondary_ip), \
9110
          ("Incorrect node locks, owning %s, expected %s" %
9111
           (owned_locks, self.node_secondary_ip.keys()))
9112

    
9113
      owned_locks = self.lu.glm.list_owned(locking.LEVEL_INSTANCE)
9114
      assert list(owned_locks) == [self.instance_name], \
9115
          "Instance '%s' not locked" % self.instance_name
9116

    
9117
      assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
9118
          "Should not own any node group lock at this point"
9119

    
9120
    if not self.disks:
9121
      feedback_fn("No disks need replacement")
9122
      return
9123

    
9124
    feedback_fn("Replacing disk(s) %s for %s" %
9125
                (utils.CommaJoin(self.disks), self.instance.name))
9126

    
9127
    activate_disks = (not self.instance.admin_up)
9128

    
9129
    # Activate the instance disks if we're replacing them on a down instance
9130
    if activate_disks:
9131
      _StartInstanceDisks(self.lu, self.instance, True)
9132

    
9133
    try:
9134
      # Should we replace the secondary node?
9135
      if self.new_node is not None:
9136
        fn = self._ExecDrbd8Secondary
9137
      else:
9138
        fn = self._ExecDrbd8DiskOnly
9139

    
9140
      result = fn(feedback_fn)
9141
    finally:
9142
      # Deactivate the instance disks if we're replacing them on a
9143
      # down instance
9144
      if activate_disks:
9145
        _SafeShutdownInstanceDisks(self.lu, self.instance)
9146

    
9147
    if __debug__:
9148
      # Verify owned locks
9149
      owned_locks = self.lu.glm.list_owned(locking.LEVEL_NODE)
9150
      nodes = frozenset(self.node_secondary_ip)
9151
      assert ((self.early_release and not owned_locks) or
9152
              (not self.early_release and not (set(owned_locks) - nodes))), \
9153
        ("Not owning the correct locks, early_release=%s, owned=%r,"
9154
         " nodes=%r" % (self.early_release, owned_locks, nodes))
9155

    
9156
    return result
9157

    
9158
  def _CheckVolumeGroup(self, nodes):
9159
    self.lu.LogInfo("Checking volume groups")
9160

    
9161
    vgname = self.cfg.GetVGName()
9162

    
9163
    # Make sure volume group exists on all involved nodes
9164
    results = self.rpc.call_vg_list(nodes)
9165
    if not results:
9166
      raise errors.OpExecError("Can't list volume groups on the nodes")
9167

    
9168
    for node in nodes:
9169
      res = results[node]
9170
      res.Raise("Error checking node %s" % node)
9171
      if vgname not in res.payload:
9172
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
9173
                                 (vgname, node))
9174

    
9175
  def _CheckDisksExistence(self, nodes):
9176
    # Check disk existence
9177
    for idx, dev in enumerate(self.instance.disks):
9178
      if idx not in self.disks:
9179
        continue
9180

    
9181
      for node in nodes:
9182
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
9183
        self.cfg.SetDiskID(dev, node)
9184

    
9185
        result = self.rpc.call_blockdev_find(node, dev)
9186

    
9187
        msg = result.fail_msg
9188
        if msg or not result.payload:
9189
          if not msg:
9190
            msg = "disk not found"
9191
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
9192
                                   (idx, node, msg))
9193

    
9194
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
9195
    for idx, dev in enumerate(self.instance.disks):
9196
      if idx not in self.disks:
9197
        continue
9198

    
9199
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
9200
                      (idx, node_name))
9201

    
9202
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
9203
                                   ldisk=ldisk):
9204
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
9205
                                 " replace disks for instance %s" %
9206
                                 (node_name, self.instance.name))
9207

    
9208
  def _CreateNewStorage(self, node_name):
9209
    iv_names = {}
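    # Maps each DRBD device's iv_name to (drbd_dev, old_lvs, new_lvs); used
    # by the detach/rename/attach loop below and by _RemoveOldStorage.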
9210

    
9211
    for idx, dev in enumerate(self.instance.disks):
9212
      if idx not in self.disks:
9213
        continue
9214

    
9215
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
9216

    
9217
      self.cfg.SetDiskID(dev, node_name)
9218

    
9219
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
9220
      names = _GenerateUniqueNames(self.lu, lv_names)
9221

    
9222
      vg_data = dev.children[0].logical_id[0]
9223
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
9224
                             logical_id=(vg_data, names[0]))
9225
      vg_meta = dev.children[1].logical_id[0]
9226
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
9227
                             logical_id=(vg_meta, names[1]))
9228

    
9229
      new_lvs = [lv_data, lv_meta]
9230
      old_lvs = dev.children
9231
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
9232

    
9233
      # we pass force_create=True to force the LVM creation
9234
      for new_lv in new_lvs:
9235
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
9236
                        _GetInstanceInfoText(self.instance), False)
9237

    
9238
    return iv_names
9239

    
9240
  def _CheckDevices(self, node_name, iv_names):
9241
    for name, (dev, _, _) in iv_names.iteritems():
9242
      self.cfg.SetDiskID(dev, node_name)
9243

    
9244
      result = self.rpc.call_blockdev_find(node_name, dev)
9245

    
9246
      msg = result.fail_msg
9247
      if msg or not result.payload:
9248
        if not msg:
9249
          msg = "disk not found"
9250
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
9251
                                 (name, msg))
9252

    
9253
      if result.payload.is_degraded:
9254
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
9255

    
9256
  def _RemoveOldStorage(self, node_name, iv_names):
9257
    for name, (_, old_lvs, _) in iv_names.iteritems():
9258
      self.lu.LogInfo("Remove logical volumes for %s" % name)
9259

    
9260
      for lv in old_lvs:
9261
        self.cfg.SetDiskID(lv, node_name)
9262

    
9263
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
9264
        if msg:
9265
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
9266
                             hint="remove unused LVs manually")
9267

    
9268
  def _ExecDrbd8DiskOnly(self, feedback_fn):
9269
    """Replace a disk on the primary or secondary for DRBD 8.
9270

9271
    The algorithm for replace is quite complicated:
9272

9273
      1. for each disk to be replaced:
9274

9275
        1. create new LVs on the target node with unique names
9276
        1. detach old LVs from the drbd device
9277
        1. rename old LVs to name_replaced.<time_t>
9278
        1. rename new LVs to old LVs
9279
        1. attach the new LVs (with the old names now) to the drbd device
9280

9281
      1. wait for sync across all devices
9282

9283
      1. for each modified disk:
9284

9285
        1. remove old LVs (which have the name name_replaced.<time_t>)
9286

9287
    Failures are not very well handled.
9288

9289
    """
9290
    steps_total = 6
9291

    
9292
    # Step: check device activation
9293
    self.lu.LogStep(1, steps_total, "Check device existence")
9294
    self._CheckDisksExistence([self.other_node, self.target_node])
9295
    self._CheckVolumeGroup([self.target_node, self.other_node])
9296

    
9297
    # Step: check other node consistency
9298
    self.lu.LogStep(2, steps_total, "Check peer consistency")
9299
    self._CheckDisksConsistency(self.other_node,
9300
                                self.other_node == self.instance.primary_node,
9301
                                False)
9302

    
9303
    # Step: create new storage
9304
    self.lu.LogStep(3, steps_total, "Allocate new storage")
9305
    iv_names = self._CreateNewStorage(self.target_node)
9306

    
9307
    # Step: for each lv, detach+rename*2+attach
9308
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9309
    for dev, old_lvs, new_lvs in iv_names.itervalues():
9310
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
9311

    
9312
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
9313
                                                     old_lvs)
9314
      result.Raise("Can't detach drbd from local storage on node"
9315
                   " %s for device %s" % (self.target_node, dev.iv_name))
9316
      #dev.children = []
9317
      #cfg.Update(instance)
9318

    
9319
      # ok, we created the new LVs, so now we know we have the needed
9320
      # storage; as such, we proceed on the target node to rename
9321
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
9322
      # using the assumption that logical_id == physical_id (which in
9323
      # turn is the unique_id on that node)
9324

    
9325
      # FIXME(iustin): use a better name for the replaced LVs
9326
      temp_suffix = int(time.time())
9327
      ren_fn = lambda d, suff: (d.physical_id[0],
9328
                                d.physical_id[1] + "_replaced-%s" % suff)
9329

    
9330
      # Build the rename list based on what LVs exist on the node
9331
      rename_old_to_new = []
9332
      for to_ren in old_lvs:
9333
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
9334
        if not result.fail_msg and result.payload:
9335
          # device exists
9336
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
9337

    
9338
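      # rename_old_to_new now holds (disk, new_unique_id) pairs; e.g. an LV
      # "xenvg/inst.disk0_data" becomes "inst.disk0_data_replaced-<time_t>"
      # (names illustrative only).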
      self.lu.LogInfo("Renaming the old LVs on the target node")
9339
      result = self.rpc.call_blockdev_rename(self.target_node,
9340
                                             rename_old_to_new)
9341
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
9342

    
9343
      # Now we rename the new LVs to the old LVs
9344
      self.lu.LogInfo("Renaming the new LVs on the target node")
9345
      rename_new_to_old = [(new, old.physical_id)
9346
                           for old, new in zip(old_lvs, new_lvs)]
9347
      result = self.rpc.call_blockdev_rename(self.target_node,
9348
                                             rename_new_to_old)
9349
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
9350

    
9351
      for old, new in zip(old_lvs, new_lvs):
9352
        new.logical_id = old.logical_id
9353
        self.cfg.SetDiskID(new, self.target_node)
9354

    
9355
      for disk in old_lvs:
9356
        disk.logical_id = ren_fn(disk, temp_suffix)
9357
        self.cfg.SetDiskID(disk, self.target_node)
9358

    
9359
      # Now that the new lvs have the old name, we can add them to the device
9360
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9361
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9362
                                                  new_lvs)
9363
      msg = result.fail_msg
9364
      if msg:
9365
        for new_lv in new_lvs:
9366
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
9367
                                               new_lv).fail_msg
9368
          if msg2:
9369
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
9370
                               hint=("cleanup manually the unused logical"
9371
                                     "volumes"))
9372
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9373

    
9374
      dev.children = new_lvs
9375

    
9376
      self.cfg.Update(self.instance, feedback_fn)
9377

    
9378
    cstep = 5
9379
    if self.early_release:
9380
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9381
      cstep += 1
9382
      self._RemoveOldStorage(self.target_node, iv_names)
9383
      # WARNING: we release both node locks here, do not do other RPCs
9384
      # than WaitForSync to the primary node
9385
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9386
                    names=[self.target_node, self.other_node])
9387

    
9388
    # Wait for sync
9389
    # This can fail as the old devices are degraded and _WaitForSync
9390
    # does a combined result over all disks, so we don't check its return value
9391
    self.lu.LogStep(cstep, steps_total, "Sync devices")
9392
    cstep += 1
9393
    _WaitForSync(self.lu, self.instance)
9394

    
9395
    # Check all devices manually
9396
    self._CheckDevices(self.instance.primary_node, iv_names)
9397

    
9398
    # Step: remove old storage
9399
    if not self.early_release:
9400
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9401
      cstep += 1
9402
      self._RemoveOldStorage(self.target_node, iv_names)
9403

    
9404
  def _ExecDrbd8Secondary(self, feedback_fn):
9405
    """Replace the secondary node for DRBD 8.
9406

9407
    The algorithm for replace is quite complicated:
9408
      - for all disks of the instance:
9409
        - create new LVs on the new node with same names
9410
        - shutdown the drbd device on the old secondary
9411
        - disconnect the drbd network on the primary
9412
        - create the drbd device on the new secondary
9413
        - network attach the drbd on the primary, using an artifice:
9414
          the drbd code for Attach() will connect to the network if it
9415
          finds a device which is connected to the good local disks but
9416
          not network enabled
9417
      - wait for sync across all devices
9418
      - remove all disks from the old secondary
9419

9420
    Failures are not very well handled.
9421

9422
    """
9423
    steps_total = 6
9424

    
9425
    # Step: check device activation
9426
    self.lu.LogStep(1, steps_total, "Check device existence")
9427
    self._CheckDisksExistence([self.instance.primary_node])
9428
    self._CheckVolumeGroup([self.instance.primary_node])
9429

    
9430
    # Step: check other node consistency
9431
    self.lu.LogStep(2, steps_total, "Check peer consistency")
9432
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
9433

    
9434
    # Step: create new storage
9435
    self.lu.LogStep(3, steps_total, "Allocate new storage")
9436
    for idx, dev in enumerate(self.instance.disks):
9437
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
9438
                      (self.new_node, idx))
9439
      # we pass force_create=True to force LVM creation
9440
      for new_lv in dev.children:
9441
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9442
                        _GetInstanceInfoText(self.instance), False)
9443

    
9444
    # Step 4: drbd minors and drbd setup changes
9445
    # after this, we must manually remove the drbd minors on both the
9446
    # error and the success paths
9447
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9448
    minors = self.cfg.AllocateDRBDMinor([self.new_node
9449
                                         for dev in self.instance.disks],
9450
                                        self.instance.name)
9451
    logging.debug("Allocated minors %r", minors)
9452

    
9453
    iv_names = {}
9454
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
9455
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
9456
                      (self.new_node, idx))
9457
      # create new devices on new_node; note that we create two IDs:
9458
      # one without port, so the drbd will be activated without
9459
      # networking information on the new node at this stage, and one
9460
      # with network, for the later activation in step 4
9461
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
9462
      if self.instance.primary_node == o_node1:
9463
        p_minor = o_minor1
9464
      else:
9465
        assert self.instance.primary_node == o_node2, "Three-node instance?"
9466
        p_minor = o_minor2
9467

    
9468
      new_alone_id = (self.instance.primary_node, self.new_node, None,
9469
                      p_minor, new_minor, o_secret)
9470
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
9471
                    p_minor, new_minor, o_secret)
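      # new_alone_id has no port on purpose, so the DRBD device comes up
      # standalone on the new node; new_net_id keeps the original port and is
      # written to the configuration for the network attach performed later.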
9472

    
9473
      iv_names[idx] = (dev, dev.children, new_net_id)
9474
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
9475
                    new_net_id)
9476
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
9477
                              logical_id=new_alone_id,
9478
                              children=dev.children,
9479
                              size=dev.size)
9480
      try:
9481
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
9482
                              _GetInstanceInfoText(self.instance), False)
9483
      except errors.GenericError:
9484
        self.cfg.ReleaseDRBDMinors(self.instance.name)
9485
        raise
9486

    
9487
    # We have new devices, shutdown the drbd on the old secondary
9488
    for idx, dev in enumerate(self.instance.disks):
9489
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
9490
      self.cfg.SetDiskID(dev, self.target_node)
9491
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
9492
      if msg:
9493
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
9494
                           "node: %s" % (idx, msg),
9495
                           hint=("Please cleanup this device manually as"
9496
                                 " soon as possible"))
9497

    
9498
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
9499
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
9500
                                               self.node_secondary_ip,
9501
                                               self.instance.disks)\
9502
                                              [self.instance.primary_node]
9503

    
9504
    msg = result.fail_msg
9505
    if msg:
9506
      # detaches didn't succeed (unlikely)
9507
      self.cfg.ReleaseDRBDMinors(self.instance.name)
9508
      raise errors.OpExecError("Can't detach the disks from the network on"
9509
                               " old node: %s" % (msg,))
9510

    
9511
    # if we managed to detach at least one, we update all the disks of
9512
    # the instance to point to the new secondary
9513
    self.lu.LogInfo("Updating instance configuration")
9514
    for dev, _, new_logical_id in iv_names.itervalues():
9515
      dev.logical_id = new_logical_id
9516
      self.cfg.SetDiskID(dev, self.instance.primary_node)
9517

    
9518
    self.cfg.Update(self.instance, feedback_fn)
9519

    
9520
    # and now perform the drbd attach
9521
    self.lu.LogInfo("Attaching primary drbds to new secondary"
9522
                    " (standalone => connected)")
9523
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
9524
                                            self.new_node],
9525
                                           self.node_secondary_ip,
9526
                                           self.instance.disks,
9527
                                           self.instance.name,
9528
                                           False)
9529
    for to_node, to_result in result.items():
9530
      msg = to_result.fail_msg
9531
      if msg:
9532
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
9533
                           to_node, msg,
9534
                           hint=("please do a gnt-instance info to see the"
9535
                                 " status of disks"))
9536
    cstep = 5
9537
    if self.early_release:
9538
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9539
      cstep += 1
9540
      self._RemoveOldStorage(self.target_node, iv_names)
9541
      # WARNING: we release all node locks here, do not do other RPCs
9542
      # than WaitForSync to the primary node
9543
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9544
                    names=[self.instance.primary_node,
9545
                           self.target_node,
9546
                           self.new_node])
9547

    
9548
    # Wait for sync
9549
    # This can fail as the old devices are degraded and _WaitForSync
9550
    # does a combined result over all disks, so we don't check its return value
9551
    self.lu.LogStep(cstep, steps_total, "Sync devices")
9552
    cstep += 1
9553
    _WaitForSync(self.lu, self.instance)
9554

    
9555
    # Check all devices manually
9556
    self._CheckDevices(self.instance.primary_node, iv_names)
9557

    
9558
    # Step: remove old storage
9559
    if not self.early_release:
9560
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9561
      self._RemoveOldStorage(self.target_node, iv_names)
9562

    
9563

    
9564
class LURepairNodeStorage(NoHooksLU):
9565
  """Repairs the volume group on a node.
9566

9567
  """
9568
  REQ_BGL = False
9569

    
9570
  def CheckArguments(self):
9571
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
9572

    
9573
    storage_type = self.op.storage_type
9574

    
9575
    if (constants.SO_FIX_CONSISTENCY not in
9576
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
9577
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
9578
                                 " repaired" % storage_type,
9579
                                 errors.ECODE_INVAL)
9580

    
9581
  def ExpandNames(self):
9582
    self.needed_locks = {
9583
      locking.LEVEL_NODE: [self.op.node_name],
9584
      }
9585

    
9586
  def _CheckFaultyDisks(self, instance, node_name):
9587
    """Ensure faulty disks abort the opcode or at least warn."""
9588
    try:
9589
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
9590
                                  node_name, True):
9591
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
9592
                                   " node '%s'" % (instance.name, node_name),
9593
                                   errors.ECODE_STATE)
9594
    except errors.OpPrereqError, err:
9595
      if self.op.ignore_consistency:
9596
        self.proc.LogWarning(str(err.args[0]))
9597
      else:
9598
        raise
9599

    
9600
  def CheckPrereq(self):
9601
    """Check prerequisites.
9602

9603
    """
9604
    # Check whether any instance on this node has faulty disks
9605
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
9606
      if not inst.admin_up:
9607
        continue
9608
      check_nodes = set(inst.all_nodes)
9609
      check_nodes.discard(self.op.node_name)
9610
      for inst_node_name in check_nodes:
9611
        self._CheckFaultyDisks(inst, inst_node_name)
9612

    
9613
  def Exec(self, feedback_fn):
9614
    feedback_fn("Repairing storage unit '%s' on %s ..." %
9615
                (self.op.name, self.op.node_name))
9616

    
9617
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
9618
    result = self.rpc.call_storage_execute(self.op.node_name,
9619
                                           self.op.storage_type, st_args,
9620
                                           self.op.name,
9621
                                           constants.SO_FIX_CONSISTENCY)
9622
    result.Raise("Failed to repair storage unit '%s' on %s" %
9623
                 (self.op.name, self.op.node_name))
9624

    
9625

    
9626
class LUNodeEvacStrategy(NoHooksLU):
9627
  """Computes the node evacuation strategy.
9628

9629
  """
9630
  REQ_BGL = False
9631

    
9632
  def CheckArguments(self):
9633
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
9634

    
9635
  def ExpandNames(self):
9636
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
9637
    self.needed_locks = locks = {}
9638
    if self.op.remote_node is None:
9639
      locks[locking.LEVEL_NODE] = locking.ALL_SET
9640
    else:
9641
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9642
      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
9643

    
9644
  def Exec(self, feedback_fn):
9645
    if self.op.remote_node is not None:
9646
      instances = []
9647
      for node in self.op.nodes:
9648
        instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
9649
      result = []
9650
      for i in instances:
9651
        if i.primary_node == self.op.remote_node:
9652
          raise errors.OpPrereqError("Node %s is the primary node of"
9653
                                     " instance %s, cannot use it as"
9654
                                     " secondary" %
9655
                                     (self.op.remote_node, i.name),
9656
                                     errors.ECODE_INVAL)
9657
        result.append([i.name, self.op.remote_node])
9658
    else:
9659
      ial = IAllocator(self.cfg, self.rpc,
9660
                       mode=constants.IALLOCATOR_MODE_MEVAC,
9661
                       evac_nodes=self.op.nodes)
9662
      ial.Run(self.op.iallocator, validate=True)
9663
      if not ial.success:
9664
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
9665
                                 errors.ECODE_NORES)
9666
      result = ial.result
9667
    return result
9668

    
9669

    
9670
class LUInstanceGrowDisk(LogicalUnit):
9671
  """Grow a disk of an instance.
9672

9673
  """
9674
  HPATH = "disk-grow"
9675
  HTYPE = constants.HTYPE_INSTANCE
9676
  REQ_BGL = False
9677

    
9678
  def ExpandNames(self):
9679
    self._ExpandAndLockInstance()
9680
    self.needed_locks[locking.LEVEL_NODE] = []
9681
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9682

    
9683
  def DeclareLocks(self, level):
9684
    if level == locking.LEVEL_NODE:
9685
      self._LockInstancesNodes()
9686

    
9687
  def BuildHooksEnv(self):
9688
    """Build hooks env.
9689

9690
    This runs on the master, the primary and all the secondaries.
9691

9692
    """
9693
    env = {
9694
      "DISK": self.op.disk,
9695
      "AMOUNT": self.op.amount,
9696
      }
9697
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9698
    return env
9699

    
9700
  def BuildHooksNodes(self):
9701
    """Build hooks nodes.
9702

9703
    """
9704
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9705
    return (nl, nl)
9706

    
9707
  def CheckPrereq(self):
9708
    """Check prerequisites.
9709

9710
    This checks that the instance is in the cluster.
9711

9712
    """
9713
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9714
    assert instance is not None, \
9715
      "Cannot retrieve locked instance %s" % self.op.instance_name
9716
    nodenames = list(instance.all_nodes)
9717
    for node in nodenames:
9718
      _CheckNodeOnline(self, node)
9719

    
9720
    self.instance = instance
9721

    
9722
    if instance.disk_template not in constants.DTS_GROWABLE:
9723
      raise errors.OpPrereqError("Instance's disk layout does not support"
9724
                                 " growing", errors.ECODE_INVAL)
9725

    
9726
    self.disk = instance.FindDisk(self.op.disk)
9727

    
9728
    if instance.disk_template not in (constants.DT_FILE,
9729
                                      constants.DT_SHARED_FILE):
9730
      # TODO: check the free disk space for file, when that feature will be
9731
      # supported
9732
      _CheckNodesFreeDiskPerVG(self, nodenames,
9733
                               self.disk.ComputeGrowth(self.op.amount))
9734

    
9735
  def Exec(self, feedback_fn):
9736
    """Execute disk grow.
9737

9738
    """
9739
    instance = self.instance
9740
    disk = self.disk
9741

    
9742
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
9743
    if not disks_ok:
9744
      raise errors.OpExecError("Cannot activate block device to grow")
9745

    
9746
    # First run all grow ops in dry-run mode
9747
    for node in instance.all_nodes:
9748
      self.cfg.SetDiskID(disk, node)
9749
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
9750
      result.Raise("Grow request failed to node %s" % node)
9751

    
9752
    # We know that (as far as we can test) operations across different
9753
    # nodes will succeed, time to run it for real
9754
    for node in instance.all_nodes:
9755
      self.cfg.SetDiskID(disk, node)
9756
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
9757
      result.Raise("Grow request failed to node %s" % node)
9758

    
9759
      # TODO: Rewrite code to work properly
9760
      # DRBD goes into sync mode for a short amount of time after executing the
9761
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
9762
      # calling "resize" in sync mode fails. Sleeping for a short amount of
9763
      # time is a work-around.
9764
      time.sleep(5)
9765

    
9766
    disk.RecordGrow(self.op.amount)
9767
    self.cfg.Update(instance, feedback_fn)
9768
    if self.op.wait_for_sync:
9769
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
9770
      if disk_abort:
9771
        self.proc.LogWarning("Disk sync-ing has not returned a good"
9772
                             " status; please check the instance")
9773
      if not instance.admin_up:
9774
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
9775
    elif not instance.admin_up:
9776
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
9777
                           " not supposed to be running because no wait for"
9778
                           " sync mode was requested")
9779

    
9780

    
9781
class LUInstanceQueryData(NoHooksLU):
9782
  """Query runtime instance data.
9783

9784
  """
9785
  REQ_BGL = False
9786

    
9787
  def ExpandNames(self):
9788
    self.needed_locks = {}
9789

    
9790
    # Use locking if requested or when non-static information is wanted
9791
    if not (self.op.static or self.op.use_locking):
9792
      self.LogWarning("Non-static data requested, locks need to be acquired")
9793
      self.op.use_locking = True
9794

    
9795
    if self.op.instances or not self.op.use_locking:
9796
      # Expand instance names right here
9797
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
9798
    else:
9799
      # Will use acquired locks
9800
      self.wanted_names = None
9801

    
9802
    if self.op.use_locking:
9803
      self.share_locks = dict.fromkeys(locking.LEVELS, 1)
9804

    
9805
      if self.wanted_names is None:
9806
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
9807
      else:
9808
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
9809

    
9810
      self.needed_locks[locking.LEVEL_NODE] = []
9811
      self.share_locks = dict.fromkeys(locking.LEVELS, 1)
9812
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9813

    
9814
  def DeclareLocks(self, level):
9815
    if self.op.use_locking and level == locking.LEVEL_NODE:
9816
      self._LockInstancesNodes()
9817

    
9818
  def CheckPrereq(self):
9819
    """Check prerequisites.
9820

9821
    This only checks the optional instance list against the existing names.
9822

9823
    """
9824
    if self.wanted_names is None:
9825
      assert self.op.use_locking, "Locking was not used"
9826
      self.wanted_names = self.glm.list_owned(locking.LEVEL_INSTANCE)
9827

    
9828
    self.wanted_instances = [self.cfg.GetInstanceInfo(name)
9829
                             for name in self.wanted_names]
9830

    
9831
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
9832
    """Returns the status of a block device
9833

9834
    """
9835
    if self.op.static or not node:
9836
      return None
9837

    
9838
    self.cfg.SetDiskID(dev, node)
9839

    
9840
    result = self.rpc.call_blockdev_find(node, dev)
9841
    if result.offline:
9842
      return None
9843

    
9844
    result.Raise("Can't compute disk status for %s" % instance_name)
9845

    
9846
    status = result.payload
9847
    if status is None:
9848
      return None
9849

    
9850
    return (status.dev_path, status.major, status.minor,
9851
            status.sync_percent, status.estimated_time,
9852
            status.is_degraded, status.ldisk_status)
9853

    
9854
  def _ComputeDiskStatus(self, instance, snode, dev):
9855
    """Compute block device status.
9856

9857
    """
9858
    if dev.dev_type in constants.LDS_DRBD:
9859
      # we change the snode then (otherwise we use the one passed in)
9860
      if dev.logical_id[0] == instance.primary_node:
9861
        snode = dev.logical_id[1]
9862
      else:
9863
        snode = dev.logical_id[0]
9864

    
9865
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
9866
                                              instance.name, dev)
9867
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
9868

    
9869
    if dev.children:
9870
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
9871
                      for child in dev.children]
9872
    else:
9873
      dev_children = []
9874

    
9875
    return {
9876
      "iv_name": dev.iv_name,
9877
      "dev_type": dev.dev_type,
9878
      "logical_id": dev.logical_id,
9879
      "physical_id": dev.physical_id,
9880
      "pstatus": dev_pstatus,
9881
      "sstatus": dev_sstatus,
9882
      "children": dev_children,
9883
      "mode": dev.mode,
9884
      "size": dev.size,
9885
      }
9886

    
9887
  def Exec(self, feedback_fn):
9888
    """Gather and return data"""
9889
    result = {}
9890

    
9891
    cluster = self.cfg.GetClusterInfo()
9892

    
9893
    for instance in self.wanted_instances:
9894
      if not self.op.static:
9895
        remote_info = self.rpc.call_instance_info(instance.primary_node,
9896
                                                  instance.name,
9897
                                                  instance.hypervisor)
9898
        remote_info.Raise("Error checking node %s" % instance.primary_node)
9899
        remote_info = remote_info.payload
9900
        if remote_info and "state" in remote_info:
9901
          remote_state = "up"
9902
        else:
9903
          remote_state = "down"
9904
      else:
9905
        remote_state = None
9906
      if instance.admin_up:
9907
        config_state = "up"
9908
      else:
9909
        config_state = "down"
9910

    
9911
      disks = [self._ComputeDiskStatus(instance, None, device)
9912
               for device in instance.disks]
9913

    
9914
      result[instance.name] = {
9915
        "name": instance.name,
9916
        "config_state": config_state,
9917
        "run_state": remote_state,
9918
        "pnode": instance.primary_node,
9919
        "snodes": instance.secondary_nodes,
9920
        "os": instance.os,
9921
        # this happens to be the same format used for hooks
9922
        "nics": _NICListToTuple(self, instance.nics),
9923
        "disk_template": instance.disk_template,
9924
        "disks": disks,
9925
        "hypervisor": instance.hypervisor,
9926
        "network_port": instance.network_port,
9927
        "hv_instance": instance.hvparams,
9928
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
9929
        "be_instance": instance.beparams,
9930
        "be_actual": cluster.FillBE(instance),
9931
        "os_instance": instance.osparams,
9932
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
9933
        "serial_no": instance.serial_no,
9934
        "mtime": instance.mtime,
9935
        "ctime": instance.ctime,
9936
        "uuid": instance.uuid,
9937
        }
9938

    
9939
    return result
9940

    
9941

    
9942
class LUInstanceSetParams(LogicalUnit):
9943
  """Modifies an instances's parameters.
9944

9945
  """
9946
  HPATH = "instance-modify"
9947
  HTYPE = constants.HTYPE_INSTANCE
9948
  REQ_BGL = False
9949

    
9950
  def CheckArguments(self):
9951
    if not (self.op.nics or self.op.disks or self.op.disk_template or
9952
            self.op.hvparams or self.op.beparams or self.op.os_name):
9953
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
9954

    
9955
    if self.op.hvparams:
9956
      _CheckGlobalHvParams(self.op.hvparams)
9957

    
9958
    # Disk validation
9959
    disk_addremove = 0
9960
    for disk_op, disk_dict in self.op.disks:
9961
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
9962
      if disk_op == constants.DDM_REMOVE:
9963
        disk_addremove += 1
9964
        continue
9965
      elif disk_op == constants.DDM_ADD:
9966
        disk_addremove += 1
9967
      else:
9968
        if not isinstance(disk_op, int):
9969
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
9970
        if not isinstance(disk_dict, dict):
9971
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
9972
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9973

    
9974
      if disk_op == constants.DDM_ADD:
9975
        mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
9976
        if mode not in constants.DISK_ACCESS_SET:
9977
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
9978
                                     errors.ECODE_INVAL)
9979
        size = disk_dict.get(constants.IDISK_SIZE, None)
9980
        if size is None:
9981
          raise errors.OpPrereqError("Required disk parameter size missing",
9982
                                     errors.ECODE_INVAL)
9983
        try:
9984
          size = int(size)
9985
        except (TypeError, ValueError), err:
9986
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
9987
                                     str(err), errors.ECODE_INVAL)
9988
        disk_dict[constants.IDISK_SIZE] = size
9989
      else:
9990
        # modification of disk
9991
        if constants.IDISK_SIZE in disk_dict:
9992
          raise errors.OpPrereqError("Disk size change not possible, use"
9993
                                     " grow-disk", errors.ECODE_INVAL)
9994

    
9995
    if disk_addremove > 1:
9996
      raise errors.OpPrereqError("Only one disk add or remove operation"
9997
                                 " supported at a time", errors.ECODE_INVAL)
9998

    
9999
    if self.op.disks and self.op.disk_template is not None:
10000
      raise errors.OpPrereqError("Disk template conversion and other disk"
10001
                                 " changes not supported at the same time",
10002
                                 errors.ECODE_INVAL)
10003

    
10004
    if (self.op.disk_template and
10005
        self.op.disk_template in constants.DTS_INT_MIRROR and
10006
        self.op.remote_node is None):
10007
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
10008
                                 " one requires specifying a secondary node",
10009
                                 errors.ECODE_INVAL)
10010

    
10011
    # NIC validation
10012
    nic_addremove = 0
10013
    for nic_op, nic_dict in self.op.nics:
10014
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
10015
      if nic_op == constants.DDM_REMOVE:
10016
        nic_addremove += 1
10017
        continue
10018
      elif nic_op == constants.DDM_ADD:
10019
        nic_addremove += 1
10020
      else:
10021
        if not isinstance(nic_op, int):
10022
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
10023
        if not isinstance(nic_dict, dict):
10024
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
10025
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10026

    
10027
      # nic_dict should be a dict
10028
      nic_ip = nic_dict.get(constants.INIC_IP, None)
10029
      if nic_ip is not None:
10030
        if nic_ip.lower() == constants.VALUE_NONE:
10031
          nic_dict[constants.INIC_IP] = None
10032
        else:
10033
          if not netutils.IPAddress.IsValid(nic_ip):
10034
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
10035
                                       errors.ECODE_INVAL)
10036

    
10037
      nic_bridge = nic_dict.get('bridge', None)
10038
      nic_link = nic_dict.get(constants.INIC_LINK, None)
10039
      if nic_bridge and nic_link:
10040
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
10041
                                   " at the same time", errors.ECODE_INVAL)
10042
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
10043
        nic_dict['bridge'] = None
10044
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
10045
        nic_dict[constants.INIC_LINK] = None
10046

    
10047
      if nic_op == constants.DDM_ADD:
10048
        nic_mac = nic_dict.get(constants.INIC_MAC, None)
10049
        if nic_mac is None:
10050
          nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
10051

    
10052
      if constants.INIC_MAC in nic_dict:
10053
        nic_mac = nic_dict[constants.INIC_MAC]
10054
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10055
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
10056

    
10057
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
10058
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
10059
                                     " modifying an existing nic",
10060
                                     errors.ECODE_INVAL)
10061

    
10062
    if nic_addremove > 1:
10063
      raise errors.OpPrereqError("Only one NIC add or remove operation"
10064
                                 " supported at a time", errors.ECODE_INVAL)
10065

    
10066
  def ExpandNames(self):
10067
    self._ExpandAndLockInstance()
10068
    self.needed_locks[locking.LEVEL_NODE] = []
10069
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10070

    
10071
  def DeclareLocks(self, level):
10072
    if level == locking.LEVEL_NODE:
10073
      self._LockInstancesNodes()
10074
      if self.op.disk_template and self.op.remote_node:
10075
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10076
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
10077

    
10078
  def BuildHooksEnv(self):
10079
    """Build hooks env.
10080

10081
    This runs on the master, primary and secondaries.
10082

10083
    """
10084
    args = dict()
10085
    if constants.BE_MEMORY in self.be_new:
10086
      args['memory'] = self.be_new[constants.BE_MEMORY]
10087
    if constants.BE_VCPUS in self.be_new:
10088
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
10089
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
10090
    # information at all.
10091
    if self.op.nics:
10092
      args['nics'] = []
10093
      nic_override = dict(self.op.nics)
10094
      for idx, nic in enumerate(self.instance.nics):
10095
        if idx in nic_override:
10096
          this_nic_override = nic_override[idx]
10097
        else:
10098
          this_nic_override = {}
10099
        if constants.INIC_IP in this_nic_override:
10100
          ip = this_nic_override[constants.INIC_IP]
10101
        else:
10102
          ip = nic.ip
10103
        if constants.INIC_MAC in this_nic_override:
10104
          mac = this_nic_override[constants.INIC_MAC]
10105
        else:
10106
          mac = nic.mac
10107
        if idx in self.nic_pnew:
10108
          nicparams = self.nic_pnew[idx]
10109
        else:
10110
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
10111
        mode = nicparams[constants.NIC_MODE]
10112
        link = nicparams[constants.NIC_LINK]
10113
        args['nics'].append((ip, mac, mode, link))
10114
      if constants.DDM_ADD in nic_override:
10115
        ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
10116
        mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
10117
        nicparams = self.nic_pnew[constants.DDM_ADD]
10118
        mode = nicparams[constants.NIC_MODE]
10119
        link = nicparams[constants.NIC_LINK]
10120
        args['nics'].append((ip, mac, mode, link))
10121
      elif constants.DDM_REMOVE in nic_override:
10122
        del args['nics'][-1]
10123

    
10124
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
10125
    if self.op.disk_template:
10126
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
10127

    
10128
    return env
10129

    
10130
  def BuildHooksNodes(self):
10131
    """Build hooks nodes.
10132

10133
    """
10134
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10135
    return (nl, nl)
10136

    
10137
  def CheckPrereq(self):
10138
    """Check prerequisites.
10139

10140
    This only checks the instance list against the existing names.
10141

10142
    """
10143
    # checking the new params on the primary/secondary nodes
10144

    
10145
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10146
    cluster = self.cluster = self.cfg.GetClusterInfo()
10147
    assert self.instance is not None, \
10148
      "Cannot retrieve locked instance %s" % self.op.instance_name
10149
    pnode = instance.primary_node
10150
    nodelist = list(instance.all_nodes)
10151

    
10152
    # OS change
10153
    if self.op.os_name and not self.op.force:
10154
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
10155
                      self.op.force_variant)
10156
      instance_os = self.op.os_name
10157
    else:
10158
      instance_os = instance.os
10159

    
10160
    if self.op.disk_template:
10161
      if instance.disk_template == self.op.disk_template:
10162
        raise errors.OpPrereqError("Instance already has disk template %s" %
10163
                                   instance.disk_template, errors.ECODE_INVAL)
10164

    
10165
      if (instance.disk_template,
10166
          self.op.disk_template) not in self._DISK_CONVERSIONS:
10167
        raise errors.OpPrereqError("Unsupported disk template conversion from"
10168
                                   " %s to %s" % (instance.disk_template,
10169
                                                  self.op.disk_template),
10170
                                   errors.ECODE_INVAL)
10171
      _CheckInstanceDown(self, instance, "cannot change disk template")
10172
      if self.op.disk_template in constants.DTS_INT_MIRROR:
10173
        if self.op.remote_node == pnode:
10174
          raise errors.OpPrereqError("Given new secondary node %s is the same"
10175
                                     " as the primary node of the instance" %
10176
                                     self.op.remote_node, errors.ECODE_STATE)
10177
        _CheckNodeOnline(self, self.op.remote_node)
10178
        _CheckNodeNotDrained(self, self.op.remote_node)
10179
        # FIXME: here we assume that the old instance type is DT_PLAIN
10180
        assert instance.disk_template == constants.DT_PLAIN
10181
        disks = [{constants.IDISK_SIZE: d.size,
10182
                  constants.IDISK_VG: d.logical_id[0]}
10183
                 for d in instance.disks]
10184
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
10185
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
10186

    
10187
    # hvparams processing
10188
    if self.op.hvparams:
10189
      hv_type = instance.hypervisor
10190
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
10191
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
10192
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
10193

    
10194
      # local check
10195
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
10196
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
10197
      self.hv_new = hv_new # the new actual values
10198
      self.hv_inst = i_hvdict # the new dict (without defaults)
10199
    else:
10200
      self.hv_new = self.hv_inst = {}
10201

    
10202
    # beparams processing
10203
    if self.op.beparams:
10204
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
10205
                                   use_none=True)
10206
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
10207
      be_new = cluster.SimpleFillBE(i_bedict)
10208
      self.be_new = be_new # the new actual values
10209
      self.be_inst = i_bedict # the new dict (without defaults)
10210
    else:
10211
      self.be_new = self.be_inst = {}
10212
    be_old = cluster.FillBE(instance)
10213

    
10214
    # osparams processing
10215
    if self.op.osparams:
10216
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
10217
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
10218
      self.os_inst = i_osdict # the new dict (without defaults)
10219
    else:
10220
      self.os_inst = {}
10221

    
10222
    self.warn = []
10223

    
10224
    if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
10225
        be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
10226
      mem_check_list = [pnode]
10227
      if be_new[constants.BE_AUTO_BALANCE]:
10228
        # either we changed auto_balance to yes or it was from before
10229
        mem_check_list.extend(instance.secondary_nodes)
10230
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
10231
                                                  instance.hypervisor)
10232
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
10233
                                         instance.hypervisor)
10234
      pninfo = nodeinfo[pnode]
10235
      msg = pninfo.fail_msg
10236
      if msg:
10237
        # Assume the primary node is unreachable and go ahead
10238
        self.warn.append("Can't get info from primary node %s: %s" %
10239
                         (pnode,  msg))
10240
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
10241
        self.warn.append("Node data from primary node %s doesn't contain"
10242
                         " free memory information" % pnode)
10243
      elif instance_info.fail_msg:
10244
        self.warn.append("Can't get instance runtime information: %s" %
10245
                        instance_info.fail_msg)
10246
      else:
10247
        if instance_info.payload:
10248
          current_mem = int(instance_info.payload['memory'])
10249
        else:
10250
          # Assume instance not running
10251
          # (there is a slight race condition here, but it's not very probable,
10252
          # and we have no other way to check)
10253
          current_mem = 0
10254
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
10255
                    pninfo.payload['memory_free'])
10256
        if miss_mem > 0:
10257
          raise errors.OpPrereqError("This change will prevent the instance"
10258
                                     " from starting, due to %d MB of memory"
10259
                                     " missing on its primary node" % miss_mem,
10260
                                     errors.ECODE_NORES)
10261

    
10262
      if be_new[constants.BE_AUTO_BALANCE]:
10263
        for node, nres in nodeinfo.items():
10264
          if node not in instance.secondary_nodes:
10265
            continue
10266
          nres.Raise("Can't get info from secondary node %s" % node,
10267
                     prereq=True, ecode=errors.ECODE_STATE)
10268
          if not isinstance(nres.payload.get('memory_free', None), int):
10269
            raise errors.OpPrereqError("Secondary node %s didn't return free"
10270
                                       " memory information" % node,
10271
                                       errors.ECODE_STATE)
10272
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
10273
            raise errors.OpPrereqError("This change will prevent the instance"
10274
                                       " from failover to its secondary node"
10275
                                       " %s, due to not enough memory" % node,
10276
                                       errors.ECODE_STATE)
10277

    
10278
    # NIC processing
10279
    self.nic_pnew = {}
10280
    self.nic_pinst = {}
10281
    for nic_op, nic_dict in self.op.nics:
10282
      if nic_op == constants.DDM_REMOVE:
10283
        if not instance.nics:
10284
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
10285
                                     errors.ECODE_INVAL)
10286
        continue
10287
      if nic_op != constants.DDM_ADD:
10288
        # an existing nic
10289
        if not instance.nics:
10290
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
10291
                                     " no NICs" % nic_op,
10292
                                     errors.ECODE_INVAL)
10293
        if nic_op < 0 or nic_op >= len(instance.nics):
10294
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
10295
                                     " are 0 to %d" %
10296
                                     (nic_op, len(instance.nics) - 1),
10297
                                     errors.ECODE_INVAL)
10298
        old_nic_params = instance.nics[nic_op].nicparams
10299
        old_nic_ip = instance.nics[nic_op].ip
10300
      else:
10301
        old_nic_params = {}
10302
        old_nic_ip = None
10303

    
10304
      update_params_dict = dict([(key, nic_dict[key])
10305
                                 for key in constants.NICS_PARAMETERS
10306
                                 if key in nic_dict])
10307

    
10308
      if 'bridge' in nic_dict:
10309
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
10310

    
10311
      new_nic_params = _GetUpdatedParams(old_nic_params,
10312
                                         update_params_dict)
10313
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
10314
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
10315
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
10316
      self.nic_pinst[nic_op] = new_nic_params
10317
      self.nic_pnew[nic_op] = new_filled_nic_params
10318
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
10319

    
10320
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
10321
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
10322
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
10323
        if msg:
10324
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
10325
          if self.op.force:
10326
            self.warn.append(msg)
10327
          else:
10328
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
10329
      if new_nic_mode == constants.NIC_MODE_ROUTED:
10330
        if constants.INIC_IP in nic_dict:
10331
          nic_ip = nic_dict[constants.INIC_IP]
10332
        else:
10333
          nic_ip = old_nic_ip
10334
        if nic_ip is None:
10335
          raise errors.OpPrereqError('Cannot set the nic ip to None'
10336
                                     ' on a routed nic', errors.ECODE_INVAL)
10337
      if constants.INIC_MAC in nic_dict:
10338
        nic_mac = nic_dict[constants.INIC_MAC]
10339
        if nic_mac is None:
10340
          raise errors.OpPrereqError('Cannot set the nic mac to None',
10341
                                     errors.ECODE_INVAL)
10342
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10343
          # otherwise generate the mac
10344
          nic_dict[constants.INIC_MAC] = \
10345
            self.cfg.GenerateMAC(self.proc.GetECId())
10346
        else:
10347
          # or validate/reserve the current one
10348
          try:
10349
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
10350
          except errors.ReservationError:
10351
            raise errors.OpPrereqError("MAC address %s already in use"
10352
                                       " in cluster" % nic_mac,
10353
                                       errors.ECODE_NOTUNIQUE)
10354

    
10355
    # DISK processing
10356
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
10357
      raise errors.OpPrereqError("Disk operations not supported for"
10358
                                 " diskless instances",
10359
                                 errors.ECODE_INVAL)
10360
    for disk_op, _ in self.op.disks:
10361
      if disk_op == constants.DDM_REMOVE:
10362
        if len(instance.disks) == 1:
10363
          raise errors.OpPrereqError("Cannot remove the last disk of"
10364
                                     " an instance", errors.ECODE_INVAL)
10365
        _CheckInstanceDown(self, instance, "cannot remove disks")
10366

    
10367
      if (disk_op == constants.DDM_ADD and
10368
          len(instance.disks) >= constants.MAX_DISKS):
10369
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
10370
                                   " add more" % constants.MAX_DISKS,
10371
                                   errors.ECODE_STATE)
10372
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
10373
        # an existing disk
10374
        if disk_op < 0 or disk_op >= len(instance.disks):
10375
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
10376
                                     " are 0 to %d" %
10377
                                     (disk_op, len(instance.disks)),
10378
                                     errors.ECODE_INVAL)
10379

    
10380
    return
10381

    
10382
  def _ConvertPlainToDrbd(self, feedback_fn):
10383
    """Converts an instance from plain to drbd.
10384

10385
    """
10386
    feedback_fn("Converting template to drbd")
10387
    instance = self.instance
10388
    pnode = instance.primary_node
10389
    snode = self.op.remote_node
10390

    
10391
    # create a fake disk info for _GenerateDiskTemplate
10392
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
10393
                  constants.IDISK_VG: d.logical_id[0]}
10394
                 for d in instance.disks]
10395
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
10396
                                      instance.name, pnode, [snode],
10397
                                      disk_info, None, None, 0, feedback_fn)
10398
    info = _GetInstanceInfoText(instance)
10399
    feedback_fn("Creating aditional volumes...")
10400
    # first, create the missing data and meta devices
10401
    for disk in new_disks:
10402
      # unfortunately this is... not too nice
10403
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
10404
                            info, True)
10405
      for child in disk.children:
10406
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
10407
    # at this stage, all new LVs have been created, we can rename the
10408
    # old ones
10409
    feedback_fn("Renaming original volumes...")
10410
    rename_list = [(o, n.children[0].logical_id)
10411
                   for (o, n) in zip(instance.disks, new_disks)]
10412
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
10413
    result.Raise("Failed to rename original LVs")
10414

    
10415
    feedback_fn("Initializing DRBD devices...")
10416
    # all child devices are in place, we can now create the DRBD devices
10417
    for disk in new_disks:
10418
      for node in [pnode, snode]:
10419
        f_create = node == pnode
10420
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
10421

    
10422
    # at this point, the instance has been modified
10423
    instance.disk_template = constants.DT_DRBD8
10424
    instance.disks = new_disks
10425
    self.cfg.Update(instance, feedback_fn)
10426

    
10427
    # disks are created, waiting for sync
10428
    disk_abort = not _WaitForSync(self, instance,
10429
                                  oneshot=not self.op.wait_for_sync)
10430
    if disk_abort:
10431
      raise errors.OpExecError("There are some degraded disks for"
10432
                               " this instance, please cleanup manually")
10433

    
10434
  def _ConvertDrbdToPlain(self, feedback_fn):
10435
    """Converts an instance from drbd to plain.
10436

10437
    """
10438
    instance = self.instance
10439
    assert len(instance.secondary_nodes) == 1
10440
    pnode = instance.primary_node
10441
    snode = instance.secondary_nodes[0]
10442
    feedback_fn("Converting template to plain")
10443

    
10444
    old_disks = instance.disks
10445
    new_disks = [d.children[0] for d in old_disks]
10446

    
10447
    # copy over size and mode
10448
    for parent, child in zip(old_disks, new_disks):
10449
      child.size = parent.size
10450
      child.mode = parent.mode
10451

    
10452
    # update instance structure
10453
    instance.disks = new_disks
10454
    instance.disk_template = constants.DT_PLAIN
10455
    self.cfg.Update(instance, feedback_fn)
10456

    
10457
    feedback_fn("Removing volumes on the secondary node...")
10458
    for disk in old_disks:
10459
      self.cfg.SetDiskID(disk, snode)
10460
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
10461
      if msg:
10462
        self.LogWarning("Could not remove block device %s on node %s,"
10463
                        " continuing anyway: %s", disk.iv_name, snode, msg)
10464

    
10465
    feedback_fn("Removing unneeded volumes on the primary node...")
10466
    for idx, disk in enumerate(old_disks):
10467
      meta = disk.children[1]
10468
      self.cfg.SetDiskID(meta, pnode)
10469
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
10470
      if msg:
10471
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
10472
                        " continuing anyway: %s", idx, pnode, msg)
10473

    
10474
  def Exec(self, feedback_fn):
10475
    """Modifies an instance.
10476

10477
    All parameters take effect only at the next restart of the instance.
10478

10479
    """
10480
    # Process here the warnings from CheckPrereq, as we don't have a
10481
    # feedback_fn there.
10482
    for warn in self.warn:
10483
      feedback_fn("WARNING: %s" % warn)
10484

    
10485
    result = []
10486
    instance = self.instance
10487
    # disk changes
10488
    for disk_op, disk_dict in self.op.disks:
10489
      if disk_op == constants.DDM_REMOVE:
10490
        # remove the last disk
10491
        device = instance.disks.pop()
10492
        device_idx = len(instance.disks)
10493
        for node, disk in device.ComputeNodeTree(instance.primary_node):
10494
          self.cfg.SetDiskID(disk, node)
10495
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
10496
          if msg:
10497
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
10498
                            " continuing anyway", device_idx, node, msg)
10499
        result.append(("disk/%d" % device_idx, "remove"))
10500
      elif disk_op == constants.DDM_ADD:
10501
        # add a new disk
10502
        if instance.disk_template in (constants.DT_FILE,
10503
                                        constants.DT_SHARED_FILE):
10504
          file_driver, file_path = instance.disks[0].logical_id
10505
          file_path = os.path.dirname(file_path)
10506
        else:
10507
          file_driver = file_path = None
10508
        disk_idx_base = len(instance.disks)
10509
        new_disk = _GenerateDiskTemplate(self,
10510
                                         instance.disk_template,
10511
                                         instance.name, instance.primary_node,
10512
                                         instance.secondary_nodes,
10513
                                         [disk_dict],
10514
                                         file_path,
10515
                                         file_driver,
10516
                                         disk_idx_base, feedback_fn)[0]
10517
        instance.disks.append(new_disk)
10518
        info = _GetInstanceInfoText(instance)
10519

    
10520
        logging.info("Creating volume %s for instance %s",
10521
                     new_disk.iv_name, instance.name)
10522
        # Note: this needs to be kept in sync with _CreateDisks
10523
        #HARDCODE
10524
        for node in instance.all_nodes:
10525
          f_create = node == instance.primary_node
10526
          try:
10527
            _CreateBlockDev(self, node, instance, new_disk,
10528
                            f_create, info, f_create)
10529
          except errors.OpExecError, err:
10530
            self.LogWarning("Failed to create volume %s (%s) on"
10531
                            " node %s: %s",
10532
                            new_disk.iv_name, new_disk, node, err)
10533
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
10534
                       (new_disk.size, new_disk.mode)))
10535
      else:
10536
        # change a given disk
10537
        instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
10538
        result.append(("disk.mode/%d" % disk_op,
10539
                       disk_dict[constants.IDISK_MODE]))
10540

    
10541
    if self.op.disk_template:
10542
      r_shut = _ShutdownInstanceDisks(self, instance)
10543
      if not r_shut:
10544
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
10545
                                 " proceed with disk template conversion")
10546
      mode = (instance.disk_template, self.op.disk_template)
10547
      try:
10548
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
10549
      except:
10550
        self.cfg.ReleaseDRBDMinors(instance.name)
10551
        raise
10552
      result.append(("disk_template", self.op.disk_template))
10553

    
10554
    # NIC changes
10555
    for nic_op, nic_dict in self.op.nics:
10556
      if nic_op == constants.DDM_REMOVE:
10557
        # remove the last nic
10558
        del instance.nics[-1]
10559
        result.append(("nic.%d" % len(instance.nics), "remove"))
10560
      elif nic_op == constants.DDM_ADD:
10561
        # mac and bridge should be set, by now
10562
        mac = nic_dict[constants.INIC_MAC]
10563
        ip = nic_dict.get(constants.INIC_IP, None)
10564
        nicparams = self.nic_pinst[constants.DDM_ADD]
10565
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
10566
        instance.nics.append(new_nic)
10567
        result.append(("nic.%d" % (len(instance.nics) - 1),
10568
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
10569
                       (new_nic.mac, new_nic.ip,
10570
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
10571
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
10572
                       )))
10573
      else:
10574
        for key in (constants.INIC_MAC, constants.INIC_IP):
10575
          if key in nic_dict:
10576
            setattr(instance.nics[nic_op], key, nic_dict[key])
10577
        if nic_op in self.nic_pinst:
10578
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
10579
        for key, val in nic_dict.iteritems():
10580
          result.append(("nic.%s/%d" % (key, nic_op), val))
10581

    
10582
    # hvparams changes
10583
    if self.op.hvparams:
10584
      instance.hvparams = self.hv_inst
10585
      for key, val in self.op.hvparams.iteritems():
10586
        result.append(("hv/%s" % key, val))
10587

    
10588
    # beparams changes
10589
    if self.op.beparams:
10590
      instance.beparams = self.be_inst
10591
      for key, val in self.op.beparams.iteritems():
10592
        result.append(("be/%s" % key, val))
10593

    
10594
    # OS change
10595
    if self.op.os_name:
10596
      instance.os = self.op.os_name
10597

    
10598
    # osparams changes
10599
    if self.op.osparams:
10600
      instance.osparams = self.os_inst
10601
      for key, val in self.op.osparams.iteritems():
10602
        result.append(("os/%s" % key, val))
10603

    
10604
    self.cfg.Update(instance, feedback_fn)
10605

    
10606
    return result
10607

    
10608
  _DISK_CONVERSIONS = {
10609
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
10610
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
10611
    }
10612

    
10613

    
10614
class LUBackupQuery(NoHooksLU):
10615
  """Query the exports list
10616

10617
  """
10618
  REQ_BGL = False
10619

    
10620
  def ExpandNames(self):
10621
    self.needed_locks = {}
10622
    self.share_locks[locking.LEVEL_NODE] = 1
10623
    if not self.op.nodes:
10624
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10625
    else:
10626
      self.needed_locks[locking.LEVEL_NODE] = \
10627
        _GetWantedNodes(self, self.op.nodes)
10628

    
10629
  def Exec(self, feedback_fn):
10630
    """Compute the list of all the exported system images.
10631

10632
    @rtype: dict
10633
    @return: a dictionary with the structure node->(export-list)
10634
        where export-list is a list of the instances exported on
10635
        that node.
10636

10637
    """
10638
    self.nodes = self.glm.list_owned(locking.LEVEL_NODE)
10639
    rpcresult = self.rpc.call_export_list(self.nodes)
10640
    result = {}
10641
    for node in rpcresult:
10642
      if rpcresult[node].fail_msg:
10643
        result[node] = False
10644
      else:
10645
        result[node] = rpcresult[node].payload
10646

    
10647
    return result
10648

    
10649

    
10650
class LUBackupPrepare(NoHooksLU):
10651
  """Prepares an instance for an export and returns useful information.
10652

10653
  """
10654
  REQ_BGL = False
10655

    
10656
  def ExpandNames(self):
10657
    self._ExpandAndLockInstance()
10658

    
10659
  def CheckPrereq(self):
10660
    """Check prerequisites.
10661

10662
    """
10663
    instance_name = self.op.instance_name
10664

    
10665
    self.instance = self.cfg.GetInstanceInfo(instance_name)
10666
    assert self.instance is not None, \
10667
          "Cannot retrieve locked instance %s" % self.op.instance_name
10668
    _CheckNodeOnline(self, self.instance.primary_node)
10669

    
10670
    self._cds = _GetClusterDomainSecret()
10671

    
10672
  def Exec(self, feedback_fn):
10673
    """Prepares an instance for an export.
10674

10675
    """
10676
    instance = self.instance
10677

    
10678
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
10679
      salt = utils.GenerateSecret(8)
10680

    
10681
      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
10682
      result = self.rpc.call_x509_cert_create(instance.primary_node,
10683
                                              constants.RIE_CERT_VALIDITY)
10684
      result.Raise("Can't create X509 key and certificate on %s" % result.node)
10685

    
10686
      (name, cert_pem) = result.payload
10687

    
10688
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
10689
                                             cert_pem)
10690

    
10691
      return {
10692
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
10693
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
10694
                          salt),
10695
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
10696
        }
10697

    
10698
    return None
10699

    
10700

    
10701
class LUBackupExport(LogicalUnit):
10702
  """Export an instance to an image in the cluster.
10703

10704
  """
10705
  HPATH = "instance-export"
10706
  HTYPE = constants.HTYPE_INSTANCE
10707
  REQ_BGL = False
10708

    
10709
  def CheckArguments(self):
10710
    """Check the arguments.
10711

10712
    """
10713
    self.x509_key_name = self.op.x509_key_name
10714
    self.dest_x509_ca_pem = self.op.destination_x509_ca
10715

    
10716
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
10717
      if not self.x509_key_name:
10718
        raise errors.OpPrereqError("Missing X509 key name for encryption",
10719
                                   errors.ECODE_INVAL)
10720

    
10721
      if not self.dest_x509_ca_pem:
10722
        raise errors.OpPrereqError("Missing destination X509 CA",
10723
                                   errors.ECODE_INVAL)
10724

    
10725
  def ExpandNames(self):
10726
    self._ExpandAndLockInstance()
10727

    
10728
    # Lock all nodes for local exports
10729
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
10730
      # FIXME: lock only instance primary and destination node
10731
      #
10732
      # Sad but true, for now we have do lock all nodes, as we don't know where
10733
      # the previous export might be, and in this LU we search for it and
10734
      # remove it from its current node. In the future we could fix this by:
10735
      #  - making a tasklet to search (share-lock all), then create the
10736
      #    new one, then one to remove, after
10737
      #  - removing the removal operation altogether
10738
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10739

    
10740
  def DeclareLocks(self, level):
10741
    """Last minute lock declaration."""
10742
    # All nodes are locked anyway, so nothing to do here.
10743

    
10744
  def BuildHooksEnv(self):
10745
    """Build hooks env.
10746

10747
    This will run on the master, primary node and target node.
10748

10749
    """
10750
    env = {
10751
      "EXPORT_MODE": self.op.mode,
10752
      "EXPORT_NODE": self.op.target_node,
10753
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
10754
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
10755
      # TODO: Generic function for boolean env variables
10756
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
10757
      }
10758

    
10759
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10760

    
10761
    return env
10762

    
10763
  def BuildHooksNodes(self):
10764
    """Build hooks nodes.
10765

10766
    """
10767
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
10768

    
10769
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
10770
      nl.append(self.op.target_node)
10771

    
10772
    return (nl, nl)
10773

    
10774
  def CheckPrereq(self):
10775
    """Check prerequisites.
10776

10777
    This checks that the instance and node names are valid.
10778

10779
    """
10780
    instance_name = self.op.instance_name
10781

    
10782
    self.instance = self.cfg.GetInstanceInfo(instance_name)
10783
    assert self.instance is not None, \
10784
          "Cannot retrieve locked instance %s" % self.op.instance_name
10785
    _CheckNodeOnline(self, self.instance.primary_node)
10786

    
10787
    if (self.op.remove_instance and self.instance.admin_up and
10788
        not self.op.shutdown):
10789
      raise errors.OpPrereqError("Can not remove instance without shutting it"
10790
                                 " down before")
10791

    
10792
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
10793
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
10794
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
10795
      assert self.dst_node is not None
10796

    
10797
      _CheckNodeOnline(self, self.dst_node.name)
10798
      _CheckNodeNotDrained(self, self.dst_node.name)
10799

    
10800
      self._cds = None
10801
      self.dest_disk_info = None
10802
      self.dest_x509_ca = None
10803

    
10804
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10805
      self.dst_node = None
10806

    
10807
      if len(self.op.target_node) != len(self.instance.disks):
10808
        raise errors.OpPrereqError(("Received destination information for %s"
10809
                                    " disks, but instance %s has %s disks") %
10810
                                   (len(self.op.target_node), instance_name,
10811
                                    len(self.instance.disks)),
10812
                                   errors.ECODE_INVAL)
10813

    
10814
      cds = _GetClusterDomainSecret()
10815

    
10816
      # Check X509 key name
10817
      try:
10818
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
10819
      except (TypeError, ValueError), err:
10820
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
10821

    
10822
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
10823
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
10824
                                   errors.ECODE_INVAL)
10825

    
10826
      # Load and verify CA
10827
      try:
10828
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
10829
      except OpenSSL.crypto.Error, err:
10830
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
10831
                                   (err, ), errors.ECODE_INVAL)
10832

    
10833
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
10834
      if errcode is not None:
10835
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
10836
                                   (msg, ), errors.ECODE_INVAL)
10837

    
10838
      self.dest_x509_ca = cert
10839

    
10840
      # Verify target information
10841
      disk_info = []
10842
      for idx, disk_data in enumerate(self.op.target_node):
10843
        try:
10844
          (host, port, magic) = \
10845
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
10846
        except errors.GenericError, err:
10847
          raise errors.OpPrereqError("Target info for disk %s: %s" %
10848
                                     (idx, err), errors.ECODE_INVAL)
10849

    
10850
        disk_info.append((host, port, magic))
10851

    
10852
      assert len(disk_info) == len(self.op.target_node)
10853
      self.dest_disk_info = disk_info
10854

    
10855
    else:
10856
      raise errors.ProgrammerError("Unhandled export mode %r" %
10857
                                   self.op.mode)
10858

    
10859
    # instance disk type verification
10860
    # TODO: Implement export support for file-based disks
10861
    for disk in self.instance.disks:
10862
      if disk.dev_type == constants.LD_FILE:
10863
        raise errors.OpPrereqError("Export not supported for instances with"
10864
                                   " file-based disks", errors.ECODE_INVAL)
10865

    
10866
  def _CleanupExports(self, feedback_fn):
10867
    """Removes exports of current instance from all other nodes.
10868

10869
    If an instance in a cluster with nodes A..D was exported to node C, its
10870
    exports will be removed from the nodes A, B and D.
10871

10872
    """
10873
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
10874

    
10875
    nodelist = self.cfg.GetNodeList()
10876
    nodelist.remove(self.dst_node.name)
10877

    
10878
    # on one-node clusters nodelist will be empty after the removal;
    # if we proceeded, the backup would be removed because OpBackupQuery
    # substitutes an empty list with the full cluster node list.
10881
    iname = self.instance.name
10882
    if nodelist:
10883
      feedback_fn("Removing old exports for instance %s" % iname)
10884
      exportlist = self.rpc.call_export_list(nodelist)
10885
      for node in exportlist:
10886
        if exportlist[node].fail_msg:
10887
          continue
10888
        if iname in exportlist[node].payload:
10889
          msg = self.rpc.call_export_remove(node, iname).fail_msg
10890
          if msg:
10891
            self.LogWarning("Could not remove older export for instance %s"
10892
                            " on node %s: %s", iname, node, msg)
10893

    
10894
  def Exec(self, feedback_fn):
10895
    """Export an instance to an image in the cluster.
10896

10897
    """
10898
    assert self.op.mode in constants.EXPORT_MODES
10899

    
10900
    instance = self.instance
10901
    src_node = instance.primary_node
10902

    
10903
    if self.op.shutdown:
10904
      # shutdown the instance, but not the disks
10905
      feedback_fn("Shutting down instance %s" % instance.name)
10906
      result = self.rpc.call_instance_shutdown(src_node, instance,
10907
                                               self.op.shutdown_timeout)
10908
      # TODO: Maybe ignore failures if ignore_remove_failures is set
10909
      result.Raise("Could not shutdown instance %s on"
10910
                   " node %s" % (instance.name, src_node))
10911

    
10912
    # set the disks ID correctly since call_instance_start needs the
10913
    # correct drbd minor to create the symlinks
10914
    for disk in instance.disks:
10915
      self.cfg.SetDiskID(disk, src_node)
10916

    
10917
    activate_disks = (not instance.admin_up)
10918

    
10919
    if activate_disks:
10920
      # Activate the instance disks if we're exporting a stopped instance
10921
      feedback_fn("Activating disks for %s" % instance.name)
10922
      _StartInstanceDisks(self, instance, None)
10923

    
10924
    try:
10925
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
10926
                                                     instance)
10927

    
10928
      helper.CreateSnapshots()
10929
      try:
10930
        if (self.op.shutdown and instance.admin_up and
10931
            not self.op.remove_instance):
10932
          assert not activate_disks
10933
          feedback_fn("Starting instance %s" % instance.name)
10934
          result = self.rpc.call_instance_start(src_node, instance, None, None)
10935
          msg = result.fail_msg
10936
          if msg:
10937
            feedback_fn("Failed to start instance: %s" % msg)
10938
            _ShutdownInstanceDisks(self, instance)
10939
            raise errors.OpExecError("Could not start instance: %s" % msg)
10940

    
10941
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
10942
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
10943
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10944
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
10945
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10946

    
10947
          (key_name, _, _) = self.x509_key_name
10948

    
10949
          dest_ca_pem = \
10950
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
10951
                                            self.dest_x509_ca)
10952

    
10953
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
10954
                                                     key_name, dest_ca_pem,
10955
                                                     timeouts)
10956
      finally:
10957
        helper.Cleanup()
10958

    
10959
      # Check for backwards compatibility
10960
      assert len(dresults) == len(instance.disks)
10961
      assert compat.all(isinstance(i, bool) for i in dresults), \
10962
             "Not all results are boolean: %r" % dresults
10963

    
10964
    finally:
10965
      if activate_disks:
10966
        feedback_fn("Deactivating disks for %s" % instance.name)
10967
        _ShutdownInstanceDisks(self, instance)
10968

    
10969
    if not (compat.all(dresults) and fin_resu):
10970
      failures = []
10971
      if not fin_resu:
10972
        failures.append("export finalization")
10973
      if not compat.all(dresults):
10974
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
10975
                               if not dsk)
10976
        failures.append("disk export: disk(s) %s" % fdsk)
10977

    
10978
      raise errors.OpExecError("Export failed, errors in %s" %
10979
                               utils.CommaJoin(failures))
10980

    
10981
    # At this point, the export was successful, we can cleanup/finish
10982

    
10983
    # Remove instance if requested
10984
    if self.op.remove_instance:
10985
      feedback_fn("Removing instance %s" % instance.name)
10986
      _RemoveInstance(self, feedback_fn, instance,
10987
                      self.op.ignore_remove_failures)
10988

    
10989
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
10990
      self._CleanupExports(feedback_fn)
10991

    
10992
    return fin_resu, dresults
10993

    
10994

    
10995
class LUBackupRemove(NoHooksLU):
  """Remove exports related to the named instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name

    locked_nodes = self.glm.list_owned(locking.LEVEL_NODE)
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")

    
11041

    
11042
class LUGroupAdd(LogicalUnit):
11043
  """Logical unit for creating node groups.
11044

11045
  """
11046
  HPATH = "group-add"
11047
  HTYPE = constants.HTYPE_GROUP
11048
  REQ_BGL = False
11049

    
11050
  def ExpandNames(self):
11051
    # We need the new group's UUID here so that we can create and acquire the
11052
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
11053
    # that it should not check whether the UUID exists in the configuration.
11054
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
11055
    self.needed_locks = {}
11056
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11057

    
11058
  def CheckPrereq(self):
11059
    """Check prerequisites.
11060

11061
    This checks that the given group name is not an existing node group
11062
    already.
11063

11064
    """
11065
    try:
11066
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11067
    except errors.OpPrereqError:
11068
      pass
11069
    else:
11070
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
11071
                                 " node group (UUID: %s)" %
11072
                                 (self.op.group_name, existing_uuid),
11073
                                 errors.ECODE_EXISTS)
11074

    
11075
    if self.op.ndparams:
11076
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
11077

    
11078
  def BuildHooksEnv(self):
11079
    """Build hooks env.
11080

11081
    """
11082
    return {
11083
      "GROUP_NAME": self.op.group_name,
11084
      }
11085

    
11086
  def BuildHooksNodes(self):
11087
    """Build hooks nodes.
11088

11089
    """
11090
    mn = self.cfg.GetMasterNode()
11091
    return ([mn], [mn])
11092

    
11093
  def Exec(self, feedback_fn):
11094
    """Add the node group to the cluster.
11095

11096
    """
11097
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
11098
                                  uuid=self.group_uuid,
11099
                                  alloc_policy=self.op.alloc_policy,
11100
                                  ndparams=self.op.ndparams)
11101

    
11102
    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
11103
    del self.remove_locks[locking.LEVEL_NODEGROUP]
11104

    
11105

    
11106
class LUGroupAssignNodes(NoHooksLU):
11107
  """Logical unit for assigning nodes to groups.
11108

11109
  """
11110
  REQ_BGL = False
11111

    
11112
  def ExpandNames(self):
11113
    # These raise errors.OpPrereqError on their own:
11114
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11115
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
11116

    
11117
    # We want to lock all the affected nodes and groups. We have readily
11118
    # available the list of nodes, and the *destination* group. To gather the
11119
    # list of "source" groups, we need to fetch node information later on.
11120
    self.needed_locks = {
11121
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
11122
      locking.LEVEL_NODE: self.op.nodes,
11123
      }
11124

    
11125
  def DeclareLocks(self, level):
11126
    if level == locking.LEVEL_NODEGROUP:
11127
      assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
11128

    
11129
      # Try to get all affected nodes' groups without having the group or node
11130
      # lock yet. Needs verification later in the code flow.
11131
      groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
11132

    
11133
      self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
11134

    
11135
  def CheckPrereq(self):
11136
    """Check prerequisites.
11137

11138
    """
11139
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
11140
    assert (frozenset(self.glm.list_owned(locking.LEVEL_NODE)) ==
11141
            frozenset(self.op.nodes))
11142

    
11143
    expected_locks = (set([self.group_uuid]) |
11144
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
11145
    actual_locks = self.glm.list_owned(locking.LEVEL_NODEGROUP)
11146
    if actual_locks != expected_locks:
11147
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
11148
                               " current groups are '%s', used to be '%s'" %
11149
                               (utils.CommaJoin(expected_locks),
11150
                                utils.CommaJoin(actual_locks)))
11151

    
11152
    self.node_data = self.cfg.GetAllNodesInfo()
11153
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
11154
    instance_data = self.cfg.GetAllInstancesInfo()
11155

    
11156
    if self.group is None:
11157
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11158
                               (self.op.group_name, self.group_uuid))
11159

    
11160
    (new_splits, previous_splits) = \
11161
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
11162
                                             for node in self.op.nodes],
11163
                                            self.node_data, instance_data)
11164

    
11165
    if new_splits:
11166
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
11167

    
11168
      if not self.op.force:
11169
        raise errors.OpExecError("The following instances get split by this"
11170
                                 " change and --force was not given: %s" %
11171
                                 fmt_new_splits)
11172
      else:
11173
        self.LogWarning("This operation will split the following instances: %s",
11174
                        fmt_new_splits)
11175

    
11176
        if previous_splits:
11177
          self.LogWarning("In addition, these already-split instances continue"
11178
                          " to be split across groups: %s",
11179
                          utils.CommaJoin(utils.NiceSort(previous_splits)))
11180

    
11181
  def Exec(self, feedback_fn):
11182
    """Assign nodes to a new group.
11183

11184
    """
11185
    for node in self.op.nodes:
11186
      self.node_data[node].group = self.group_uuid
11187

    
11188
    # FIXME: Depends on side-effects of modifying the result of
11189
    # C{cfg.GetAllNodesInfo}
11190

    
11191
    self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
11192

    
11193
  @staticmethod
11194
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
11195
    """Check for split instances after a node assignment.
11196

11197
    This method considers a series of node assignments as an atomic operation,
11198
    and returns information about split instances after applying the set of
11199
    changes.
11200

11201
    In particular, it returns information about newly split instances, and
11202
    instances that were already split, and remain so after the change.
11203

11204
    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
11205
    considered.
11206

11207
    @type changes: list of (node_name, new_group_uuid) pairs.
11208
    @param changes: list of node assignments to consider.
11209
    @param node_data: a dict with data for all nodes
11210
    @param instance_data: a dict with all instances to consider
11211
    @rtype: a two-tuple
11212
    @return: a list of instances that were previously okay and end up split as
      a consequence of this change, and a list of instances that were
      previously split and that this change does not fix.
11215

11216
    """
11217
    changed_nodes = dict((node, group) for node, group in changes
11218
                         if node_data[node].group != group)
11219

    
11220
    all_split_instances = set()
11221
    previously_split_instances = set()
11222

    
11223
    def InstanceNodes(instance):
11224
      return [instance.primary_node] + list(instance.secondary_nodes)
11225

    
11226
    for inst in instance_data.values():
11227
      if inst.disk_template not in constants.DTS_INT_MIRROR:
11228
        continue
11229

    
11230
      instance_nodes = InstanceNodes(inst)
11231

    
11232
      if len(set(node_data[node].group for node in instance_nodes)) > 1:
11233
        previously_split_instances.add(inst.name)
11234

    
11235
      if len(set(changed_nodes.get(node, node_data[node].group)
11236
                 for node in instance_nodes)) > 1:
11237
        all_split_instances.add(inst.name)
11238

    
11239
    return (list(all_split_instances - previously_split_instances),
11240
            list(previously_split_instances & all_split_instances))
11241
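  # Worked example (hypothetical names): for a DRBD instance "inst1" on nodes
  # node1 and node2, both currently in group G1, the single change
  # [("node1", "G2")] makes CheckAssignmentForSplitInstances return
  # (["inst1"], []) -- inst1 becomes newly split, and nothing was split before.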

    
11242

    
11243
class _GroupQuery(_QueryBase):
11244
  FIELDS = query.GROUP_FIELDS
11245

    
11246
  def ExpandNames(self, lu):
11247
    lu.needed_locks = {}
11248

    
11249
    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
11250
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
11251

    
11252
    if not self.names:
11253
      self.wanted = [name_to_uuid[name]
11254
                     for name in utils.NiceSort(name_to_uuid.keys())]
11255
    else:
11256
      # Accept names to be either names or UUIDs.
11257
      missing = []
11258
      self.wanted = []
11259
      all_uuid = frozenset(self._all_groups.keys())
11260

    
11261
      for name in self.names:
11262
        if name in all_uuid:
11263
          self.wanted.append(name)
11264
        elif name in name_to_uuid:
11265
          self.wanted.append(name_to_uuid[name])
11266
        else:
11267
          missing.append(name)
11268

    
11269
      if missing:
11270
        raise errors.OpPrereqError("Some groups do not exist: %s" %
11271
                                   utils.CommaJoin(missing),
11272
                                   errors.ECODE_NOENT)
11273

    
11274
  def DeclareLocks(self, lu, level):
11275
    pass
11276

    
11277
  def _GetQueryData(self, lu):
11278
    """Computes the list of node groups and their attributes.
11279

11280
    """
11281
    do_nodes = query.GQ_NODE in self.requested_data
11282
    do_instances = query.GQ_INST in self.requested_data
11283

    
11284
    group_to_nodes = None
11285
    group_to_instances = None
11286

    
11287
    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
11288
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
11289
    # latter GetAllInstancesInfo() is not enough, for we have to go through
11290
    # instance->node. Hence, we will need to process nodes even if we only need
11291
    # instance information.
11292
    if do_nodes or do_instances:
11293
      all_nodes = lu.cfg.GetAllNodesInfo()
11294
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
11295
      node_to_group = {}
11296

    
11297
      for node in all_nodes.values():
11298
        if node.group in group_to_nodes:
11299
          group_to_nodes[node.group].append(node.name)
11300
          node_to_group[node.name] = node.group
11301

    
11302
      if do_instances:
11303
        all_instances = lu.cfg.GetAllInstancesInfo()
11304
        group_to_instances = dict((uuid, []) for uuid in self.wanted)
11305

    
11306
        for instance in all_instances.values():
11307
          node = instance.primary_node
11308
          if node in node_to_group:
11309
            group_to_instances[node_to_group[node]].append(instance.name)
11310

    
11311
        if not do_nodes:
11312
          # Do not pass on node information if it was not requested.
11313
          group_to_nodes = None
11314

    
11315
    return query.GroupQueryData([self._all_groups[uuid]
11316
                                 for uuid in self.wanted],
11317
                                group_to_nodes, group_to_instances)
11318

    
11319

    
11320
class LUGroupQuery(NoHooksLU):
11321
  """Logical unit for querying node groups.
11322

11323
  """
11324
  REQ_BGL = False
11325

    
11326
  def CheckArguments(self):
11327
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
11328
                          self.op.output_fields, False)
11329

    
11330
  def ExpandNames(self):
11331
    self.gq.ExpandNames(self)
11332

    
11333
  def Exec(self, feedback_fn):
11334
    return self.gq.OldStyleQuery(self)
11335

    
11336

    
11337
class LUGroupSetParams(LogicalUnit):
11338
  """Modifies the parameters of a node group.
11339

11340
  """
11341
  HPATH = "group-modify"
11342
  HTYPE = constants.HTYPE_GROUP
11343
  REQ_BGL = False
11344

    
11345
  def CheckArguments(self):
11346
    all_changes = [
11347
      self.op.ndparams,
11348
      self.op.alloc_policy,
11349
      ]
11350

    
11351
    if all_changes.count(None) == len(all_changes):
11352
      raise errors.OpPrereqError("Please pass at least one modification",
11353
                                 errors.ECODE_INVAL)
11354

    
11355
  def ExpandNames(self):
11356
    # This raises errors.OpPrereqError on its own:
11357
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11358

    
11359
    self.needed_locks = {
11360
      locking.LEVEL_NODEGROUP: [self.group_uuid],
11361
      }
11362

    
11363
  def CheckPrereq(self):
11364
    """Check prerequisites.
11365

11366
    """
11367
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
11368

    
11369
    if self.group is None:
11370
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11371
                               (self.op.group_name, self.group_uuid))
11372

    
11373
    if self.op.ndparams:
11374
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
11375
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
11376
      self.new_ndparams = new_ndparams
11377

    
11378
  def BuildHooksEnv(self):
11379
    """Build hooks env.
11380

11381
    """
11382
    return {
11383
      "GROUP_NAME": self.op.group_name,
11384
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
11385
      }
11386

    
11387
  def BuildHooksNodes(self):
11388
    """Build hooks nodes.
11389

11390
    """
11391
    mn = self.cfg.GetMasterNode()
11392
    return ([mn], [mn])
11393

    
11394
  def Exec(self, feedback_fn):
11395
    """Modifies the node group.
11396

11397
    """
11398
    result = []
11399

    
11400
    if self.op.ndparams:
11401
      self.group.ndparams = self.new_ndparams
11402
      result.append(("ndparams", str(self.group.ndparams)))
11403

    
11404
    if self.op.alloc_policy:
11405
      self.group.alloc_policy = self.op.alloc_policy
11406

    
11407
    self.cfg.Update(self.group, feedback_fn)
11408
    return result
11409

    
11410

    
11411

    
11412
class LUGroupRemove(LogicalUnit):
11413
  HPATH = "group-remove"
11414
  HTYPE = constants.HTYPE_GROUP
11415
  REQ_BGL = False
11416

    
11417
  def ExpandNames(self):
11418
    # This raises errors.OpPrereqError on its own:
11419
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11420
    self.needed_locks = {
11421
      locking.LEVEL_NODEGROUP: [self.group_uuid],
11422
      }
11423

    
11424
  def CheckPrereq(self):
11425
    """Check prerequisites.
11426

11427
    This checks that the given group name exists as a node group, that is
11428
    empty (i.e., contains no nodes), and that is not the last group of the
11429
    cluster.
11430

11431
    """
11432
    # Verify that the group is empty.
11433
    group_nodes = [node.name
11434
                   for node in self.cfg.GetAllNodesInfo().values()
11435
                   if node.group == self.group_uuid]
11436

    
11437
    if group_nodes:
11438
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
11439
                                 " nodes: %s" %
11440
                                 (self.op.group_name,
11441
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
11442
                                 errors.ECODE_STATE)
11443

    
11444
    # Verify the cluster would not be left group-less.
11445
    if len(self.cfg.GetNodeGroupList()) == 1:
11446
      raise errors.OpPrereqError("Group '%s' is the only group,"
11447
                                 " cannot be removed" %
11448
                                 self.op.group_name,
11449
                                 errors.ECODE_STATE)
11450

    
11451
  def BuildHooksEnv(self):
11452
    """Build hooks env.
11453

11454
    """
11455
    return {
11456
      "GROUP_NAME": self.op.group_name,
11457
      }
11458

    
11459
  def BuildHooksNodes(self):
11460
    """Build hooks nodes.
11461

11462
    """
11463
    mn = self.cfg.GetMasterNode()
11464
    return ([mn], [mn])
11465

    
11466
  def Exec(self, feedback_fn):
11467
    """Remove the node group.
11468

11469
    """
11470
    try:
11471
      self.cfg.RemoveNodeGroup(self.group_uuid)
11472
    except errors.ConfigurationError:
11473
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
11474
                               (self.op.group_name, self.group_uuid))
11475

    
11476
    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11477

    
11478

    
11479
class LUGroupRename(LogicalUnit):
11480
  HPATH = "group-rename"
11481
  HTYPE = constants.HTYPE_GROUP
11482
  REQ_BGL = False
11483

    
11484
  def ExpandNames(self):
11485
    # This raises errors.OpPrereqError on its own:
11486
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11487

    
11488
    self.needed_locks = {
11489
      locking.LEVEL_NODEGROUP: [self.group_uuid],
11490
      }
11491

    
11492
  def CheckPrereq(self):
11493
    """Check prerequisites.
11494

11495
    Ensures requested new name is not yet used.
11496

11497
    """
11498
    try:
11499
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
11500
    except errors.OpPrereqError:
11501
      pass
11502
    else:
11503
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
11504
                                 " node group (UUID: %s)" %
11505
                                 (self.op.new_name, new_name_uuid),
11506
                                 errors.ECODE_EXISTS)
11507

    
11508
  def BuildHooksEnv(self):
11509
    """Build hooks env.
11510

11511
    """
11512
    return {
11513
      "OLD_NAME": self.op.group_name,
11514
      "NEW_NAME": self.op.new_name,
11515
      }
11516

    
11517
  def BuildHooksNodes(self):
11518
    """Build hooks nodes.
11519

11520
    """
11521
    mn = self.cfg.GetMasterNode()
11522

    
11523
    all_nodes = self.cfg.GetAllNodesInfo()
11524
    all_nodes.pop(mn, None)
11525

    
11526
    run_nodes = [mn]
11527
    run_nodes.extend(node.name for node in all_nodes.values()
11528
                     if node.group == self.group_uuid)
11529

    
11530
    return (run_nodes, run_nodes)
11531

    
11532
  def Exec(self, feedback_fn):
11533
    """Rename the node group.
11534

11535
    """
11536
    group = self.cfg.GetNodeGroup(self.group_uuid)
11537

    
11538
    if group is None:
11539
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11540
                               (self.op.group_name, self.group_uuid))
11541

    
11542
    group.name = self.op.new_name
11543
    self.cfg.Update(group, feedback_fn)
11544

    
11545
    return self.op.new_name
11546

    
11547

    
11548
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
11549
  """Generic tags LU.
11550

11551
  This is an abstract class which is the parent of all the other tags LUs.
11552

11553
  """
11554
  def ExpandNames(self):
11555
    self.group_uuid = None
11556
    self.needed_locks = {}
11557
    if self.op.kind == constants.TAG_NODE:
11558
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
11559
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
11560
    elif self.op.kind == constants.TAG_INSTANCE:
11561
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
11562
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
11563
    elif self.op.kind == constants.TAG_NODEGROUP:
11564
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
11565

    
11566
    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
11567
    # not possible to acquire the BGL based on opcode parameters)
11568

    
11569
  def CheckPrereq(self):
11570
    """Check prerequisites.
11571

11572
    """
11573
    if self.op.kind == constants.TAG_CLUSTER:
11574
      self.target = self.cfg.GetClusterInfo()
11575
    elif self.op.kind == constants.TAG_NODE:
11576
      self.target = self.cfg.GetNodeInfo(self.op.name)
11577
    elif self.op.kind == constants.TAG_INSTANCE:
11578
      self.target = self.cfg.GetInstanceInfo(self.op.name)
11579
    elif self.op.kind == constants.TAG_NODEGROUP:
11580
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
11581
    else:
11582
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
11583
                                 str(self.op.kind), errors.ECODE_INVAL)
11584

    
11585

    
11586
class LUTagsGet(TagsLU):
11587
  """Returns the tags of a given object.
11588

11589
  """
11590
  REQ_BGL = False
11591

    
11592
  def ExpandNames(self):
11593
    TagsLU.ExpandNames(self)
11594

    
11595
    # Share locks as this is only a read operation
11596
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
11597

    
11598
  def Exec(self, feedback_fn):
11599
    """Returns the tag list.
11600

11601
    """
11602
    return list(self.target.GetTags())
11603

    
11604

    
11605
class LUTagsSearch(NoHooksLU):
11606
  """Searches the tags for a given pattern.
11607

11608
  """
11609
  REQ_BGL = False
11610

    
11611
  def ExpandNames(self):
11612
    self.needed_locks = {}
11613

    
11614
  def CheckPrereq(self):
11615
    """Check prerequisites.
11616

11617
    This checks the pattern passed for validity by compiling it.
11618

11619
    """
11620
    try:
11621
      self.re = re.compile(self.op.pattern)
11622
    except re.error, err:
11623
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
11624
                                 (self.op.pattern, err), errors.ECODE_INVAL)
11625

    
11626
  def Exec(self, feedback_fn):
11627
    """Returns the tag list.
11628

11629
    """
11630
    cfg = self.cfg
11631
    tgts = [("/cluster", cfg.GetClusterInfo())]
11632
    ilist = cfg.GetAllInstancesInfo().values()
11633
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
11634
    nlist = cfg.GetAllNodesInfo().values()
11635
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
11636
    tgts.extend(("/nodegroup/%s" % n.name, n)
11637
                for n in cfg.GetAllNodeGroupsInfo().values())
11638
    results = []
11639
    for path, target in tgts:
11640
      for tag in target.GetTags():
11641
        if self.re.search(tag):
11642
          results.append((path, tag))
11643
    return results
11644

    
11645

    
11646
class LUTagsSet(TagsLU):
11647
  """Sets a tag on a given object.
11648

11649
  """
11650
  REQ_BGL = False
11651

    
11652
  def CheckPrereq(self):
11653
    """Check prerequisites.
11654

11655
    This checks the type and length of the tag name and value.
11656

11657
    """
11658
    TagsLU.CheckPrereq(self)
11659
    for tag in self.op.tags:
11660
      objects.TaggableObject.ValidateTag(tag)
11661

    
11662
  def Exec(self, feedback_fn):
11663
    """Sets the tag.
11664

11665
    """
11666
    try:
11667
      for tag in self.op.tags:
11668
        self.target.AddTag(tag)
11669
    except errors.TagError, err:
11670
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
11671
    self.cfg.Update(self.target, feedback_fn)
11672

    
11673

    
11674
class LUTagsDel(TagsLU):
11675
  """Delete a list of tags from a given object.
11676

11677
  """
11678
  REQ_BGL = False
11679

    
11680
  def CheckPrereq(self):
11681
    """Check prerequisites.
11682

11683
    This checks that we have the given tag.
11684

11685
    """
11686
    TagsLU.CheckPrereq(self)
11687
    for tag in self.op.tags:
11688
      objects.TaggableObject.ValidateTag(tag)
11689
    del_tags = frozenset(self.op.tags)
11690
    cur_tags = self.target.GetTags()
11691

    
11692
    diff_tags = del_tags - cur_tags
11693
    if diff_tags:
11694
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
11695
      raise errors.OpPrereqError("Tag(s) %s not found" %
11696
                                 (utils.CommaJoin(diff_names), ),
11697
                                 errors.ECODE_NOENT)
11698

    
11699
  def Exec(self, feedback_fn):
11700
    """Remove the tag from the object.
11701

11702
    """
11703
    for tag in self.op.tags:
11704
      self.target.RemoveTag(tag)
11705
    self.cfg.Update(self.target, feedback_fn)
11706

    
11707

    
11708
class LUTestDelay(NoHooksLU):
11709
  """Sleep for a specified amount of time.
11710

11711
  This LU sleeps on the master and/or nodes for a specified amount of
11712
  time.
11713

11714
  """
11715
  REQ_BGL = False
11716

    
11717
  def ExpandNames(self):
11718
    """Expand names and set required locks.
11719

11720
    This expands the node list, if any.
11721

11722
    """
11723
    self.needed_locks = {}
11724
    if self.op.on_nodes:
11725
      # _GetWantedNodes can be used here, but is not always appropriate to use
11726
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
11727
      # more information.
11728
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
11729
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
11730

    
11731
  def _TestDelay(self):
11732
    """Do the actual sleep.
11733

11734
    """
11735
    if self.op.on_master:
11736
      if not utils.TestDelay(self.op.duration):
11737
        raise errors.OpExecError("Error during master delay test")
11738
    if self.op.on_nodes:
11739
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
11740
      for node, node_result in result.items():
11741
        node_result.Raise("Failure during rpc call to node %s" % node)
11742

    
11743
  def Exec(self, feedback_fn):
11744
    """Execute the test delay opcode, with the wanted repetitions.
11745

11746
    """
11747
    if self.op.repeat == 0:
11748
      self._TestDelay()
11749
    else:
11750
      top_value = self.op.repeat - 1
11751
      for i in range(self.op.repeat):
11752
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
11753
        self._TestDelay()
11754

    
11755

    
11756
class LUTestJqueue(NoHooksLU):
11757
  """Utility LU to test some aspects of the job queue.
11758

11759
  """
11760
  REQ_BGL = False
11761

    
11762
  # Must be lower than default timeout for WaitForJobChange to see whether it
11763
  # notices changed jobs
11764
  _CLIENT_CONNECT_TIMEOUT = 20.0
11765
  _CLIENT_CONFIRM_TIMEOUT = 60.0
11766

    
11767
  @classmethod
11768
  def _NotifyUsingSocket(cls, cb, errcls):
11769
    """Opens a Unix socket and waits for another program to connect.
11770

11771
    @type cb: callable
11772
    @param cb: Callback to send socket name to client
11773
    @type errcls: class
11774
    @param errcls: Exception class to use for errors
11775

11776
    """
11777
    # Using a temporary directory as there's no easy way to create temporary
11778
    # sockets without writing a custom loop around tempfile.mktemp and
11779
    # socket.bind
11780
    tmpdir = tempfile.mkdtemp()
11781
    try:
11782
      tmpsock = utils.PathJoin(tmpdir, "sock")
11783

    
11784
      logging.debug("Creating temporary socket at %s", tmpsock)
11785
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
11786
      try:
11787
        sock.bind(tmpsock)
11788
        sock.listen(1)
11789

    
11790
        # Send details to client
11791
        cb(tmpsock)
11792

    
11793
        # Wait for client to connect before continuing
11794
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
11795
        try:
11796
          (conn, _) = sock.accept()
11797
        except socket.error, err:
11798
          raise errcls("Client didn't connect in time (%s)" % err)
11799
      finally:
11800
        sock.close()
11801
    finally:
11802
      # Remove as soon as client is connected
11803
      shutil.rmtree(tmpdir)
11804

    
11805
    # Wait for client to close
11806
    try:
11807
      try:
11808
        # pylint: disable-msg=E1101
11809
        # Instance of '_socketobject' has no ... member
11810
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
11811
        conn.recv(1)
11812
      except socket.error, err:
11813
        raise errcls("Client failed to confirm notification (%s)" % err)
11814
    finally:
11815
      conn.close()
11816
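  # Client-side counterpart (a sketch, not part of this module): the test
  # client is expected to connect to the socket path it receives and keep the
  # connection open until it has handled the notification, e.g.:
  #
  #   sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
  #   sock.connect(sockname)   # unblocks sock.accept() above
  #   ...                      # process the notification
  #   sock.close()             # unblocks conn.recv(1) above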

    
11817
  def _SendNotification(self, test, arg, sockname):
11818
    """Sends a notification to the client.
11819

11820
    @type test: string
11821
    @param test: Test name
11822
    @param arg: Test argument (depends on test)
11823
    @type sockname: string
11824
    @param sockname: Socket path
11825

11826
    """
11827
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
11828

    
11829
  def _Notify(self, prereq, test, arg):
11830
    """Notifies the client of a test.
11831

11832
    @type prereq: bool
11833
    @param prereq: Whether this is a prereq-phase test
11834
    @type test: string
11835
    @param test: Test name
11836
    @param arg: Test argument (depends on test)
11837

11838
    """
11839
    if prereq:
11840
      errcls = errors.OpPrereqError
11841
    else:
11842
      errcls = errors.OpExecError
11843

    
11844
    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
11845
                                                  test, arg),
11846
                                   errcls)
11847

    
11848
  def CheckArguments(self):
11849
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
11850
    self.expandnames_calls = 0
11851

    
11852
  def ExpandNames(self):
11853
    checkargs_calls = getattr(self, "checkargs_calls", 0)
11854
    if checkargs_calls < 1:
11855
      raise errors.ProgrammerError("CheckArguments was not called")
11856

    
11857
    self.expandnames_calls += 1
11858

    
11859
    if self.op.notify_waitlock:
11860
      self._Notify(True, constants.JQT_EXPANDNAMES, None)
11861

    
11862
    self.LogInfo("Expanding names")
11863

    
11864
    # Get lock on master node (just to get a lock, not for a particular reason)
11865
    self.needed_locks = {
11866
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
11867
      }
11868

    
11869
  def Exec(self, feedback_fn):
11870
    if self.expandnames_calls < 1:
11871
      raise errors.ProgrammerError("ExpandNames was not called")
11872

    
11873
    if self.op.notify_exec:
11874
      self._Notify(False, constants.JQT_EXEC, None)
11875

    
11876
    self.LogInfo("Executing")
11877

    
11878
    if self.op.log_messages:
11879
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
11880
      for idx, msg in enumerate(self.op.log_messages):
11881
        self.LogInfo("Sending log message %s", idx + 1)
11882
        feedback_fn(constants.JQT_MSGPREFIX + msg)
11883
        # Report how many test messages have been sent
11884
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)
11885

    
11886
    if self.op.fail:
11887
      raise errors.OpExecError("Opcode failure was requested")
11888

    
11889
    return True
11890

    
11891

    
11892
class IAllocator(object):
11893
  """IAllocator framework.
11894

11895
  An IAllocator instance has four sets of attributes:
11896
    - cfg that is needed to query the cluster
11897
    - input data (all members of the _KEYS class attribute are required)
11898
    - four buffer attributes (in|out_data|text), that represent the
11899
      input (to the external script) in text and data structure format,
11900
      and the output from it, again in two formats
11901
    - the result variables from the script (success, info, nodes) for
11902
      easy usage
11903

11904
  """
11905
  # pylint: disable-msg=R0902
11906
  # lots of instance attributes
11907

    
11908
  def __init__(self, cfg, rpc, mode, **kwargs):
11909
    self.cfg = cfg
11910
    self.rpc = rpc
11911
    # init buffer variables
11912
    self.in_text = self.out_text = self.in_data = self.out_data = None
11913
    # init all input fields so that pylint is happy
11914
    self.mode = mode
11915
    self.mem_size = self.disks = self.disk_template = None
11916
    self.os = self.tags = self.nics = self.vcpus = None
11917
    self.hypervisor = None
11918
    self.relocate_from = None
11919
    self.name = None
11920
    self.evac_nodes = None
11921
    self.instances = None
11922
    self.reloc_mode = None
11923
    self.target_groups = None
11924
    # computed fields
11925
    self.required_nodes = None
11926
    # init result fields
11927
    self.success = self.info = self.result = None
11928

    
11929
    try:
11930
      (fn, keyset, self._result_check) = self._MODE_DATA[self.mode]
11931
    except KeyError:
11932
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
11933
                                   " IAllocator" % self.mode)
11934

    
11935
    for key in kwargs:
11936
      if key not in keyset:
11937
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
11938
                                     " IAllocator" % key)
11939
      setattr(self, key, kwargs[key])
11940

    
11941
    for key in keyset:
11942
      if key not in kwargs:
11943
        raise errors.ProgrammerError("Missing input parameter '%s' to"
11944
                                     " IAllocator" % key)
11945
    self._BuildInputData(compat.partial(fn, self))
11946

    
11947
  def _ComputeClusterData(self):
11948
    """Compute the generic allocator input data.
11949

11950
    This is the data that is independent of the actual operation.
11951

11952
    """
11953
    cfg = self.cfg
11954
    cluster_info = cfg.GetClusterInfo()
11955
    # cluster data
11956
    data = {
11957
      "version": constants.IALLOCATOR_VERSION,
11958
      "cluster_name": cfg.GetClusterName(),
11959
      "cluster_tags": list(cluster_info.GetTags()),
11960
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
11961
      # we don't have job IDs
11962
      }
11963
    ninfo = cfg.GetAllNodesInfo()
11964
    iinfo = cfg.GetAllInstancesInfo().values()
11965
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
11966

    
11967
    # node data
11968
    node_list = [n.name for n in ninfo.values() if n.vm_capable]
11969

    
11970
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
11971
      hypervisor_name = self.hypervisor
11972
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
11973
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
11974
    elif self.mode in (constants.IALLOCATOR_MODE_MEVAC,
11975
                       constants.IALLOCATOR_MODE_MRELOC):
11976
      hypervisor_name = cluster_info.enabled_hypervisors[0]
11977

    
11978
    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
11979
                                        hypervisor_name)
11980
    node_iinfo = \
11981
      self.rpc.call_all_instances_info(node_list,
11982
                                       cluster_info.enabled_hypervisors)
11983

    
11984
    data["nodegroups"] = self._ComputeNodeGroupData(cfg)
11985

    
11986
    config_ndata = self._ComputeBasicNodeData(ninfo)
11987
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
11988
                                                 i_list, config_ndata)
11989
    assert len(data["nodes"]) == len(ninfo), \
11990
        "Incomplete node data computed"
11991

    
11992
    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
11993

    
11994
    self.in_data = data
11995

    
11996
  @staticmethod
11997
  def _ComputeNodeGroupData(cfg):
11998
    """Compute node groups data.
11999

12000
    """
12001
    ng = dict((guuid, {
12002
      "name": gdata.name,
12003
      "alloc_policy": gdata.alloc_policy,
12004
      })
12005
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
12006

    
12007
    return ng
12008
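  # For illustration (names and policy are made up), the mapping returned
  # above looks like:
  #   {"<group uuid>": {"name": "default", "alloc_policy": "preferred"}}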

    
12009
  @staticmethod
12010
  def _ComputeBasicNodeData(node_cfg):
12011
    """Compute global node data.
12012

12013
    @rtype: dict
12014
    @returns: a dict of name: (node dict, node config)
12015

12016
    """
12017
    # fill in static (config-based) values
12018
    node_results = dict((ninfo.name, {
12019
      "tags": list(ninfo.GetTags()),
12020
      "primary_ip": ninfo.primary_ip,
12021
      "secondary_ip": ninfo.secondary_ip,
12022
      "offline": ninfo.offline,
12023
      "drained": ninfo.drained,
12024
      "master_candidate": ninfo.master_candidate,
12025
      "group": ninfo.group,
12026
      "master_capable": ninfo.master_capable,
12027
      "vm_capable": ninfo.vm_capable,
12028
      })
12029
      for ninfo in node_cfg.values())
12030

    
12031
    return node_results
12032

    
12033
  @staticmethod
12034
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
12035
                              node_results):
12036
    """Compute global node data.
12037

12038
    @param node_results: the basic node structures as filled from the config
12039

12040
    """
12041
    # make a copy of the current dict
12042
    node_results = dict(node_results)
12043
    for nname, nresult in node_data.items():
12044
      assert nname in node_results, "Missing basic data for node %s" % nname
12045
      ninfo = node_cfg[nname]
12046

    
12047
      if not (ninfo.offline or ninfo.drained):
12048
        nresult.Raise("Can't get data for node %s" % nname)
12049
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
12050
                                nname)
12051
        remote_info = nresult.payload
12052

    
12053
        for attr in ['memory_total', 'memory_free', 'memory_dom0',
12054
                     'vg_size', 'vg_free', 'cpu_total']:
12055
          if attr not in remote_info:
12056
            raise errors.OpExecError("Node '%s' didn't return attribute"
12057
                                     " '%s'" % (nname, attr))
12058
          if not isinstance(remote_info[attr], int):
12059
            raise errors.OpExecError("Node '%s' returned invalid value"
12060
                                     " for '%s': %s" %
12061
                                     (nname, attr, remote_info[attr]))
12062
        # compute memory used by primary instances
12063
        i_p_mem = i_p_up_mem = 0
12064
        for iinfo, beinfo in i_list:
12065
          if iinfo.primary_node == nname:
12066
            i_p_mem += beinfo[constants.BE_MEMORY]
12067
            if iinfo.name not in node_iinfo[nname].payload:
12068
              i_used_mem = 0
12069
            else:
12070
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
12071
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
12072
            remote_info['memory_free'] -= max(0, i_mem_diff)
12073

    
12074
            if iinfo.admin_up:
12075
              i_p_up_mem += beinfo[constants.BE_MEMORY]
12076

    
12077
        # compute memory used by instances
12078
        pnr_dyn = {
12079
          "total_memory": remote_info['memory_total'],
12080
          "reserved_memory": remote_info['memory_dom0'],
12081
          "free_memory": remote_info['memory_free'],
12082
          "total_disk": remote_info['vg_size'],
12083
          "free_disk": remote_info['vg_free'],
12084
          "total_cpus": remote_info['cpu_total'],
12085
          "i_pri_memory": i_p_mem,
12086
          "i_pri_up_memory": i_p_up_mem,
12087
          }
12088
        pnr_dyn.update(node_results[nname])
12089
        node_results[nname] = pnr_dyn
12090

    
12091
    return node_results
12092

    
12093
  @staticmethod
12094
  def _ComputeInstanceData(cluster_info, i_list):
12095
    """Compute global instance data.
12096

12097
    """
12098
    instance_data = {}
12099
    for iinfo, beinfo in i_list:
12100
      nic_data = []
12101
      for nic in iinfo.nics:
12102
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
12103
        nic_dict = {
12104
          "mac": nic.mac,
12105
          "ip": nic.ip,
12106
          "mode": filled_params[constants.NIC_MODE],
12107
          "link": filled_params[constants.NIC_LINK],
12108
          }
12109
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
12110
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
12111
        nic_data.append(nic_dict)
12112
      pir = {
12113
        "tags": list(iinfo.GetTags()),
12114
        "admin_up": iinfo.admin_up,
12115
        "vcpus": beinfo[constants.BE_VCPUS],
12116
        "memory": beinfo[constants.BE_MEMORY],
12117
        "os": iinfo.os,
12118
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
12119
        "nics": nic_data,
12120
        "disks": [{constants.IDISK_SIZE: dsk.size,
12121
                   constants.IDISK_MODE: dsk.mode}
12122
                  for dsk in iinfo.disks],
12123
        "disk_template": iinfo.disk_template,
12124
        "hypervisor": iinfo.hypervisor,
12125
        }
12126
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
12127
                                                 pir["disks"])
12128
      instance_data[iinfo.name] = pir
12129

    
12130
    return instance_data
12131

    
12132
  def _AddNewInstance(self):
12133
    """Add new instance data to allocator structure.
12134

12135
    This, in combination with _ComputeClusterData, will create the
    correct structure needed as input for the allocator.
12137

12138
    The checks for the completeness of the opcode must have already been
12139
    done.
12140

12141
    """
12142
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)
12143

    
12144
    if self.disk_template in constants.DTS_INT_MIRROR:
12145
      self.required_nodes = 2
12146
    else:
12147
      self.required_nodes = 1
12148

    
12149
    request = {
12150
      "name": self.name,
12151
      "disk_template": self.disk_template,
12152
      "tags": self.tags,
12153
      "os": self.os,
12154
      "vcpus": self.vcpus,
12155
      "memory": self.mem_size,
12156
      "disks": self.disks,
12157
      "disk_space_total": disk_space,
12158
      "nics": self.nics,
12159
      "required_nodes": self.required_nodes,
12160
      }
12161

    
12162
    return request
12163

    
12164
  def _AddRelocateInstance(self):
12165
    """Add relocate instance data to allocator structure.
12166

12167
    This, in combination with _ComputeClusterData, will create the
    correct structure needed as input for the allocator.
12169

12170
    The checks for the completeness of the opcode must have already been
12171
    done.
12172

12173
    """
12174
    instance = self.cfg.GetInstanceInfo(self.name)
12175
    if instance is None:
12176
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
12177
                                   " IAllocator" % self.name)
12178

    
12179
    if instance.disk_template not in constants.DTS_MIRRORED:
12180
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
12181
                                 errors.ECODE_INVAL)
12182

    
12183
    if instance.disk_template in constants.DTS_INT_MIRROR and \
12184
        len(instance.secondary_nodes) != 1:
12185
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
12186
                                 errors.ECODE_STATE)
12187

    
12188
    self.required_nodes = 1
12189
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
12190
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
12191

    
12192
    request = {
12193
      "name": self.name,
12194
      "disk_space_total": disk_space,
12195
      "required_nodes": self.required_nodes,
12196
      "relocate_from": self.relocate_from,
12197
      }
12198
    return request
12199

    
12200
  def _AddEvacuateNodes(self):
12201
    """Add evacuate nodes data to allocator structure.
12202

12203
    """
12204
    request = {
12205
      "evac_nodes": self.evac_nodes
12206
      }
12207
    return request
12208

    
12209
  def _AddMultiRelocate(self):
12210
    """Get data for multi-relocate requests.
12211

12212
    """
12213
    return {
12214
      "instances": self.instances,
12215
      "reloc_mode": self.reloc_mode,
12216
      "target_groups": self.target_groups,
12217
      }
12218

    
12219
  def _BuildInputData(self, fn):
12220
    """Build input data structures.
12221

12222
    """
12223
    self._ComputeClusterData()
12224

    
12225
    request = fn()
12226
    request["type"] = self.mode
12227
    self.in_data["request"] = request
12228

    
12229
    self.in_text = serializer.Dump(self.in_data)
12230
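  # A rough sketch of the serialized input for an allocation request (field
  # values are illustrative and heavily abridged):
  #
  #   {
  #     "version": 2,
  #     "cluster_name": "cluster.example.com",
  #     "nodegroups": {...}, "nodes": {...}, "instances": {...},
  #     "request": {"type": "allocate", "name": "inst1.example.com",
  #                 "disk_template": "drbd", "memory": 1024, "vcpus": 1,
  #                 "disks": [{"size": 10240, "mode": "rw"}],
  #                 "required_nodes": 2, ...},
  #   }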

    
12231
  _MODE_DATA = {
12232
    constants.IALLOCATOR_MODE_ALLOC:
12233
      (_AddNewInstance,
12234
       ["name", "mem_size", "disks", "disk_template", "os", "tags", "nics",
12235
        "vcpus", "hypervisor"], ht.TList),
12236
    constants.IALLOCATOR_MODE_RELOC:
12237
      (_AddRelocateInstance, ["name", "relocate_from"], ht.TList),
12238
    constants.IALLOCATOR_MODE_MEVAC:
12239
      (_AddEvacuateNodes, ["evac_nodes"],
12240
       ht.TListOf(ht.TAnd(ht.TIsLength(2),
12241
                          ht.TListOf(ht.TString)))),
12242
    constants.IALLOCATOR_MODE_MRELOC:
12243
      (_AddMultiRelocate, ["instances", "reloc_mode", "target_groups"],
12244
       ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
12245
         # pylint: disable-msg=E1101
12246
         # Class '...' has no 'OP_ID' member
12247
         "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
12248
                              opcodes.OpInstanceMigrate.OP_ID,
12249
                              opcodes.OpInstanceReplaceDisks.OP_ID])
12250
         })))),
12251
    }
12252
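  # Typical use from a logical unit (a sketch; names and arguments are
  # illustrative):
  #
  #   ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_RELOC,
  #                    name=instance.name, relocate_from=[old_node])
  #   ial.Run(self.op.iallocator)
  #   if not ial.success:
  #     raise errors.OpPrereqError("Can't compute nodes using iallocator: %s"
  #                                % ial.info, errors.ECODE_NORES)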

    
12253
  def Run(self, name, validate=True, call_fn=None):
12254
    """Run an instance allocator and return the results.
12255

12256
    """
12257
    if call_fn is None:
12258
      call_fn = self.rpc.call_iallocator_runner
12259

    
12260
    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
12261
    result.Raise("Failure while running the iallocator script")
12262

    
12263
    self.out_text = result.payload
12264
    if validate:
12265
      self._ValidateResult()
12266

    
12267
  def _ValidateResult(self):
    """Process the allocator results.

    This will process and, if successful, save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not self._result_check(self.result):
      raise errors.OpExecError("Iallocator returned invalid result,"
                               " expected %s, got %s" %
                               (self._result_check, self.result),
                               errors.ECODE_INVAL)

    if self.mode in (constants.IALLOCATOR_MODE_RELOC,
                     constants.IALLOCATOR_MODE_MEVAC):
      node2group = dict((name, ndata["group"])
                        for (name, ndata) in self.in_data["nodes"].items())

      fn = compat.partial(self._NodesToGroups, node2group,
                          self.in_data["nodegroups"])

      if self.mode == constants.IALLOCATOR_MODE_RELOC:
        assert self.relocate_from is not None
        assert self.required_nodes == 1

        request_groups = fn(self.relocate_from)
        result_groups = fn(rdict["result"])

        if result_groups != request_groups:
          raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
                                   " differ from original groups (%s)" %
                                   (utils.CommaJoin(result_groups),
                                    utils.CommaJoin(request_groups)))
      elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
        request_groups = fn(self.evac_nodes)
        for (instance_name, secnode) in self.result:
          result_groups = fn([secnode])
          if result_groups != request_groups:
            raise errors.OpExecError("Iallocator returned new secondary node"
                                     " '%s' (group '%s') for instance '%s'"
                                     " which is not in original group '%s'" %
                                     (secnode, utils.CommaJoin(result_groups),
                                      instance_name,
                                      utils.CommaJoin(request_groups)))
      else:
        raise errors.ProgrammerError("Unhandled mode '%s'" % self.mode)

    self.out_data = rdict

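  # A well-formed reply, for reference (editor's illustration with a made-up
  # node name): the script must emit a JSON object containing at least the
  # keys checked above, e.g.
  #   {"success": true, "info": "allocation successful",
  #    "result": ["node3.example.com"]}
  # where the shape of "result" is further constrained per mode by
  # self._result_check.
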
  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @type groups: dict
    @param groups: Group information
    @type nodes: list
    @param nodes: Node names

    """
    result = set()

    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        pass
      else:
        try:
          group = groups[group_uuid]
        except KeyError:
          # Can't find group, let's use UUID
          group_name = group_uuid
        else:
          group_name = group["name"]

        result.add(group_name)

    return sorted(result)
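
  # Worked example (editor's addition): with node2group = {"node1": "uuid-a"},
  # groups = {"uuid-a": {"name": "default"}} and nodes = ["node1", "ghost"],
  # this returns ["default"]; unknown nodes are skipped and groups missing
  # from the mapping fall back to their UUID string.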


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
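      # Editor's note: each entry of the "disks" parameter accepted above is a
      # dict such as {"size": 1024, "mode": "w"}, i.e. an integer size plus an
      # access mode of either 'r' or 'w'; anything else is rejected.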
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    elif self.op.mode == constants.IALLOCATOR_MODE_MRELOC:
      if self.op.instances:
        self.op.instances = _GetWantedInstances(self, self.op.instances)
      else:
        raise errors.OpPrereqError("Missing instances to relocate",
                                   errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    elif self.op.mode == constants.IALLOCATOR_MODE_MRELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       reloc_mode=self.op.reloc_mode,
                       target_groups=self.op.target_groups)
    else:
      raise errors.ProgrammerError("Unhandled mode '%s' in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
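
# Example use (editor's illustration): _GetQueryImplementation(
# constants.QR_INSTANCE) returns the _InstanceQuery class registered in
# _QUERY_IMPL above, while an unknown resource name raises OpPrereqError.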