
lib/cmdlib.py @ revision c16915bd


1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable-msg=W0201,C0302
25

    
26
# W0201 since most LU attributes are defined in CheckPrereq or similar
27
# functions
28

    
29
# C0302: since we have waaaay too many lines in this module
30

    
31
import os
32
import os.path
33
import time
34
import re
35
import platform
36
import logging
37
import copy
38
import OpenSSL
39
import socket
40
import tempfile
41
import shutil
42
import itertools
43

    
44
from ganeti import ssh
45
from ganeti import utils
46
from ganeti import errors
47
from ganeti import hypervisor
48
from ganeti import locking
49
from ganeti import constants
50
from ganeti import objects
51
from ganeti import serializer
52
from ganeti import ssconf
53
from ganeti import uidpool
54
from ganeti import compat
55
from ganeti import masterd
56
from ganeti import netutils
57
from ganeti import query
58
from ganeti import qlang
59
from ganeti import opcodes
60
from ganeti import ht
61

    
62
import ganeti.masterd.instance # pylint: disable-msg=W0611
63

    
64

    
65
def _SupportsOob(cfg, node):
66
  """Tells if node supports OOB.
67

68
  @type cfg: L{config.ConfigWriter}
69
  @param cfg: The cluster configuration
70
  @type node: L{objects.Node}
71
  @param node: The node
72
  @return: The OOB script if supported or an empty string otherwise
73

74
  """
75
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
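# A minimal usage sketch (illustrative, not tied to a specific caller): the
# return value is simply the node's oob_program parameter, so callers can
# treat it as a boolean.
#
#   if not _SupportsOob(self.cfg, node):
#     raise errors.OpPrereqError("Node %s does not support out-of-band"
#                                " handling" % node.name, errors.ECODE_STATE)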
76

    
77

    
78
class ResultWithJobs:
79
  """Data container for LU results with jobs.
80

81
  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
82
  by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
83
  contained in the C{jobs} attribute and include the job IDs in the opcode
84
  result.
85

86
  """
87
  def __init__(self, jobs, **kwargs):
88
    """Initializes this class.
89

90
    Additional return values can be specified as keyword arguments.
91

92
    @type jobs: list of lists of L{opcodes.OpCode}
93
    @param jobs: A list of lists of opcode objects
94

95
    """
96
    self.jobs = jobs
97
    self.other = kwargs
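# Illustrative sketch of how an LU's Exec could use this container; the
# opcode and the "started" keyword are example values only:
#
#   return ResultWithJobs([[opcodes.OpInstanceStartup(instance_name=iname)]],
#                         started=[iname])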
98

    
99

    
100
class LogicalUnit(object):
101
  """Logical Unit base class.
102

103
  Subclasses must follow these rules:
104
    - implement ExpandNames
105
    - implement CheckPrereq (except when tasklets are used)
106
    - implement Exec (except when tasklets are used)
107
    - implement BuildHooksEnv
108
    - implement BuildHooksNodes
109
    - redefine HPATH and HTYPE
110
    - optionally redefine their run requirements:
111
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
112

113
  Note that all commands require root permissions.
114

115
  @ivar dry_run_result: the value (if any) that will be returned to the caller
116
      in dry-run mode (signalled by opcode dry_run parameter)
117

118
  """
119
  HPATH = None
120
  HTYPE = None
121
  REQ_BGL = True
122

    
123
  def __init__(self, processor, op, context, rpc):
124
    """Constructor for LogicalUnit.
125

126
    This needs to be overridden in derived classes in order to check op
127
    validity.
128

129
    """
130
    self.proc = processor
131
    self.op = op
132
    self.cfg = context.cfg
133
    self.glm = context.glm
134
    self.context = context
135
    self.rpc = rpc
136
    # Dicts used to declare locking needs to mcpu
137
    self.needed_locks = None
138
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
139
    self.add_locks = {}
140
    self.remove_locks = {}
141
    # Used to force good behavior when calling helper functions
142
    self.recalculate_locks = {}
143
    # logging
144
    self.Log = processor.Log # pylint: disable-msg=C0103
145
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
146
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
147
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
148
    # support for dry-run
149
    self.dry_run_result = None
150
    # support for generic debug attribute
151
    if (not hasattr(self.op, "debug_level") or
152
        not isinstance(self.op.debug_level, int)):
153
      self.op.debug_level = 0
154

    
155
    # Tasklets
156
    self.tasklets = None
157

    
158
    # Validate opcode parameters and set defaults
159
    self.op.Validate(True)
160

    
161
    self.CheckArguments()
162

    
163
  def CheckArguments(self):
164
    """Check syntactic validity for the opcode arguments.
165

166
    This method is for doing a simple syntactic check and ensuring
167
    validity of opcode parameters, without any cluster-related
168
    checks. While the same can be accomplished in ExpandNames and/or
169
    CheckPrereq, doing these separately is better because:
170

171
      - ExpandNames is left purely as a lock-related function
172
      - CheckPrereq is run after we have acquired locks (and possibly
173
        waited for them)
174

175
    The function is allowed to change the self.op attribute so that
176
    later methods can no longer worry about missing parameters.
177

178
    """
179
    pass
180

    
181
  def ExpandNames(self):
182
    """Expand names for this LU.
183

184
    This method is called before starting to execute the opcode, and it should
185
    update all the parameters of the opcode to their canonical form (e.g. a
186
    short node name must be fully expanded after this method has successfully
187
    completed). This way locking, hooks, logging, etc. can work correctly.
188

189
    LUs which implement this method must also populate the self.needed_locks
190
    member, as a dict with lock levels as keys, and a list of needed lock names
191
    as values. Rules:
192

193
      - use an empty dict if you don't need any lock
194
      - if you don't need any lock at a particular level omit that level
195
      - don't put anything for the BGL level
196
      - if you want all locks at a level use locking.ALL_SET as a value
197

198
    If you need to share locks (rather than acquire them exclusively) at one
199
    level you can modify self.share_locks, setting a true value (usually 1) for
200
    that level. By default locks are not shared.
201

202
    This function can also define a list of tasklets, which then will be
203
    executed in order instead of the usual LU-level CheckPrereq and Exec
204
    functions, if those are not defined by the LU.
205

206
    Examples::
207

208
      # Acquire all nodes and one instance
209
      self.needed_locks = {
210
        locking.LEVEL_NODE: locking.ALL_SET,
211
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
212
      }
213
      # Acquire just two nodes
214
      self.needed_locks = {
215
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
216
      }
217
      # Acquire no locks
218
      self.needed_locks = {} # No, you can't leave it to the default value None
219

220
    """
221
    # The implementation of this method is mandatory only if the new LU is
222
    # concurrent, so that old LUs don't need to be changed all at the same
223
    # time.
224
    if self.REQ_BGL:
225
      self.needed_locks = {} # Exclusive LUs don't need locks.
226
    else:
227
      raise NotImplementedError
228

    
229
  def DeclareLocks(self, level):
230
    """Declare LU locking needs for a level
231

232
    While most LUs can just declare their locking needs at ExpandNames time,
233
    sometimes there's the need to calculate some locks after having acquired
234
    the ones before. This function is called just before acquiring locks at a
235
    particular level, but after acquiring the ones at lower levels, and permits
236
    such calculations. It can be used to modify self.needed_locks, and by
237
    default it does nothing.
238

239
    This function is only called if you have something already set in
240
    self.needed_locks for the level.
241

242
    @param level: Locking level which is going to be locked
243
    @type level: member of ganeti.locking.LEVELS
244

245
    """
246

    
247
  def CheckPrereq(self):
248
    """Check prerequisites for this LU.
249

250
    This method should check that the prerequisites for the execution
251
    of this LU are fulfilled. It can do internode communication, but
252
    it should be idempotent - no cluster or system changes are
253
    allowed.
254

255
    The method should raise errors.OpPrereqError in case something is
256
    not fulfilled. Its return value is ignored.
257

258
    This method should also update all the parameters of the opcode to
259
    their canonical form if it hasn't been done by ExpandNames before.
260

261
    """
262
    if self.tasklets is not None:
263
      for (idx, tl) in enumerate(self.tasklets):
264
        logging.debug("Checking prerequisites for tasklet %s/%s",
265
                      idx + 1, len(self.tasklets))
266
        tl.CheckPrereq()
267
    else:
268
      pass
269

    
270
  def Exec(self, feedback_fn):
271
    """Execute the LU.
272

273
    This method should implement the actual work. It should raise
274
    errors.OpExecError for failures that are somewhat dealt with in
275
    code, or expected.
276

277
    """
278
    if self.tasklets is not None:
279
      for (idx, tl) in enumerate(self.tasklets):
280
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
281
        tl.Exec(feedback_fn)
282
    else:
283
      raise NotImplementedError
284

    
285
  def BuildHooksEnv(self):
286
    """Build hooks environment for this LU.
287

288
    @rtype: dict
289
    @return: Dictionary containing the environment that will be used for
290
      running the hooks for this LU. The keys of the dict must not be prefixed
291
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
292
      will extend the environment with additional variables. If no environment
293
      should be defined, an empty dictionary should be returned (not C{None}).
294
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
295
      will not be called.
296

297
    """
298
    raise NotImplementedError
299

    
300
  def BuildHooksNodes(self):
301
    """Build list of nodes to run LU's hooks.
302

303
    @rtype: tuple; (list, list)
304
    @return: Tuple containing a list of node names on which the hook
305
      should run before the execution and a list of node names on which the
306
      hook should run after the execution. No nodes should be returned as an
307
      empty list (and not None).
308
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
309
      will not be called.
310

311
    """
312
    raise NotImplementedError
313

    
314
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
315
    """Notify the LU about the results of its hooks.
316

317
    This method is called every time a hooks phase is executed, and notifies
318
    the Logical Unit about the hooks' result. The LU can then use it to alter
319
    its result based on the hooks.  By default the method does nothing and the
320
    previous result is passed back unchanged but any LU can define it if it
321
    wants to use the local cluster hook-scripts somehow.
322

323
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
324
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
325
    @param hook_results: the results of the multi-node hooks rpc call
326
    @param feedback_fn: function used to send feedback back to the caller
327
    @param lu_result: the previous Exec result this LU had, or None
328
        in the PRE phase
329
    @return: the new Exec result, based on the previous result
330
        and hook results
331

332
    """
333
    # API must be kept, thus we ignore the unused-argument and
334
    # could-be-a-function pylint warnings
335
    # pylint: disable-msg=W0613,R0201
336
    return lu_result
337

    
338
  def _ExpandAndLockInstance(self):
339
    """Helper function to expand and lock an instance.
340

341
    Many LUs that work on an instance take its name in self.op.instance_name
342
    and need to expand it and then declare the expanded name for locking. This
343
    function does it, and then updates self.op.instance_name to the expanded
344
    name. It also initializes needed_locks as a dict, if this hasn't been done
345
    before.
346

347
    """
348
    if self.needed_locks is None:
349
      self.needed_locks = {}
350
    else:
351
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
352
        "_ExpandAndLockInstance called with instance-level locks set"
353
    self.op.instance_name = _ExpandInstanceName(self.cfg,
354
                                                self.op.instance_name)
355
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
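# Illustrative sketch of the usual call pattern in an instance-level LU
# (the node-lock recalculation shown here is optional):
#
#   def ExpandNames(self):
#     self._ExpandAndLockInstance()
#     self.needed_locks[locking.LEVEL_NODE] = []
#     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE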
356

    
357
  def _LockInstancesNodes(self, primary_only=False):
358
    """Helper function to declare instances' nodes for locking.
359

360
    This function should be called after locking one or more instances to lock
361
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
362
    with all primary or secondary nodes for instances already locked and
363
    present in self.needed_locks[locking.LEVEL_INSTANCE].
364

365
    It should be called from DeclareLocks, and for safety only works if
366
    self.recalculate_locks[locking.LEVEL_NODE] is set.
367

368
    In the future it may grow parameters to just lock some instance's nodes, or
369
    to just lock primaries or secondary nodes, if needed.
370

371
    It should be called in DeclareLocks in a way similar to::
372

373
      if level == locking.LEVEL_NODE:
374
        self._LockInstancesNodes()
375

376
    @type primary_only: boolean
377
    @param primary_only: only lock primary nodes of locked instances
378

379
    """
380
    assert locking.LEVEL_NODE in self.recalculate_locks, \
381
      "_LockInstancesNodes helper function called with no nodes to recalculate"
382

    
383
    # TODO: check if we've really been called with the instance locks held
384

    
385
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
386
    # future we might want to have different behaviors depending on the value
387
    # of self.recalculate_locks[locking.LEVEL_NODE]
388
    wanted_nodes = []
389
    for instance_name in self.glm.list_owned(locking.LEVEL_INSTANCE):
390
      instance = self.context.cfg.GetInstanceInfo(instance_name)
391
      wanted_nodes.append(instance.primary_node)
392
      if not primary_only:
393
        wanted_nodes.extend(instance.secondary_nodes)
394

    
395
    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
396
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
397
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
398
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
399

    
400
    del self.recalculate_locks[locking.LEVEL_NODE]
401

    
402

    
403
class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
404
  """Simple LU which runs no hooks.
405

406
  This LU is intended as a parent for other LogicalUnits which will
407
  run no hooks, in order to reduce duplicate code.
408

409
  """
410
  HPATH = None
411
  HTYPE = None
412

    
413
  def BuildHooksEnv(self):
414
    """Empty BuildHooksEnv for NoHooksLU.
415

416
    This just raises an error.
417

418
    """
419
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")
420

    
421
  def BuildHooksNodes(self):
422
    """Empty BuildHooksNodes for NoHooksLU.
423

424
    """
425
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
426

    
427

    
428
class Tasklet:
429
  """Tasklet base class.
430

431
  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
432
  they can mix legacy code with tasklets. Locking needs to be done in the LU;
433
  tasklets know nothing about locks.
434

435
  Subclasses must follow these rules:
436
    - Implement CheckPrereq
437
    - Implement Exec
438

439
  """
440
  def __init__(self, lu):
441
    self.lu = lu
442

    
443
    # Shortcuts
444
    self.cfg = lu.cfg
445
    self.rpc = lu.rpc
446

    
447
  def CheckPrereq(self):
448
    """Check prerequisites for this tasklet.
449

450
    This method should check whether the prerequisites for the execution of
451
    this tasklet are fulfilled. It can do internode communication, but it
452
    should be idempotent - no cluster or system changes are allowed.
453

454
    The method should raise errors.OpPrereqError in case something is not
455
    fulfilled. Its return value is ignored.
456

457
    This method should also update all parameters to their canonical form if it
458
    hasn't been done before.
459

460
    """
461
    pass
462

    
463
  def Exec(self, feedback_fn):
464
    """Execute the tasklet.
465

466
    This method should implement the actual work. It should raise
467
    errors.OpExecError for failures that are somewhat dealt with in code, or
468
    expected.
469

470
    """
471
    raise NotImplementedError
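# Illustrative sketch of a tasklet-based LU; "MyTasklet" is a hypothetical
# subclass whose constructor takes extra arguments:
#
#   def ExpandNames(self):
#     ...
#     self.tasklets = [MyTasklet(self, name) for name in self.wanted_names]
#
# Once self.tasklets is set, the default LogicalUnit.CheckPrereq and Exec
# iterate over the tasklets instead of requiring LU-level implementations.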
472

    
473

    
474
class _QueryBase:
475
  """Base for query utility classes.
476

477
  """
478
  #: Attribute holding field definitions
479
  FIELDS = None
480

    
481
  def __init__(self, filter_, fields, use_locking):
482
    """Initializes this class.
483

484
    """
485
    self.use_locking = use_locking
486

    
487
    self.query = query.Query(self.FIELDS, fields, filter_=filter_,
488
                             namefield="name")
489
    self.requested_data = self.query.RequestedData()
490
    self.names = self.query.RequestedNames()
491

    
492
    # Sort only if no names were requested
493
    self.sort_by_name = not self.names
494

    
495
    self.do_locking = None
496
    self.wanted = None
497

    
498
  def _GetNames(self, lu, all_names, lock_level):
499
    """Helper function to determine names asked for in the query.
500

501
    """
502
    if self.do_locking:
503
      names = lu.glm.list_owned(lock_level)
504
    else:
505
      names = all_names
506

    
507
    if self.wanted == locking.ALL_SET:
508
      assert not self.names
509
      # caller didn't specify names, so ordering is not important
510
      return utils.NiceSort(names)
511

    
512
    # caller specified names and we must keep the same order
513
    assert self.names
514
    assert not self.do_locking or lu.glm.is_owned(lock_level)
515

    
516
    missing = set(self.wanted).difference(names)
517
    if missing:
518
      raise errors.OpExecError("Some items were removed before retrieving"
519
                               " their data: %s" % missing)
520

    
521
    # Return expanded names
522
    return self.wanted
523

    
524
  def ExpandNames(self, lu):
525
    """Expand names for this query.
526

527
    See L{LogicalUnit.ExpandNames}.
528

529
    """
530
    raise NotImplementedError()
531

    
532
  def DeclareLocks(self, lu, level):
533
    """Declare locks for this query.
534

535
    See L{LogicalUnit.DeclareLocks}.
536

537
    """
538
    raise NotImplementedError()
539

    
540
  def _GetQueryData(self, lu):
541
    """Collects all data for this query.
542

543
    @return: Query data object
544

545
    """
546
    raise NotImplementedError()
547

    
548
  def NewStyleQuery(self, lu):
549
    """Collect data and execute query.
550

551
    """
552
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
553
                                  sort_by_name=self.sort_by_name)
554

    
555
  def OldStyleQuery(self, lu):
556
    """Collect data and execute query.
557

558
    """
559
    return self.query.OldStyleQuery(self._GetQueryData(lu),
560
                                    sort_by_name=self.sort_by_name)
561

    
562

    
563
def _GetWantedNodes(lu, nodes):
564
  """Returns list of checked and expanded node names.
565

566
  @type lu: L{LogicalUnit}
567
  @param lu: the logical unit on whose behalf we execute
568
  @type nodes: list
569
  @param nodes: list of node names or None for all nodes
570
  @rtype: list
571
  @return: the list of nodes, sorted
572
  @raise errors.ProgrammerError: if the nodes parameter is wrong type
573

574
  """
575
  if nodes:
576
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]
577

    
578
  return utils.NiceSort(lu.cfg.GetNodeList())
579

    
580

    
581
def _GetWantedInstances(lu, instances):
582
  """Returns list of checked and expanded instance names.
583

584
  @type lu: L{LogicalUnit}
585
  @param lu: the logical unit on whose behalf we execute
586
  @type instances: list
587
  @param instances: list of instance names or None for all instances
588
  @rtype: list
589
  @return: the list of instances, sorted
590
  @raise errors.OpPrereqError: if the instances parameter is wrong type
591
  @raise errors.OpPrereqError: if any of the passed instances is not found
592

593
  """
594
  if instances:
595
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
596
  else:
597
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
598
  return wanted
599

    
600

    
601
def _GetUpdatedParams(old_params, update_dict,
602
                      use_default=True, use_none=False):
603
  """Return the new version of a parameter dictionary.
604

605
  @type old_params: dict
606
  @param old_params: old parameters
607
  @type update_dict: dict
608
  @param update_dict: dict containing new parameter values, or
609
      constants.VALUE_DEFAULT to reset the parameter to its default
610
      value
611
  @type use_default: boolean
612
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
613
      values as 'to be deleted' values
614
  @type use_none: boolean
615
  @param use_none: whether to recognise C{None} values as 'to be
616
      deleted' values
617
  @rtype: dict
618
  @return: the new parameter dictionary
619

620
  """
621
  params_copy = copy.deepcopy(old_params)
622
  for key, val in update_dict.iteritems():
623
    if ((use_default and val == constants.VALUE_DEFAULT) or
624
        (use_none and val is None)):
625
      try:
626
        del params_copy[key]
627
      except KeyError:
628
        pass
629
    else:
630
      params_copy[key] = val
631
  return params_copy
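# Worked example (values are illustrative):
#
#   _GetUpdatedParams({"a": 1, "b": 2},
#                     {"a": constants.VALUE_DEFAULT, "c": 3})
#   => {"b": 2, "c": 3}
#
# "a" is removed so it reverts to its inherited default, "c" is added and "b"
# is kept; with use_none=True, passing {"a": None} would delete "a" in the
# same way.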
632

    
633

    
634
def _ReleaseLocks(lu, level, names=None, keep=None):
635
  """Releases locks owned by an LU.
636

637
  @type lu: L{LogicalUnit}
638
  @param level: Lock level
639
  @type names: list or None
640
  @param names: Names of locks to release
641
  @type keep: list or None
642
  @param keep: Names of locks to retain
643

644
  """
645
  assert not (keep is not None and names is not None), \
646
         "Only one of the 'names' and the 'keep' parameters can be given"
647

    
648
  if names is not None:
649
    should_release = names.__contains__
650
  elif keep:
651
    should_release = lambda name: name not in keep
652
  else:
653
    should_release = None
654

    
655
  if should_release:
656
    retain = []
657
    release = []
658

    
659
    # Determine which locks to release
660
    for name in lu.glm.list_owned(level):
661
      if should_release(name):
662
        release.append(name)
663
      else:
664
        retain.append(name)
665

    
666
    assert len(lu.glm.list_owned(level)) == (len(retain) + len(release))
667

    
668
    # Release just some locks
669
    lu.glm.release(level, names=release)
670

    
671
    assert frozenset(lu.glm.list_owned(level)) == frozenset(retain)
672
  else:
673
    # Release everything
674
    lu.glm.release(level)
675

    
676
    assert not lu.glm.is_owned(level), "No locks should be owned"
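# Illustrative sketch (the attribute name is an example only):
#
#   # keep only the lock on the node the instance ended up on
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.target_node])
#   # release all instance locks
#   _ReleaseLocks(self, locking.LEVEL_INSTANCE)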
677

    
678

    
679
def _RunPostHook(lu, node_name):
680
  """Runs the post-hook for an opcode on a single node.
681

682
  """
683
  hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
684
  try:
685
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
686
  except:
687
    # pylint: disable-msg=W0702
688
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)
689

    
690

    
691
def _CheckOutputFields(static, dynamic, selected):
692
  """Checks whether all selected fields are valid.
693

694
  @type static: L{utils.FieldSet}
695
  @param static: static fields set
696
  @type dynamic: L{utils.FieldSet}
697
  @param dynamic: dynamic fields set
698

699
  """
700
  f = utils.FieldSet()
701
  f.Extend(static)
702
  f.Extend(dynamic)
703

    
704
  delta = f.NonMatching(selected)
705
  if delta:
706
    raise errors.OpPrereqError("Unknown output fields selected: %s"
707
                               % ",".join(delta), errors.ECODE_INVAL)
708
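# Illustrative sketch for an old-style query LU (the field names are
# examples only):
#
#   _CheckOutputFields(static=utils.FieldSet("name", "pinst_cnt"),
#                      dynamic=utils.FieldSet("free_memory", "free_disk"),
#                      selected=self.op.output_fields)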

    
709

    
710
def _CheckGlobalHvParams(params):
711
  """Validates that given hypervisor params are not global ones.
712

713
  This will ensure that instances don't get customised versions of
714
  global params.
715

716
  """
717
  used_globals = constants.HVC_GLOBALS.intersection(params)
718
  if used_globals:
719
    msg = ("The following hypervisor parameters are global and cannot"
720
           " be customized at instance level, please modify them at"
721
           " cluster level: %s" % utils.CommaJoin(used_globals))
722
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
723

    
724

    
725
def _CheckNodeOnline(lu, node, msg=None):
726
  """Ensure that a given node is online.
727

728
  @param lu: the LU on behalf of which we make the check
729
  @param node: the node to check
730
  @param msg: if passed, should be a message to replace the default one
731
  @raise errors.OpPrereqError: if the node is offline
732

733
  """
734
  if msg is None:
735
    msg = "Can't use offline node"
736
  if lu.cfg.GetNodeInfo(node).offline:
737
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
738

    
739

    
740
def _CheckNodeNotDrained(lu, node):
741
  """Ensure that a given node is not drained.
742

743
  @param lu: the LU on behalf of which we make the check
744
  @param node: the node to check
745
  @raise errors.OpPrereqError: if the node is drained
746

747
  """
748
  if lu.cfg.GetNodeInfo(node).drained:
749
    raise errors.OpPrereqError("Can't use drained node %s" % node,
750
                               errors.ECODE_STATE)
751

    
752

    
753
def _CheckNodeVmCapable(lu, node):
754
  """Ensure that a given node is vm capable.
755

756
  @param lu: the LU on behalf of which we make the check
757
  @param node: the node to check
758
  @raise errors.OpPrereqError: if the node is not vm capable
759

760
  """
761
  if not lu.cfg.GetNodeInfo(node).vm_capable:
762
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
763
                               errors.ECODE_STATE)
764

    
765

    
766
def _CheckNodeHasOS(lu, node, os_name, force_variant):
767
  """Ensure that a node supports a given OS.
768

769
  @param lu: the LU on behalf of which we make the check
770
  @param node: the node to check
771
  @param os_name: the OS to query about
772
  @param force_variant: whether to ignore variant errors
773
  @raise errors.OpPrereqError: if the node does not support the OS
774

775
  """
776
  result = lu.rpc.call_os_get(node, os_name)
777
  result.Raise("OS '%s' not in supported OS list for node %s" %
778
               (os_name, node),
779
               prereq=True, ecode=errors.ECODE_INVAL)
780
  if not force_variant:
781
    _CheckOSVariant(result.payload, os_name)
782

    
783

    
784
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
785
  """Ensure that a node has the given secondary ip.
786

787
  @type lu: L{LogicalUnit}
788
  @param lu: the LU on behalf of which we make the check
789
  @type node: string
790
  @param node: the node to check
791
  @type secondary_ip: string
792
  @param secondary_ip: the ip to check
793
  @type prereq: boolean
794
  @param prereq: whether to throw a prerequisite or an execute error
795
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
796
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
797

798
  """
799
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
800
  result.Raise("Failure checking secondary ip on node %s" % node,
801
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
802
  if not result.payload:
803
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
804
           " please fix and re-run this command" % secondary_ip)
805
    if prereq:
806
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
807
    else:
808
      raise errors.OpExecError(msg)
809

    
810

    
811
def _GetClusterDomainSecret():
812
  """Reads the cluster domain secret.
813

814
  """
815
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
816
                               strict=True)
817

    
818

    
819
def _CheckInstanceDown(lu, instance, reason):
820
  """Ensure that an instance is not running."""
821
  if instance.admin_up:
822
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
823
                               (instance.name, reason), errors.ECODE_STATE)
824

    
825
  pnode = instance.primary_node
826
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
827
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
828
              prereq=True, ecode=errors.ECODE_ENVIRON)
829

    
830
  if instance.name in ins_l.payload:
831
    raise errors.OpPrereqError("Instance %s is running, %s" %
832
                               (instance.name, reason), errors.ECODE_STATE)
833

    
834

    
835
def _ExpandItemName(fn, name, kind):
836
  """Expand an item name.
837

838
  @param fn: the function to use for expansion
839
  @param name: requested item name
840
  @param kind: text description ('Node' or 'Instance')
841
  @return: the resolved (full) name
842
  @raise errors.OpPrereqError: if the item is not found
843

844
  """
845
  full_name = fn(name)
846
  if full_name is None:
847
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
848
                               errors.ECODE_NOENT)
849
  return full_name
850

    
851

    
852
def _ExpandNodeName(cfg, name):
853
  """Wrapper over L{_ExpandItemName} for nodes."""
854
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
855

    
856

    
857
def _ExpandInstanceName(cfg, name):
858
  """Wrapper over L{_ExpandItemName} for instance."""
859
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
860

    
861

    
862
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
863
                          memory, vcpus, nics, disk_template, disks,
864
                          bep, hvp, hypervisor_name, tags):
865
  """Builds instance related env variables for hooks
866

867
  This builds the hook environment from individual variables.
868

869
  @type name: string
870
  @param name: the name of the instance
871
  @type primary_node: string
872
  @param primary_node: the name of the instance's primary node
873
  @type secondary_nodes: list
874
  @param secondary_nodes: list of secondary nodes as strings
875
  @type os_type: string
876
  @param os_type: the name of the instance's OS
877
  @type status: boolean
878
  @param status: the should_run status of the instance
879
  @type memory: string
880
  @param memory: the memory size of the instance
881
  @type vcpus: string
882
  @param vcpus: the count of VCPUs the instance has
883
  @type nics: list
884
  @param nics: list of tuples (ip, mac, mode, link) representing
885
      the NICs the instance has
886
  @type disk_template: string
887
  @param disk_template: the disk template of the instance
888
  @type disks: list
889
  @param disks: the list of (size, mode) pairs
890
  @type bep: dict
891
  @param bep: the backend parameters for the instance
892
  @type hvp: dict
893
  @param hvp: the hypervisor parameters for the instance
894
  @type hypervisor_name: string
895
  @param hypervisor_name: the hypervisor for the instance
896
  @type tags: list
897
  @param tags: list of instance tags as strings
898
  @rtype: dict
899
  @return: the hook environment for this instance
900

901
  """
902
  if status:
903
    str_status = "up"
904
  else:
905
    str_status = "down"
906
  env = {
907
    "OP_TARGET": name,
908
    "INSTANCE_NAME": name,
909
    "INSTANCE_PRIMARY": primary_node,
910
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
911
    "INSTANCE_OS_TYPE": os_type,
912
    "INSTANCE_STATUS": str_status,
913
    "INSTANCE_MEMORY": memory,
914
    "INSTANCE_VCPUS": vcpus,
915
    "INSTANCE_DISK_TEMPLATE": disk_template,
916
    "INSTANCE_HYPERVISOR": hypervisor_name,
917
  }
918

    
919
  if nics:
920
    nic_count = len(nics)
921
    for idx, (ip, mac, mode, link) in enumerate(nics):
922
      if ip is None:
923
        ip = ""
924
      env["INSTANCE_NIC%d_IP" % idx] = ip
925
      env["INSTANCE_NIC%d_MAC" % idx] = mac
926
      env["INSTANCE_NIC%d_MODE" % idx] = mode
927
      env["INSTANCE_NIC%d_LINK" % idx] = link
928
      if mode == constants.NIC_MODE_BRIDGED:
929
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
930
  else:
931
    nic_count = 0
932

    
933
  env["INSTANCE_NIC_COUNT"] = nic_count
934

    
935
  if disks:
936
    disk_count = len(disks)
937
    for idx, (size, mode) in enumerate(disks):
938
      env["INSTANCE_DISK%d_SIZE" % idx] = size
939
      env["INSTANCE_DISK%d_MODE" % idx] = mode
940
  else:
941
    disk_count = 0
942

    
943
  env["INSTANCE_DISK_COUNT"] = disk_count
944

    
945
  if not tags:
946
    tags = []
947

    
948
  env["INSTANCE_TAGS"] = " ".join(tags)
949

    
950
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
951
    for key, value in source.items():
952
      env["INSTANCE_%s_%s" % (kind, key)] = value
953

    
954
  return env
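# Example of the resulting environment for an instance with one bridged NIC
# and a single "rw" disk (angle-bracketed values are placeholders):
#
#   INSTANCE_NIC_COUNT=1   INSTANCE_NIC0_MODE=bridged  INSTANCE_NIC0_BRIDGE=<link>
#   INSTANCE_DISK_COUNT=1  INSTANCE_DISK0_SIZE=<size>  INSTANCE_DISK0_MODE=rw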
955

    
956

    
957
def _NICListToTuple(lu, nics):
958
  """Build a list of nic information tuples.
959

960
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
961
  value in LUInstanceQueryData.
962

963
  @type lu:  L{LogicalUnit}
964
  @param lu: the logical unit on whose behalf we execute
965
  @type nics: list of L{objects.NIC}
966
  @param nics: list of nics to convert to hooks tuples
967

968
  """
969
  hooks_nics = []
970
  cluster = lu.cfg.GetClusterInfo()
971
  for nic in nics:
972
    ip = nic.ip
973
    mac = nic.mac
974
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
975
    mode = filled_params[constants.NIC_MODE]
976
    link = filled_params[constants.NIC_LINK]
977
    hooks_nics.append((ip, mac, mode, link))
978
  return hooks_nics
979

    
980

    
981
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
982
  """Builds instance related env variables for hooks from an object.
983

984
  @type lu: L{LogicalUnit}
985
  @param lu: the logical unit on whose behalf we execute
986
  @type instance: L{objects.Instance}
987
  @param instance: the instance for which we should build the
988
      environment
989
  @type override: dict
990
  @param override: dictionary with key/values that will override
991
      our values
992
  @rtype: dict
993
  @return: the hook environment dictionary
994

995
  """
996
  cluster = lu.cfg.GetClusterInfo()
997
  bep = cluster.FillBE(instance)
998
  hvp = cluster.FillHV(instance)
999
  args = {
1000
    'name': instance.name,
1001
    'primary_node': instance.primary_node,
1002
    'secondary_nodes': instance.secondary_nodes,
1003
    'os_type': instance.os,
1004
    'status': instance.admin_up,
1005
    'memory': bep[constants.BE_MEMORY],
1006
    'vcpus': bep[constants.BE_VCPUS],
1007
    'nics': _NICListToTuple(lu, instance.nics),
1008
    'disk_template': instance.disk_template,
1009
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
1010
    'bep': bep,
1011
    'hvp': hvp,
1012
    'hypervisor_name': instance.hypervisor,
1013
    'tags': instance.tags,
1014
  }
1015
  if override:
1016
    args.update(override)
1017
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
1018

    
1019

    
1020
def _AdjustCandidatePool(lu, exceptions):
1021
  """Adjust the candidate pool after node operations.
1022

1023
  """
1024
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1025
  if mod_list:
1026
    lu.LogInfo("Promoted nodes to master candidate role: %s",
1027
               utils.CommaJoin(node.name for node in mod_list))
1028
    for name in mod_list:
1029
      lu.context.ReaddNode(name)
1030
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1031
  if mc_now > mc_max:
1032
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1033
               (mc_now, mc_max))
1034

    
1035

    
1036
def _DecideSelfPromotion(lu, exceptions=None):
1037
  """Decide whether I should promote myself as a master candidate.
1038

1039
  """
1040
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1041
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1042
  # the new node will increase mc_max by one, so:
1043
  mc_should = min(mc_should + 1, cp_size)
1044
  return mc_now < mc_should
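# Worked example (numbers are illustrative): with candidate_pool_size=10 and
# GetMasterCandidateStats reporting mc_now=3, mc_should=4, the adjusted target
# is min(4 + 1, 10) = 5, so 3 < 5 and the node decides to promote itself.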
1045

    
1046

    
1047
def _CheckNicsBridgesExist(lu, target_nics, target_node):
1048
  """Check that the bridges needed by a list of NICs exist.
1049

1050
  """
1051
  cluster = lu.cfg.GetClusterInfo()
1052
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1053
  brlist = [params[constants.NIC_LINK] for params in paramslist
1054
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1055
  if brlist:
1056
    result = lu.rpc.call_bridges_exist(target_node, brlist)
1057
    result.Raise("Error checking bridges on destination node '%s'" %
1058
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1059

    
1060

    
1061
def _CheckInstanceBridgesExist(lu, instance, node=None):
1062
  """Check that the bridges needed by an instance exist.
1063

1064
  """
1065
  if node is None:
1066
    node = instance.primary_node
1067
  _CheckNicsBridgesExist(lu, instance.nics, node)
1068

    
1069

    
1070
def _CheckOSVariant(os_obj, name):
1071
  """Check whether an OS name conforms to the os variants specification.
1072

1073
  @type os_obj: L{objects.OS}
1074
  @param os_obj: OS object to check
1075
  @type name: string
1076
  @param name: OS name passed by the user, to check for validity
1077

1078
  """
1079
  if not os_obj.supported_variants:
1080
    return
1081
  variant = objects.OS.GetVariant(name)
1082
  if not variant:
1083
    raise errors.OpPrereqError("OS name must include a variant",
1084
                               errors.ECODE_INVAL)
1085

    
1086
  if variant not in os_obj.supported_variants:
1087
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1088

    
1089

    
1090
def _GetNodeInstancesInner(cfg, fn):
1091
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1092

    
1093

    
1094
def _GetNodeInstances(cfg, node_name):
1095
  """Returns a list of all primary and secondary instances on a node.
1096

1097
  """
1098

    
1099
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1100

    
1101

    
1102
def _GetNodePrimaryInstances(cfg, node_name):
1103
  """Returns primary instances on a node.
1104

1105
  """
1106
  return _GetNodeInstancesInner(cfg,
1107
                                lambda inst: node_name == inst.primary_node)
1108

    
1109

    
1110
def _GetNodeSecondaryInstances(cfg, node_name):
1111
  """Returns secondary instances on a node.
1112

1113
  """
1114
  return _GetNodeInstancesInner(cfg,
1115
                                lambda inst: node_name in inst.secondary_nodes)
1116

    
1117

    
1118
def _GetStorageTypeArgs(cfg, storage_type):
1119
  """Returns the arguments for a storage type.
1120

1121
  """
1122
  # Special case for file storage
1123
  if storage_type == constants.ST_FILE:
1124
    # storage.FileStorage wants a list of storage directories
1125
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1126

    
1127
  return []
1128

    
1129

    
1130
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1131
  faulty = []
1132

    
1133
  for dev in instance.disks:
1134
    cfg.SetDiskID(dev, node_name)
1135

    
1136
  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1137
  result.Raise("Failed to get disk status from node %s" % node_name,
1138
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
1139

    
1140
  for idx, bdev_status in enumerate(result.payload):
1141
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1142
      faulty.append(idx)
1143

    
1144
  return faulty
1145

    
1146

    
1147
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1148
  """Check the sanity of iallocator and node arguments and use the
1149
  cluster-wide iallocator if appropriate.
1150

1151
  Check that at most one of (iallocator, node) is specified. If none is
1152
  specified, then the LU's opcode's iallocator slot is filled with the
1153
  cluster-wide default iallocator.
1154

1155
  @type iallocator_slot: string
1156
  @param iallocator_slot: the name of the opcode iallocator slot
1157
  @type node_slot: string
1158
  @param node_slot: the name of the opcode target node slot
1159

1160
  """
1161
  node = getattr(lu.op, node_slot, None)
1162
  iallocator = getattr(lu.op, iallocator_slot, None)
1163

    
1164
  if node is not None and iallocator is not None:
1165
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
1166
                               errors.ECODE_INVAL)
1167
  elif node is None and iallocator is None:
1168
    default_iallocator = lu.cfg.GetDefaultIAllocator()
1169
    if default_iallocator:
1170
      setattr(lu.op, iallocator_slot, default_iallocator)
1171
    else:
1172
      raise errors.OpPrereqError("No iallocator or node given and no"
1173
                                 " cluster-wide default iallocator found;"
1174
                                 " please specify either an iallocator or a"
1175
                                 " node, or set a cluster-wide default"
1176
                                 " iallocator")
1177

    
1178

    
1179
class LUClusterPostInit(LogicalUnit):
1180
  """Logical unit for running hooks after cluster initialization.
1181

1182
  """
1183
  HPATH = "cluster-init"
1184
  HTYPE = constants.HTYPE_CLUSTER
1185

    
1186
  def BuildHooksEnv(self):
1187
    """Build hooks env.
1188

1189
    """
1190
    return {
1191
      "OP_TARGET": self.cfg.GetClusterName(),
1192
      }
1193

    
1194
  def BuildHooksNodes(self):
1195
    """Build hooks nodes.
1196

1197
    """
1198
    return ([], [self.cfg.GetMasterNode()])
1199

    
1200
  def Exec(self, feedback_fn):
1201
    """Nothing to do.
1202

1203
    """
1204
    return True
1205

    
1206

    
1207
class LUClusterDestroy(LogicalUnit):
1208
  """Logical unit for destroying the cluster.
1209

1210
  """
1211
  HPATH = "cluster-destroy"
1212
  HTYPE = constants.HTYPE_CLUSTER
1213

    
1214
  def BuildHooksEnv(self):
1215
    """Build hooks env.
1216

1217
    """
1218
    return {
1219
      "OP_TARGET": self.cfg.GetClusterName(),
1220
      }
1221

    
1222
  def BuildHooksNodes(self):
1223
    """Build hooks nodes.
1224

1225
    """
1226
    return ([], [])
1227

    
1228
  def CheckPrereq(self):
1229
    """Check prerequisites.
1230

1231
    This checks whether the cluster is empty.
1232

1233
    Any errors are signaled by raising errors.OpPrereqError.
1234

1235
    """
1236
    master = self.cfg.GetMasterNode()
1237

    
1238
    nodelist = self.cfg.GetNodeList()
1239
    if len(nodelist) != 1 or nodelist[0] != master:
1240
      raise errors.OpPrereqError("There are still %d node(s) in"
1241
                                 " this cluster." % (len(nodelist) - 1),
1242
                                 errors.ECODE_INVAL)
1243
    instancelist = self.cfg.GetInstanceList()
1244
    if instancelist:
1245
      raise errors.OpPrereqError("There are still %d instance(s) in"
1246
                                 " this cluster." % len(instancelist),
1247
                                 errors.ECODE_INVAL)
1248

    
1249
  def Exec(self, feedback_fn):
1250
    """Destroys the cluster.
1251

1252
    """
1253
    master = self.cfg.GetMasterNode()
1254

    
1255
    # Run post hooks on master node before it's removed
1256
    _RunPostHook(self, master)
1257

    
1258
    result = self.rpc.call_node_stop_master(master, False)
1259
    result.Raise("Could not disable the master role")
1260

    
1261
    return master
1262

    
1263

    
1264
def _VerifyCertificate(filename):
1265
  """Verifies a certificate for L{LUClusterVerifyConfig}.
1266

1267
  @type filename: string
1268
  @param filename: Path to PEM file
1269

1270
  """
1271
  try:
1272
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1273
                                           utils.ReadFile(filename))
1274
  except Exception, err: # pylint: disable-msg=W0703
1275
    return (LUClusterVerifyConfig.ETYPE_ERROR,
1276
            "Failed to load X509 certificate %s: %s" % (filename, err))
1277

    
1278
  (errcode, msg) = \
1279
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1280
                                constants.SSL_CERT_EXPIRATION_ERROR)
1281

    
1282
  if msg:
1283
    fnamemsg = "While verifying %s: %s" % (filename, msg)
1284
  else:
1285
    fnamemsg = None
1286

    
1287
  if errcode is None:
1288
    return (None, fnamemsg)
1289
  elif errcode == utils.CERT_WARNING:
1290
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1291
  elif errcode == utils.CERT_ERROR:
1292
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1293

    
1294
  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1295

    
1296

    
1297
def _GetAllHypervisorParameters(cluster, instances):
1298
  """Compute the set of all hypervisor parameters.
1299

1300
  @type cluster: L{objects.Cluster}
1301
  @param cluster: the cluster object
1302
  @param instances: list of L{objects.Instance}
1303
  @param instances: additional instances from which to obtain parameters
1304
  @rtype: list of (origin, hypervisor, parameters)
1305
  @return: a list with all parameters found, indicating the hypervisor they
1306
       apply to, and the origin (can be "cluster", "os X", or "instance Y")
1307

1308
  """
1309
  hvp_data = []
1310

    
1311
  for hv_name in cluster.enabled_hypervisors:
1312
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1313

    
1314
  for os_name, os_hvp in cluster.os_hvp.items():
1315
    for hv_name, hv_params in os_hvp.items():
1316
      if hv_params:
1317
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1318
        hvp_data.append(("os %s" % os_name, hv_name, full_params))
1319

    
1320
  # TODO: collapse identical parameter values in a single one
1321
  for instance in instances:
1322
    if instance.hvparams:
1323
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1324
                       cluster.FillHV(instance)))
1325

    
1326
  return hvp_data
1327

    
1328

    
1329
class _VerifyErrors(object):
1330
  """Mix-in for cluster/group verify LUs.
1331

1332
  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1333
  self.op and self._feedback_fn to be available.)
1334

1335
  """
1336
  TCLUSTER = "cluster"
1337
  TNODE = "node"
1338
  TINSTANCE = "instance"
1339

    
1340
  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1341
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1342
  ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
1343
  ECLUSTERDANGLINGNODES = (TNODE, "ECLUSTERDANGLINGNODES")
1344
  ECLUSTERDANGLINGINST = (TNODE, "ECLUSTERDANGLINGINST")
1345
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1346
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1347
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1348
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1349
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1350
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1351
  EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
1352
  ENODEDRBD = (TNODE, "ENODEDRBD")
1353
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1354
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1355
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
1356
  ENODEHV = (TNODE, "ENODEHV")
1357
  ENODELVM = (TNODE, "ENODELVM")
1358
  ENODEN1 = (TNODE, "ENODEN1")
1359
  ENODENET = (TNODE, "ENODENET")
1360
  ENODEOS = (TNODE, "ENODEOS")
1361
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1362
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1363
  ENODERPC = (TNODE, "ENODERPC")
1364
  ENODESSH = (TNODE, "ENODESSH")
1365
  ENODEVERSION = (TNODE, "ENODEVERSION")
1366
  ENODESETUP = (TNODE, "ENODESETUP")
1367
  ENODETIME = (TNODE, "ENODETIME")
1368
  ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
1369

    
1370
  ETYPE_FIELD = "code"
1371
  ETYPE_ERROR = "ERROR"
1372
  ETYPE_WARNING = "WARNING"
1373

    
1374
  def _Error(self, ecode, item, msg, *args, **kwargs):
1375
    """Format an error message.
1376

1377
    Based on the opcode's error_codes parameter, either format a
1378
    parseable error code, or a simpler error string.
1379

1380
    This must be called only from Exec and functions called from Exec.
1381

1382
    """
1383
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1384
    itype, etxt = ecode
1385
    # first complete the msg
1386
    if args:
1387
      msg = msg % args
1388
    # then format the whole message
1389
    if self.op.error_codes: # This is a mix-in. pylint: disable-msg=E1101
1390
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1391
    else:
1392
      if item:
1393
        item = " " + item
1394
      else:
1395
        item = ""
1396
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1397
    # and finally report it via the feedback_fn
1398
    self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable-msg=E1101
1399

    
1400
  def _ErrorIf(self, cond, *args, **kwargs):
1401
    """Log an error message if the passed condition is True.
1402

1403
    """
1404
    cond = (bool(cond)
1405
            or self.op.debug_simulate_errors) # pylint: disable-msg=E1101
1406
    if cond:
1407
      self._Error(*args, **kwargs)
1408
    # do not mark the operation as failed when the severity is only a warning
1409
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1410
      self.bad = self.bad or cond
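# Illustrative sketch of the calling convention used by the verify code (the
# error message is an example only):
#
#   _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
#   _ErrorIf(test, self.ENODEHV, node,
#            "hypervisor verify failure: '%s'", err_msg)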
1411

    
1412

    
1413
class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1414
  """Verifies the cluster config.
1415

1416
  """
1417
  REQ_BGL = True
1418

    
1419
  def _VerifyHVP(self, hvp_data):
1420
    """Verifies locally the syntax of the hypervisor parameters.
1421

1422
    """
1423
    for item, hv_name, hv_params in hvp_data:
1424
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1425
             (hv_name, item))
1426
      try:
1427
        hv_class = hypervisor.GetHypervisor(hv_name)
1428
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1429
        hv_class.CheckParameterSyntax(hv_params)
1430
      except errors.GenericError, err:
1431
        self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
1432

    
1433
  def ExpandNames(self):
1434
    # Information can be safely retrieved as the BGL is acquired in exclusive
1435
    # mode
1436
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1437
    self.all_node_info = self.cfg.GetAllNodesInfo()
1438
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
1439
    self.needed_locks = {}
1440

    
1441
  def Exec(self, feedback_fn):
1442
    """Verify integrity of cluster, performing various tests on nodes.
1443

1444
    """
1445
    self.bad = False
1446
    self._feedback_fn = feedback_fn
1447

    
1448
    feedback_fn("* Verifying cluster config")
1449

    
1450
    for msg in self.cfg.VerifyConfig():
1451
      self._ErrorIf(True, self.ECLUSTERCFG, None, msg)
1452

    
1453
    feedback_fn("* Verifying cluster certificate files")
1454

    
1455
    for cert_filename in constants.ALL_CERT_FILES:
1456
      (errcode, msg) = _VerifyCertificate(cert_filename)
1457
      self._ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1458

    
1459
    feedback_fn("* Verifying hypervisor parameters")
1460

    
1461
    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1462
                                                self.all_inst_info.values()))
1463

    
1464
    feedback_fn("* Verifying all nodes belong to an existing group")
1465

    
1466
    # We do this verification here because, should this bogus circumstance
1467
    # occur, it would never be caught by VerifyGroup, which only acts on
1468
    # nodes/instances reachable from existing node groups.
1469

    
1470
    dangling_nodes = set(node.name for node in self.all_node_info.values()
1471
                         if node.group not in self.all_group_info)
1472

    
1473
    dangling_instances = {}
1474
    no_node_instances = []
1475

    
1476
    for inst in self.all_inst_info.values():
1477
      if inst.primary_node in dangling_nodes:
1478
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1479
      elif inst.primary_node not in self.all_node_info:
1480
        no_node_instances.append(inst.name)
1481

    
1482
    pretty_dangling = [
1483
        "%s (%s)" %
1484
        (node.name,
1485
         utils.CommaJoin(dangling_instances.get(node.name,
1486
                                                ["no instances"])))
1487
        for node in dangling_nodes]
1488

    
1489
    self._ErrorIf(bool(dangling_nodes), self.ECLUSTERDANGLINGNODES, None,
1490
                  "the following nodes (and their instances) belong to a non"
1491
                  " existing group: %s", utils.CommaJoin(pretty_dangling))
1492

    
1493
    self._ErrorIf(bool(no_node_instances), self.ECLUSTERDANGLINGINST, None,
1494
                  "the following instances have a non-existing primary-node:"
1495
                  " %s", utils.CommaJoin(no_node_instances))
1496

    
1497
    return (not self.bad, [g.name for g in self.all_group_info.values()])
1498

    
1499

    
1500
class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1501
  """Verifies the status of a node group.
1502

1503
  """
1504
  HPATH = "cluster-verify"
1505
  HTYPE = constants.HTYPE_CLUSTER
1506
  REQ_BGL = False
1507

    
1508
  _HOOKS_INDENT_RE = re.compile("^", re.M)
1509

    
1510
  class NodeImage(object):
1511
    """A class representing the logical and physical status of a node.
1512

1513
    @type name: string
1514
    @ivar name: the node name to which this object refers
1515
    @ivar volumes: a structure as returned from
1516
        L{ganeti.backend.GetVolumeList} (runtime)
1517
    @ivar instances: a list of running instances (runtime)
1518
    @ivar pinst: list of configured primary instances (config)
1519
    @ivar sinst: list of configured secondary instances (config)
1520
    @ivar sbp: dictionary of {primary-node: list of instances} for all
1521
        instances for which this node is secondary (config)
1522
    @ivar mfree: free memory, as reported by hypervisor (runtime)
1523
    @ivar dfree: free disk, as reported by the node (runtime)
1524
    @ivar offline: the offline status (config)
1525
    @type rpc_fail: boolean
1526
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
1527
        not whether the individual keys were correct) (runtime)
1528
    @type lvm_fail: boolean
1529
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1530
    @type hyp_fail: boolean
1531
    @ivar hyp_fail: whether the RPC call didn't return the instance list
1532
    @type ghost: boolean
1533
    @ivar ghost: whether this is a known node or not (config)
1534
    @type os_fail: boolean
1535
    @ivar os_fail: whether the RPC call didn't return valid OS data
1536
    @type oslist: list
1537
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1538
    @type vm_capable: boolean
1539
    @ivar vm_capable: whether the node can host instances
1540

1541
    """
1542
    def __init__(self, offline=False, name=None, vm_capable=True):
1543
      self.name = name
1544
      self.volumes = {}
1545
      self.instances = []
1546
      self.pinst = []
1547
      self.sinst = []
1548
      self.sbp = {}
1549
      self.mfree = 0
1550
      self.dfree = 0
1551
      self.offline = offline
1552
      self.vm_capable = vm_capable
1553
      self.rpc_fail = False
1554
      self.lvm_fail = False
1555
      self.hyp_fail = False
1556
      self.ghost = False
1557
      self.os_fail = False
1558
      self.oslist = {}
1559

    
1560
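  # A minimal sketch (assumed, hand-made values; not produced by any real RPC)
  # of how a NodeImage ends up populated: the config fields are filled from
  # CheckPrereq data, the runtime fields from the node_verify RPC payload by
  # the _Update* helpers further below.
  #
  #   nimg = LUClusterVerifyGroup.NodeImage(name="node1.example.com")
  #   nimg.pinst = ["inst1"]           # primary instances (config)
  #   nimg.sinst = ["inst2"]           # secondary instances (config)
  #   nimg.sbp = {"node2": ["inst2"]}  # instances whose primary is node2
  #   nimg.mfree = 4096                # MiB free, reported at runtime
  #   nimg.instances = ["inst1"]       # what the hypervisor actually runs
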
  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    # Get instances in node group; this is unsafe and needs verification later
    inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: inst_names,
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      locking.LEVEL_NODE: [],
      }

    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      # Get members of node group; this is unsafe and needs verification later
      nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)

      all_inst_info = self.cfg.GetAllInstancesInfo()

      # In Exec(), we warn about mirrored instances that have primary and
      # secondary living in separate node groups. To fully verify that
      # volumes for these instances are healthy, we will need to do an
      # extra call to their secondaries. We ensure here those nodes will
      # be locked.
      for inst in self.glm.list_owned(locking.LEVEL_INSTANCE):
        # Important: access only the instances whose lock is owned
        if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
          nodes.update(all_inst_info[inst].secondary_nodes)

      self.needed_locks[locking.LEVEL_NODE] = nodes

  def CheckPrereq(self):
    group_nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
    group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)

    unlocked_nodes = \
        group_nodes.difference(self.glm.list_owned(locking.LEVEL_NODE))

    unlocked_instances = \
        group_instances.difference(self.glm.list_owned(locking.LEVEL_INSTANCE))

    if unlocked_nodes:
      raise errors.OpPrereqError("Missing lock for nodes: %s" %
                                 utils.CommaJoin(unlocked_nodes))

    if unlocked_instances:
      raise errors.OpPrereqError("Missing lock for instances: %s" %
                                 utils.CommaJoin(unlocked_instances))

    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()

    self.my_node_names = utils.NiceSort(group_nodes)
    self.my_inst_names = utils.NiceSort(group_instances)

    self.my_node_info = dict((name, self.all_node_info[name])
                             for name in self.my_node_names)

    self.my_inst_info = dict((name, self.all_inst_info[name])
                             for name in self.my_inst_names)

    # We detect here the nodes that will need the extra RPC calls for verifying
    # split LV volumes; they should be locked.
    extra_lv_nodes = set()

    for inst in self.my_inst_info.values():
      if inst.disk_template in constants.DTS_INT_MIRROR:
        group = self.my_node_info[inst.primary_node].group
        for nname in inst.secondary_nodes:
          if self.all_node_info[nname].group != group:
            extra_lv_nodes.add(nname)

    unlocked_lv_nodes = \
        extra_lv_nodes.difference(self.glm.list_owned(locking.LEVEL_NODE))

    if unlocked_lv_nodes:
      raise errors.OpPrereqError("these nodes could be locked: %s" %
                                 utils.CommaJoin(unlocked_lv_nodes))
    self.extra_lv_nodes = list(extra_lv_nodes)

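  # Example (hypothetical instance layout) of why CheckPrereq collects
  # extra_lv_nodes: a mirrored instance whose primary is in this group but
  # whose secondary lives in another group still needs its secondary
  # contacted for the LV check.
  #
  #   inst.disk_template = constants.DT_DRBD8     # in DTS_INT_MIRROR
  #   inst.primary_node = "node-in-this-group"
  #   inst.secondary_nodes = ["node-in-other-group"]
  #   # => "node-in-other-group" ends up in self.extra_lv_nodes
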
  def _VerifyNode(self, ninfo, nresult):
    """Perform some basic validation on data returned from a node.

      - check the result data structure is well formed and has all the
        mandatory fields
      - check ganeti version

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @rtype: boolean
    @return: whether overall this call was successful (and we can expect
         reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, self.ENODERPC, node,
                  "unable to verify node: no data returned")
    if test:
      return False

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return False

    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return False

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if ninfo.vm_capable and isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
    if ninfo.vm_capable and isinstance(hvp_result, list):
      for item, hv_name, hv_result in hvp_result:
        _ErrorIf(True, self.ENODEHV, node,
                 "hypervisor %s parameter verify failure (source %s): %s",
                 hv_name, item, hv_result)

    test = nresult.get(constants.NV_NODESETUP,
                       ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True

  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
      return

    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)

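  # A small worked example (made-up numbers) for the clock-skew test above:
  # the node's (seconds, microseconds) pair is merged into a float and must
  # fall inside [start - NODE_MAX_CLOCK_SKEW, end + NODE_MAX_CLOCK_SKEW].
  #
  #   nvinfo_starttime = 1000.0
  #   nvinfo_endtime = 1002.0
  #   ntime_merged = utils.MergeTime((1200, 500000))   # 1200.5 seconds
  #   # 1200.5 exceeds nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW, so the
  #   # node is reported as diverging by abs(1200.5 - 1002.0) = 198.5s.
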
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    # check pv names
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                 " '%s' of VG '%s'", pvname, owner_vg)

  def _VerifyNodeBridges(self, ninfo, nresult, bridges):
    """Check the node bridges.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param bridges: the expected list of bridges

    """
    if not bridges:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    missing = nresult.get(constants.NV_BRIDGES, None)
    test = not isinstance(missing, list)
    _ErrorIf(test, self.ENODENET, node,
             "did not return valid bridge information")
    if not test:
      _ErrorIf(bool(missing), self.ENODENET, node, "missing bridges: %s" %
               utils.CommaJoin(sorted(missing)))

  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])

    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node master IP reachability data")
    if not test:
      if not nresult[constants.NV_MASTERIP]:
        if node == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
        else:
          msg = "cannot reach the master IP"
        _ErrorIf(True, self.ENODENET, node, msg)

  def _VerifyInstance(self, instance, instanceconfig, node_image,
                      diskstatus):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      pri_img = node_image[node_current]
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    diskdata = [(nname, success, status, idx)
                for (nname, disks) in diskstatus.items()
                for idx, (success, status) in enumerate(disks)]

    for nname, success, bdev_status, idx in diskdata:
      # the 'ghost node' construction in Exec() ensures that we have a
      # node here
      snode = node_image[nname]
      bad_snode = snode.ghost or snode.offline
      _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
               self.EINSTANCEFAULTYDISK, instance,
               "couldn't retrieve status for disk/%s on %s: %s",
               idx, nname, bdev_status)
      _ErrorIf((instanceconfig.admin_up and success and
                bdev_status.ldisk_status == constants.LDS_FAULTY),
               self.EINSTANCEFAULTYDISK, instance,
               "disk/%s on %s is faulty", idx, nname)

  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    @type reserved: L{ganeti.utils.FieldSet}
    @param reserved: a FieldSet of reserved volume names

    """
    for node, n_img in node_image.items():
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # skip non-healthy nodes
        continue
      for volume in n_img.volumes:
        test = ((node not in node_vol_should or
                volume not in node_vol_should[node]) and
                not reserved.Matches(volume))
        self._ErrorIf(test, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    """
    cluster_info = self.cfg.GetClusterInfo()
    for node, n_img in node_image.items():
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
      if n_img.offline:
        # we're skipping offline nodes from the N+1 warning, since
        # most likely we don't have good memory information from them;
        # we already list instances living on such nodes, and that's
        # enough warning
        continue
      for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
          bep = cluster_info.FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate instance failovers"
                      " should node %s fail (%dMiB needed, %dMiB available)",
                      prinode, needed_mem, n_img.mfree)

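  # Worked example (hypothetical sizes) for the N+1 check above: this node is
  # secondary for two auto-balanced instances whose primary is "nodeB"; if
  # nodeB fails, both would fail over here.
  #
  #   n_img.sbp = {"nodeB": ["inst1", "inst2"]}
  #   # BE_MEMORY of inst1 = 2048 MiB, of inst2 = 1024 MiB
  #   # needed_mem = 3072; with n_img.mfree = 2048 the ENODEN1 error fires.
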
  @classmethod
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
                   (files_all, files_all_opt, files_mc, files_vm)):
    """Verifies file checksums collected from all nodes.

    @param errorif: Callback for reporting errors
    @param nodeinfo: List of L{objects.Node} objects
    @param master_node: Name of master node
    @param all_nvinfo: RPC results

    """
    node_names = frozenset(node.name for node in nodeinfo)

    assert master_node in node_names
    assert (len(files_all | files_all_opt | files_mc | files_vm) ==
            sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
           "Found file listed in more than one file list"

    # Define functions determining which nodes to consider for a file
    file2nodefn = dict([(filename, fn)
      for (files, fn) in [(files_all, None),
                          (files_all_opt, None),
                          (files_mc, lambda node: (node.master_candidate or
                                                   node.name == master_node)),
                          (files_vm, lambda node: node.vm_capable)]
      for filename in files])

    fileinfo = dict((filename, {}) for filename in file2nodefn.keys())

    for node in nodeinfo:
      nresult = all_nvinfo[node.name]

      if nresult.fail_msg or not nresult.payload:
        node_files = None
      else:
        node_files = nresult.payload.get(constants.NV_FILELIST, None)

      test = not (node_files and isinstance(node_files, dict))
      errorif(test, cls.ENODEFILECHECK, node.name,
              "Node did not return file checksum data")
      if test:
        continue

      for (filename, checksum) in node_files.items():
        # Check if the file should be considered for a node
        fn = file2nodefn[filename]
        if fn is None or fn(node):
          fileinfo[filename].setdefault(checksum, set()).add(node.name)

    for (filename, checksums) in fileinfo.items():
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"

      # Nodes having the file
      with_file = frozenset(node_name
                            for nodes in fileinfo[filename].values()
                            for node_name in nodes)

      # Nodes missing file
      missing_file = node_names - with_file

      if filename in files_all_opt:
        # All or no nodes
        errorif(missing_file and missing_file != node_names,
                cls.ECLUSTERFILECHECK, None,
                "File %s is optional, but it must exist on all or no nodes (not"
                " found on %s)",
                filename, utils.CommaJoin(utils.NiceSort(missing_file)))
      else:
        errorif(missing_file, cls.ECLUSTERFILECHECK, None,
                "File %s is missing from node(s) %s", filename,
                utils.CommaJoin(utils.NiceSort(missing_file)))

      # See if there are multiple versions of the file
      test = len(checksums) > 1
      if test:
        variants = ["variant %s on %s" %
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
                    for (idx, (checksum, nodes)) in
                      enumerate(sorted(checksums.items()))]
      else:
        variants = []

      errorif(test, cls.ECLUSTERFILECHECK, None,
              "File %s found with %s different checksums (%s)",
              filename, len(checksums), "; ".join(variants))

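  # Sketch of the structure _VerifyFiles builds (checksums and node names are
  # invented): fileinfo maps each checked file to {checksum: set(nodes)}, so a
  # file is flagged when either some node misses it or more than one checksum
  # key shows up.
  #
  #   fileinfo = {
  #     "/var/lib/ganeti/known_hosts": {
  #       "0123456789abc": set(["node1", "node2"]),
  #       "fedcba9876543": set(["node3"]),   # -> "2 different checksums"
  #       },
  #     }
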
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
                      drbd_map):
    """Verifies the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_helper: the configured DRBD usermode helper
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    if drbd_helper:
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
      test = (helper_result == None)
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
               "no drbd usermode helper returned")
      if helper_result:
        status, payload = helper_result
        test = not status
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "drbd usermode helper check unsuccessful: %s", payload)
        test = status and (payload != drbd_helper)
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "wrong drbd usermode helper: %s", payload)

    # compute the DRBD minors
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, self.ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
        # ghost instance should not be running, but otherwise we
        # don't give double warnings (both ghost instance and
        # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name, instance.admin_up)

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, self.ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return

    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, self.ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, self.ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)

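  # Example (made-up minor numbers) of the two-way check in _VerifyNodeDrbd:
  # node_drbd holds what the configuration expects, used_minors what the node
  # actually reports via NV_DRBDLIST.
  #
  #   node_drbd = {0: ("inst1", True), 1: ("inst2", False)}
  #   used_minors = [0, 7]
  #   # minor 0 is active as expected, minor 1 is fine because inst2 is down
  #   # (must_exist is False), and minor 7 triggers
  #   # "unallocated drbd minor 7 is in use".
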
  def _UpdateNodeOS(self, ninfo, nresult, nimg):
    """Builds the node OS structures.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_os = nresult.get(constants.NV_OSLIST, None)
    test = (not isinstance(remote_os, list) or
            not compat.all(isinstance(v, list) and len(v) == 7
                           for v in remote_os))

    _ErrorIf(test, self.ENODEOS, node,
             "node hasn't returned valid OS data")

    nimg.os_fail = test

    if test:
      return

    os_dict = {}

    for (name, os_path, status, diagnose,
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:

      if name not in os_dict:
        os_dict[name] = []

      # parameters is a list of lists instead of list of tuples due to
      # JSON lacking a real tuple type, fix it:
      parameters = [tuple(v) for v in parameters]
      os_dict[name].append((os_path, status, diagnose,
                            set(variants), set(parameters), set(api_ver)))

    nimg.oslist = os_dict

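  # The structure built above, with invented values: nimg.oslist maps an OS
  # name to a list of (path, status, diagnose, variants, parameters, api_ver)
  # tuples, one entry per occurrence of that OS on the node's search path.
  #
  #   nimg.oslist = {
  #     "debootstrap": [("/srv/ganeti/os/debootstrap", True, "",
  #                      set(["default"]), set(), set([10, 15, 20]))],
  #     }
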
  def _VerifyNodeOS(self, ninfo, nimg, base):
    """Verifies the node OS list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nimg: the node image object
    @param base: the 'template' node we match against (e.g. from the master)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"

    beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
    for os_name, os_data in nimg.oslist.items():
      assert os_data, "Empty OS status for OS %s?!" % os_name
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
      _ErrorIf(not f_status, self.ENODEOS, node,
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
               "OS '%s' has multiple entries (first one shadows the rest): %s",
               os_name, utils.CommaJoin([v[0] for v in os_data]))
      # this will be caught in the backend too
      _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
               and not f_var, self.ENODEOS, node,
               "OS %s with API at least %d does not declare any variant",
               os_name, constants.OS_API_V15)
      # comparisons with the 'base' image
      test = os_name not in base.oslist
      _ErrorIf(test, self.ENODEOS, node,
               "Extra OS %s not present on reference node (%s)",
               os_name, base.name)
      if test:
        continue
      assert base.oslist[os_name], "Base node has empty OS status?"
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
      if not b_status:
        # base OS is invalid, skipping
        continue
      for kind, a, b in [("API version", f_api, b_api),
                         ("variants list", f_var, b_var),
                         ("parameters", beautify_params(f_param),
                          beautify_params(b_param))]:
        _ErrorIf(a != b, self.ENODEOS, node,
                 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
                 kind, os_name, base.name,
                 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))

    # check any missing OSes
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
    _ErrorIf(missing, self.ENODEOS, node,
             "OSes present on reference node %s but missing on this node: %s",
             base.name, utils.CommaJoin(missing))

  def _VerifyOob(self, ninfo, nresult):
    """Verifies out of band functionality of a node.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    # We just have to verify the paths on master and/or master candidates
    # as the oob helper is invoked on the master
    if ((ninfo.master_candidate or ninfo.master_capable) and
        constants.NV_OOB_PATHS in nresult):
      for path_result in nresult[constants.NV_OOB_PATHS]:
        self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)

  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
    """Verifies and updates the node volume data.

    This function will update a L{NodeImage}'s internal structures
    with data from the remote call.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    if vg_name is None:
      pass
    elif isinstance(lvdata, basestring):
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
    else:
      nimg.volumes = lvdata
      nimg.lvm_fail = False

  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
    """Verifies and updates the node instance list.

    If the listing was successful, then updates this node's instance
    list. Otherwise, it marks the RPC call as failed for the instance
    list key.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    idata = nresult.get(constants.NV_INSTANCELIST, None)
    test = not isinstance(idata, list)
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
    if test:
      nimg.hyp_fail = True
    else:
      nimg.instances = idata

  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
    """Verifies and computes a node information map

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # try to read free memory (from the hypervisor)
    hv_info = nresult.get(constants.NV_HVINFO, None)
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
    if not test:
      try:
        nimg.mfree = int(hv_info["memory_free"])
      except (ValueError, TypeError):
        _ErrorIf(True, self.ENODERPC, node,
                 "node returned invalid nodeinfo, check hypervisor")

    # FIXME: devise a free space model for file based instances as well
    if vg_name is not None:
      test = (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST])
      _ErrorIf(test, self.ENODELVM, node,
               "node didn't return data for the volume group '%s'"
               " - it is either missing or broken", vg_name)
      if not test:
        try:
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
        except (ValueError, TypeError):
          _ErrorIf(True, self.ENODERPC, node,
                   "node returned invalid LVM info, check LVM status")

  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
    """Gets per-disk status information for all instances.

    @type nodelist: list of strings
    @param nodelist: Node names
    @type node_image: dict of (name, L{objects.Node})
    @param node_image: Node objects
    @type instanceinfo: dict of (name, L{objects.Instance})
    @param instanceinfo: Instance objects
    @rtype: {instance: {node: [(success, payload)]}}
    @return: a dictionary of per-instance dictionaries with nodes as
        keys and disk information as values; the disk information is a
        list of tuples (success, payload)

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    node_disks = {}
    node_disks_devonly = {}
    diskless_instances = set()
    diskless = constants.DT_DISKLESS

    for nname in nodelist:
      node_instances = list(itertools.chain(node_image[nname].pinst,
                                            node_image[nname].sinst))
      diskless_instances.update(inst for inst in node_instances
                                if instanceinfo[inst].disk_template == diskless)
      disks = [(inst, disk)
               for inst in node_instances
               for disk in instanceinfo[inst].disks]

      if not disks:
        # No need to collect data
        continue

      node_disks[nname] = disks

      # Creating copies as SetDiskID below will modify the objects and that can
      # lead to incorrect data returned from nodes
      devonly = [dev.Copy() for (_, dev) in disks]

      for dev in devonly:
        self.cfg.SetDiskID(dev, nname)

      node_disks_devonly[nname] = devonly

    assert len(node_disks) == len(node_disks_devonly)

    # Collect data from all nodes with disks
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
                                                          node_disks_devonly)

    assert len(result) == len(node_disks)

    instdisk = {}

    for (nname, nres) in result.items():
      disks = node_disks[nname]

      if nres.offline:
        # No data from this node
        data = len(disks) * [(False, "node offline")]
      else:
        msg = nres.fail_msg
        _ErrorIf(msg, self.ENODERPC, nname,
                 "while getting disk information: %s", msg)
        if msg:
          # No data from this node
          data = len(disks) * [(False, msg)]
        else:
          data = []
          for idx, i in enumerate(nres.payload):
            if isinstance(i, (tuple, list)) and len(i) == 2:
              data.append(i)
            else:
              logging.warning("Invalid result from node %s, entry %d: %s",
                              nname, idx, i)
              data.append((False, "Invalid result from the remote node"))

      for ((inst, _), status) in zip(disks, data):
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)

    # Add empty entries for diskless instances.
    for inst in diskless_instances:
      assert inst not in instdisk
      instdisk[inst] = {}

    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
                      compat.all(isinstance(s, (tuple, list)) and
                                 len(s) == 2 for s in statuses)
                      for inst, nnames in instdisk.items()
                      for nname, statuses in nnames.items())
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"

    return instdisk

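  # Shape of the value returned above (instance/node names are hypothetical):
  # per instance, per node, one (success, payload) pair per disk, in disk
  # order; diskless instances get an empty inner dict.
  #
  #   instdisk = {
  #     "inst1": {
  #       "node1": [(True, status_disk0), (True, status_disk1)],
  #       "node2": [(False, "node offline"), (False, "node offline")],
  #       },
  #     "diskless-inst": {},
  #     }
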
  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks just ran in the post phase and their failure makes
    the output be logged in the verify output and the verification to fail.

    """
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
      }

    env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
               for node in self.my_node_info.values())

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    assert self.my_node_names, ("Node list not gathered,"
      " has CheckPrereq been executed?")
    return ([], self.my_node_names)

  def Exec(self, feedback_fn):
    """Verify integrity of the node group, performing various tests on nodes.

    """
    # This method has too many local variables. pylint: disable-msg=R0914
    self.bad = False
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn

    vg_name = self.cfg.GetVGName()
    drbd_helper = self.cfg.GetDRBDHelper()
    cluster = self.cfg.GetClusterInfo()
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
    hypervisors = cluster.enabled_hypervisors
    node_data_list = [self.my_node_info[name] for name in self.my_node_names]

    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = 0 # Count of offline nodes
    n_drained = 0 # Count of nodes being drained
    node_vol_should = {}

    # FIXME: verify OS list

    # File verification
    filemap = _ComputeAncillaryFiles(cluster, False)

    # do local checksums
    master_node = self.master_node = self.cfg.GetMasterNode()
    master_ip = self.cfg.GetMasterIP()

    feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))

    # We will make nodes contact all nodes in their group, and one node from
    # every other group.
    # TODO: should it be a *random* node, different every time?
    online_nodes = [node.name for node in node_data_list if not node.offline]
    other_group_nodes = {}

    for name in sorted(self.all_node_info):
      node = self.all_node_info[name]
      if (node.group not in other_group_nodes
          and node.group != self.group_uuid
          and not node.offline):
        other_group_nodes[node.group] = node.name

    node_verify_param = {
      constants.NV_FILELIST:
        utils.UniqueSequence(filename
                             for files in filemap
                             for filename in files),
      constants.NV_NODELIST: online_nodes + other_group_nodes.values(),
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_HVPARAMS:
        _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
      constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
                                 for node in node_data_list
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
      constants.NV_MASTERIP: (master_node, master_ip),
      constants.NV_OSLIST: None,
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
      }

    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]
      node_verify_param[constants.NV_DRBDLIST] = None

    if drbd_helper:
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper

    # bridge checks
    # FIXME: this needs to be changed per node-group, not cluster-wide
    bridges = set()
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
    if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
      bridges.add(default_nicpp[constants.NIC_LINK])
    for instance in self.my_inst_info.values():
      for nic in instance.nics:
        full_nic = cluster.SimpleFillNIC(nic.nicparams)
        if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          bridges.add(full_nic[constants.NIC_LINK])

    if bridges:
      node_verify_param[constants.NV_BRIDGES] = list(bridges)

    # Build our expected cluster state
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
                                                 name=node.name,
                                                 vm_capable=node.vm_capable))
                      for node in node_data_list)

    # Gather OOB paths
    oob_paths = []
    for node in self.all_node_info.values():
      path = _SupportsOob(self.cfg, node)
      if path and path not in oob_paths:
        oob_paths.append(path)

    if oob_paths:
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths

    for instance in self.my_inst_names:
      inst_config = self.my_inst_info[instance]

      for nname in inst_config.all_nodes:
        if nname not in node_image:
          gnode = self.NodeImage(name=nname)
          gnode.ghost = (nname not in self.all_node_info)
          node_image[nname] = gnode

      inst_config.MapLVsByNode(node_vol_should)

      pnode = inst_config.primary_node
      node_image[pnode].pinst.append(instance)

      for snode in inst_config.secondary_nodes:
        nimg = node_image[snode]
        nimg.sinst.append(instance)
        if pnode not in nimg.sbp:
          nimg.sbp[pnode] = []
        nimg.sbp[pnode].append(instance)

    # At this point, we have the in-memory data structures complete,
    # except for the runtime information, which we'll gather next

    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
                                           node_verify_param,
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()

    if self.extra_lv_nodes and vg_name is not None:
      extra_lv_nvinfo = \
          self.rpc.call_node_verify(self.extra_lv_nodes,
                                    {constants.NV_LVLIST: vg_name},
                                    self.cfg.GetClusterName())
    else:
      extra_lv_nvinfo = {}

    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Gathering disk information (%s nodes)" %
                len(self.my_node_names))
    instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
                                     self.my_inst_info)

    feedback_fn("* Verifying configuration file consistency")

    # If not all nodes are being checked, we need to make sure the master node
    # and a non-checked vm_capable node are in the list.
    absent_nodes = set(self.all_node_info).difference(self.my_node_info)
    if absent_nodes:
      vf_nvinfo = all_nvinfo.copy()
      vf_node_info = list(self.my_node_info.values())
      additional_nodes = []
      if master_node not in self.my_node_info:
        additional_nodes.append(master_node)
        vf_node_info.append(self.all_node_info[master_node])
      # Add the first vm_capable node we find which is not included
      for node in absent_nodes:
        nodeinfo = self.all_node_info[node]
        if nodeinfo.vm_capable and not nodeinfo.offline:
          additional_nodes.append(node)
          vf_node_info.append(self.all_node_info[node])
          break
      key = constants.NV_FILELIST
      vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
                                                 {key: node_verify_param[key]},
                                                 self.cfg.GetClusterName()))
    else:
      vf_nvinfo = all_nvinfo
      vf_node_info = self.my_node_info.values()

    self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)

    feedback_fn("* Verifying node status")
2614

    
2615
    refos_img = None
2616

    
2617
    for node_i in node_data_list:
2618
      node = node_i.name
2619
      nimg = node_image[node]
2620

    
2621
      if node_i.offline:
2622
        if verbose:
2623
          feedback_fn("* Skipping offline node %s" % (node,))
2624
        n_offline += 1
2625
        continue
2626

    
2627
      if node == master_node:
2628
        ntype = "master"
2629
      elif node_i.master_candidate:
2630
        ntype = "master candidate"
2631
      elif node_i.drained:
2632
        ntype = "drained"
2633
        n_drained += 1
2634
      else:
2635
        ntype = "regular"
2636
      if verbose:
2637
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2638

    
2639
      msg = all_nvinfo[node].fail_msg
2640
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2641
      if msg:
2642
        nimg.rpc_fail = True
2643
        continue
2644

    
2645
      nresult = all_nvinfo[node].payload
2646

    
2647
      nimg.call_ok = self._VerifyNode(node_i, nresult)
2648
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2649
      self._VerifyNodeNetwork(node_i, nresult)
2650
      self._VerifyOob(node_i, nresult)
2651

    
2652
      if nimg.vm_capable:
2653
        self._VerifyNodeLVM(node_i, nresult, vg_name)
2654
        self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2655
                             all_drbd_map)
2656

    
2657
        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2658
        self._UpdateNodeInstances(node_i, nresult, nimg)
2659
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2660
        self._UpdateNodeOS(node_i, nresult, nimg)
2661

    
2662
        if not nimg.os_fail:
2663
          if refos_img is None:
2664
            refos_img = nimg
2665
          self._VerifyNodeOS(node_i, nimg, refos_img)
2666
        self._VerifyNodeBridges(node_i, nresult, bridges)
2667

    
2668
        # Check whether all running instancies are primary for the node. (This
2669
        # can no longer be done from _VerifyInstance below, since some of the
2670
        # wrong instances could be from other node groups.)
2671
        non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2672

    
2673
        for inst in non_primary_inst:
2674
          test = inst in self.all_inst_info
2675
          _ErrorIf(test, self.EINSTANCEWRONGNODE, inst,
2676
                   "instance should not run on node %s", node_i.name)
2677
          _ErrorIf(not test, self.ENODEORPHANINSTANCE, node_i.name,
2678
                   "node is running unknown instance %s", inst)
2679

    
2680
    for node, result in extra_lv_nvinfo.items():
2681
      self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
2682
                              node_image[node], vg_name)
2683

    
2684
    feedback_fn("* Verifying instance status")
2685
    for instance in self.my_inst_names:
2686
      if verbose:
2687
        feedback_fn("* Verifying instance %s" % instance)
2688
      inst_config = self.my_inst_info[instance]
2689
      self._VerifyInstance(instance, inst_config, node_image,
2690
                           instdisk[instance])
2691
      inst_nodes_offline = []
2692

    
2693
      pnode = inst_config.primary_node
2694
      pnode_img = node_image[pnode]
2695
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2696
               self.ENODERPC, pnode, "instance %s, connection to"
2697
               " primary node failed", instance)
2698

    
2699
      _ErrorIf(inst_config.admin_up and pnode_img.offline,
2700
               self.EINSTANCEBADNODE, instance,
2701
               "instance is marked as running and lives on offline node %s",
2702
               inst_config.primary_node)
2703

    
2704
      # If the instance is non-redundant we cannot survive losing its primary
2705
      # node, so we are not N+1 compliant. On the other hand we have no disk
2706
      # templates with more than one secondary so that situation is not well
2707
      # supported either.
2708
      # FIXME: does not support file-backed instances
2709
      if not inst_config.secondary_nodes:
2710
        i_non_redundant.append(instance)
2711

    
2712
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2713
               instance, "instance has multiple secondary nodes: %s",
2714
               utils.CommaJoin(inst_config.secondary_nodes),
2715
               code=self.ETYPE_WARNING)
2716

    
2717
      if inst_config.disk_template in constants.DTS_INT_MIRROR:
2718
        pnode = inst_config.primary_node
2719
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
2720
        instance_groups = {}
2721

    
2722
        for node in instance_nodes:
2723
          instance_groups.setdefault(self.all_node_info[node].group,
2724
                                     []).append(node)
2725

    
2726
        pretty_list = [
2727
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2728
          # Sort so that we always list the primary node first.
2729
          for group, nodes in sorted(instance_groups.items(),
2730
                                     key=lambda (_, nodes): pnode in nodes,
2731
                                     reverse=True)]
2732

    
2733
        self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2734
                      instance, "instance has primary and secondary nodes in"
2735
                      " different groups: %s", utils.CommaJoin(pretty_list),
2736
                      code=self.ETYPE_WARNING)
2737

    
2738
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2739
        i_non_a_balanced.append(instance)
2740

    
2741
      for snode in inst_config.secondary_nodes:
2742
        s_img = node_image[snode]
2743
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2744
                 "instance %s, connection to secondary node failed", instance)
2745

    
2746
        if s_img.offline:
2747
          inst_nodes_offline.append(snode)
2748

    
2749
      # warn that the instance lives on offline nodes
2750
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2751
               "instance has offline secondary node(s) %s",
2752
               utils.CommaJoin(inst_nodes_offline))
2753
      # ... or ghost/non-vm_capable nodes
2754
      for node in inst_config.all_nodes:
2755
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2756
                 "instance lives on ghost node %s", node)
2757
        _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2758
                 instance, "instance lives on non-vm_capable node %s", node)
2759

    
2760
    feedback_fn("* Verifying orphan volumes")
2761
    reserved = utils.FieldSet(*cluster.reserved_lvs)
2762

    
2763
    # We will get spurious "unknown volume" warnings if any node of this group
2764
    # is secondary for an instance whose primary is in another group. To avoid
2765
    # them, we find these instances and add their volumes to node_vol_should.
2766
    for inst in self.all_inst_info.values():
2767
      for secondary in inst.secondary_nodes:
2768
        if (secondary in self.my_node_info
2769
            and inst.name not in self.my_inst_info):
2770
          inst.MapLVsByNode(node_vol_should)
2771
          break
2772

    
2773
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2774

    
2775
    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2776
      feedback_fn("* Verifying N+1 Memory redundancy")
2777
      self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
2778

    
2779
    feedback_fn("* Other Notes")
2780
    if i_non_redundant:
2781
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2782
                  % len(i_non_redundant))
2783

    
2784
    if i_non_a_balanced:
2785
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2786
                  % len(i_non_a_balanced))
2787

    
2788
    if n_offline:
2789
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2790

    
2791
    if n_drained:
2792
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2793

    
2794
    return not self.bad
2795

    
2796
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result.

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, and are only interested in
    # their results
    if phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      feedback_fn("* Hooks Results")
      assert hooks_results, "invalid result from hooks"

      for node_name in hooks_results:
        res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
                      "Communication failure in hooks execution: %s", msg)
        if res.offline or msg:
          # No need to investigate payload if node is offline or gave an error.
          # override manually lu_result here as _ErrorIf only
          # overrides self.bad
          lu_result = 1
          continue
        for script, hkr, output in res.payload:
          test = hkr == constants.HKR_FAIL
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
                        "Script %s failed, output:", script)
          if test:
            output = self._HOOKS_INDENT_RE.sub('      ', output)
            feedback_fn("%s" % output)
            lu_result = 0

      return lu_result


class LUClusterVerifyDisks(NoHooksLU):
2843
  """Verifies the cluster disks status.
2844

2845
  """
2846
  REQ_BGL = False
2847

    
2848
  def ExpandNames(self):
2849
    self.needed_locks = {
2850
      locking.LEVEL_NODE: locking.ALL_SET,
2851
      locking.LEVEL_INSTANCE: locking.ALL_SET,
2852
    }
2853
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2854

    
2855
  def Exec(self, feedback_fn):
2856
    """Verify integrity of cluster disks.
2857

2858
    @rtype: tuple of three items
2859
    @return: a tuple of (dict of node-to-node_error, list of instances
2860
        which need activate-disks, dict of instance: (node, volume) for
2861
        missing volumes
2862

2863
    """
2864
    result = res_nodes, res_instances, res_missing = {}, [], {}
2865

    
2866
    nodes = utils.NiceSort(self.cfg.GetVmCapableNodeList())
2867
    instances = self.cfg.GetAllInstancesInfo().values()
2868

    
2869
    nv_dict = {}
2870
    for inst in instances:
2871
      inst_lvs = {}
2872
      if not inst.admin_up:
2873
        continue
2874
      inst.MapLVsByNode(inst_lvs)
2875
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2876
      for node, vol_list in inst_lvs.iteritems():
2877
        for vol in vol_list:
2878
          nv_dict[(node, vol)] = inst
2879

    
2880
    if not nv_dict:
2881
      return result
2882

    
2883
    node_lvs = self.rpc.call_lv_list(nodes, [])
2884
    for node, node_res in node_lvs.items():
2885
      if node_res.offline:
2886
        continue
2887
      msg = node_res.fail_msg
2888
      if msg:
2889
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2890
        res_nodes[node] = msg
2891
        continue
2892

    
2893
      lvs = node_res.payload
2894
      for lv_name, (_, _, lv_online) in lvs.items():
2895
        inst = nv_dict.pop((node, lv_name), None)
2896
        if (not lv_online and inst is not None
2897
            and inst.name not in res_instances):
2898
          res_instances.append(inst.name)
2899

    
2900
    # any leftover items in nv_dict are missing LVs, let's arrange the
2901
    # data better
2902
    for key, inst in nv_dict.iteritems():
2903
      if inst.name not in res_missing:
2904
        res_missing[inst.name] = []
2905
      res_missing[inst.name].append(key)
2906

    
2907
    return result
2908

    
2909

    
2910
class LUClusterRepairDiskSizes(NoHooksLU):
2911
  """Verifies the cluster disks sizes.
2912

2913
  """
2914
  REQ_BGL = False
2915

    
2916
  def ExpandNames(self):
2917
    if self.op.instances:
2918
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
2919
      self.needed_locks = {
2920
        locking.LEVEL_NODE: [],
2921
        locking.LEVEL_INSTANCE: self.wanted_names,
2922
        }
2923
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2924
    else:
2925
      self.wanted_names = None
2926
      self.needed_locks = {
2927
        locking.LEVEL_NODE: locking.ALL_SET,
2928
        locking.LEVEL_INSTANCE: locking.ALL_SET,
2929
        }
2930
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2931

    
2932
  def DeclareLocks(self, level):
2933
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
2934
      self._LockInstancesNodes(primary_only=True)
2935

    
2936
  def CheckPrereq(self):
2937
    """Check prerequisites.
2938

2939
    This only checks the optional instance list against the existing names.
2940

2941
    """
2942
    if self.wanted_names is None:
2943
      self.wanted_names = self.glm.list_owned(locking.LEVEL_INSTANCE)
2944

    
2945
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2946
                             in self.wanted_names]
2947

    
2948
  def _EnsureChildSizes(self, disk):
2949
    """Ensure children of the disk have the needed disk size.
2950

2951
    This is valid mainly for DRBD8 and fixes an issue where the
2952
    children have smaller disk size.
2953

2954
    @param disk: an L{ganeti.objects.Disk} object
2955

2956
    """
2957
    if disk.dev_type == constants.LD_DRBD8:
2958
      assert disk.children, "Empty children for DRBD8?"
2959
      fchild = disk.children[0]
2960
      mismatch = fchild.size < disk.size
2961
      if mismatch:
2962
        self.LogInfo("Child disk has size %d, parent %d, fixing",
2963
                     fchild.size, disk.size)
2964
        fchild.size = disk.size
2965

    
2966
      # and we recurse on this child only, not on the metadev
2967
      return self._EnsureChildSizes(fchild) or mismatch
2968
    else:
2969
      return False
2970

    
2971
  def Exec(self, feedback_fn):
2972
    """Verify the size of cluster disks.
2973

2974
    """
2975
    # TODO: check child disks too
2976
    # TODO: check differences in size between primary/secondary nodes
2977
    per_node_disks = {}
2978
    for instance in self.wanted_instances:
2979
      pnode = instance.primary_node
2980
      if pnode not in per_node_disks:
2981
        per_node_disks[pnode] = []
2982
      for idx, disk in enumerate(instance.disks):
2983
        per_node_disks[pnode].append((instance, idx, disk))
2984

    
2985
    changed = []
2986
    for node, dskl in per_node_disks.items():
2987
      newl = [v[2].Copy() for v in dskl]
2988
      for dsk in newl:
2989
        self.cfg.SetDiskID(dsk, node)
2990
      result = self.rpc.call_blockdev_getsize(node, newl)
2991
      if result.fail_msg:
2992
        self.LogWarning("Failure in blockdev_getsize call to node"
2993
                        " %s, ignoring", node)
2994
        continue
2995
      if len(result.payload) != len(dskl):
2996
        logging.warning("Invalid result from node %s: len(dksl)=%d,"
2997
                        " result.payload=%s", node, len(dskl), result.payload)
2998
        self.LogWarning("Invalid result from node %s, ignoring node results",
2999
                        node)
3000
        continue
3001
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
3002
        if size is None:
3003
          self.LogWarning("Disk %d of instance %s did not return size"
3004
                          " information, ignoring", idx, instance.name)
3005
          continue
3006
        if not isinstance(size, (int, long)):
3007
          self.LogWarning("Disk %d of instance %s did not return valid"
3008
                          " size information, ignoring", idx, instance.name)
3009
          continue
3010
        size = size >> 20
3011
        if size != disk.size:
3012
          self.LogInfo("Disk %d of instance %s has mismatched size,"
3013
                       " correcting: recorded %d, actual %d", idx,
3014
                       instance.name, disk.size, size)
3015
          disk.size = size
3016
          self.cfg.Update(instance, feedback_fn)
3017
          changed.append((instance.name, idx, size))
3018
        if self._EnsureChildSizes(disk):
3019
          self.cfg.Update(instance, feedback_fn)
3020
          changed.append((instance.name, idx, disk.size))
3021
    return changed
3022

    
3023

    
3024
class LUClusterRename(LogicalUnit):
3025
  """Rename the cluster.
3026

3027
  """
3028
  HPATH = "cluster-rename"
3029
  HTYPE = constants.HTYPE_CLUSTER
3030

    
3031
  def BuildHooksEnv(self):
3032
    """Build hooks env.
3033

3034
    """
3035
    return {
3036
      "OP_TARGET": self.cfg.GetClusterName(),
3037
      "NEW_NAME": self.op.name,
3038
      }
3039

    
3040
  def BuildHooksNodes(self):
3041
    """Build hooks nodes.
3042

3043
    """
3044
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3045

    
3046
  def CheckPrereq(self):
3047
    """Verify that the passed name is a valid one.
3048

3049
    """
3050
    hostname = netutils.GetHostname(name=self.op.name,
3051
                                    family=self.cfg.GetPrimaryIPFamily())
3052

    
3053
    new_name = hostname.name
3054
    self.ip = new_ip = hostname.ip
3055
    old_name = self.cfg.GetClusterName()
3056
    old_ip = self.cfg.GetMasterIP()
3057
    if new_name == old_name and new_ip == old_ip:
3058
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
3059
                                 " cluster has changed",
3060
                                 errors.ECODE_INVAL)
3061
    if new_ip != old_ip:
3062
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3063
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
3064
                                   " reachable on the network" %
3065
                                   new_ip, errors.ECODE_NOTUNIQUE)
3066

    
3067
    self.op.name = new_name
3068

    
3069
  def Exec(self, feedback_fn):
3070
    """Rename the cluster.
3071

3072
    """
3073
    clustername = self.op.name
3074
    ip = self.ip
3075

    
3076
    # shutdown the master IP
3077
    master = self.cfg.GetMasterNode()
3078
    result = self.rpc.call_node_stop_master(master, False)
3079
    result.Raise("Could not disable the master role")
3080

    
3081
    try:
3082
      cluster = self.cfg.GetClusterInfo()
3083
      cluster.cluster_name = clustername
3084
      cluster.master_ip = ip
3085
      self.cfg.Update(cluster, feedback_fn)
3086

    
3087
      # update the known hosts file
3088
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3089
      node_list = self.cfg.GetOnlineNodeList()
3090
      try:
3091
        node_list.remove(master)
3092
      except ValueError:
3093
        pass
3094
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3095
    finally:
3096
      result = self.rpc.call_node_start_master(master, False, False)
3097
      msg = result.fail_msg
3098
      if msg:
3099
        self.LogWarning("Could not re-enable the master role on"
3100
                        " the master, please restart manually: %s", msg)
3101

    
3102
    return clustername
3103

    
3104

    
3105
class LUClusterSetParams(LogicalUnit):
3106
  """Change the parameters of the cluster.
3107

3108
  """
3109
  HPATH = "cluster-modify"
3110
  HTYPE = constants.HTYPE_CLUSTER
3111
  REQ_BGL = False
3112

    
3113
  def CheckArguments(self):
3114
    """Check parameters
3115

3116
    """
3117
    if self.op.uid_pool:
3118
      uidpool.CheckUidPool(self.op.uid_pool)
3119

    
3120
    if self.op.add_uids:
3121
      uidpool.CheckUidPool(self.op.add_uids)
3122

    
3123
    if self.op.remove_uids:
3124
      uidpool.CheckUidPool(self.op.remove_uids)
3125

    
3126
  def ExpandNames(self):
3127
    # FIXME: in the future maybe other cluster params won't require checking on
3128
    # all nodes to be modified.
3129
    self.needed_locks = {
3130
      locking.LEVEL_NODE: locking.ALL_SET,
3131
    }
3132
    self.share_locks[locking.LEVEL_NODE] = 1
3133

    
3134
  def BuildHooksEnv(self):
3135
    """Build hooks env.
3136

3137
    """
3138
    return {
3139
      "OP_TARGET": self.cfg.GetClusterName(),
3140
      "NEW_VG_NAME": self.op.vg_name,
3141
      }
3142

    
3143
  def BuildHooksNodes(self):
3144
    """Build hooks nodes.
3145

3146
    """
3147
    mn = self.cfg.GetMasterNode()
3148
    return ([mn], [mn])
3149

    
3150
  def CheckPrereq(self):
3151
    """Check prerequisites.
3152

3153
    This checks whether the given params don't conflict and
3154
    if the given volume group is valid.
3155

3156
    """
3157
    if self.op.vg_name is not None and not self.op.vg_name:
3158
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3159
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3160
                                   " instances exist", errors.ECODE_INVAL)
3161

    
3162
    if self.op.drbd_helper is not None and not self.op.drbd_helper:
3163
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3164
        raise errors.OpPrereqError("Cannot disable drbd helper while"
3165
                                   " drbd-based instances exist",
3166
                                   errors.ECODE_INVAL)
3167

    
3168
    node_list = self.glm.list_owned(locking.LEVEL_NODE)
3169

    
3170
    # if vg_name not None, checks given volume group on all nodes
3171
    if self.op.vg_name:
3172
      vglist = self.rpc.call_vg_list(node_list)
3173
      for node in node_list:
3174
        msg = vglist[node].fail_msg
3175
        if msg:
3176
          # ignoring down node
3177
          self.LogWarning("Error while gathering data on node %s"
3178
                          " (ignoring node): %s", node, msg)
3179
          continue
3180
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3181
                                              self.op.vg_name,
3182
                                              constants.MIN_VG_SIZE)
3183
        if vgstatus:
3184
          raise errors.OpPrereqError("Error on node '%s': %s" %
3185
                                     (node, vgstatus), errors.ECODE_ENVIRON)
3186

    
3187
    if self.op.drbd_helper:
3188
      # checks given drbd helper on all nodes
3189
      helpers = self.rpc.call_drbd_helper(node_list)
3190
      for node in node_list:
3191
        ninfo = self.cfg.GetNodeInfo(node)
3192
        if ninfo.offline:
3193
          self.LogInfo("Not checking drbd helper on offline node %s", node)
3194
          continue
3195
        msg = helpers[node].fail_msg
3196
        if msg:
3197
          raise errors.OpPrereqError("Error checking drbd helper on node"
3198
                                     " '%s': %s" % (node, msg),
3199
                                     errors.ECODE_ENVIRON)
3200
        node_helper = helpers[node].payload
3201
        if node_helper != self.op.drbd_helper:
3202
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3203
                                     (node, node_helper), errors.ECODE_ENVIRON)
3204

    
3205
    self.cluster = cluster = self.cfg.GetClusterInfo()
3206
    # validate params changes
3207
    if self.op.beparams:
3208
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3209
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3210

    
3211
    if self.op.ndparams:
3212
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3213
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3214

    
3215
      # TODO: we need a more general way to handle resetting
3216
      # cluster-level parameters to default values
3217
      if self.new_ndparams["oob_program"] == "":
3218
        self.new_ndparams["oob_program"] = \
3219
            constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3220

    
3221
    if self.op.nicparams:
3222
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3223
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3224
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
3225
      nic_errors = []
3226

    
3227
      # check all instances for consistency
3228
      for instance in self.cfg.GetAllInstancesInfo().values():
3229
        for nic_idx, nic in enumerate(instance.nics):
3230
          params_copy = copy.deepcopy(nic.nicparams)
3231
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
3232

    
3233
          # check parameter syntax
3234
          try:
3235
            objects.NIC.CheckParameterSyntax(params_filled)
3236
          except errors.ConfigurationError, err:
3237
            nic_errors.append("Instance %s, nic/%d: %s" %
3238
                              (instance.name, nic_idx, err))
3239

    
3240
          # if we're moving instances to routed, check that they have an ip
3241
          target_mode = params_filled[constants.NIC_MODE]
3242
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3243
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3244
                              " address" % (instance.name, nic_idx))
3245
      if nic_errors:
3246
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3247
                                   "\n".join(nic_errors))
3248

    
3249
    # hypervisor list/parameters
3250
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3251
    if self.op.hvparams:
3252
      for hv_name, hv_dict in self.op.hvparams.items():
3253
        if hv_name not in self.new_hvparams:
3254
          self.new_hvparams[hv_name] = hv_dict
3255
        else:
3256
          self.new_hvparams[hv_name].update(hv_dict)
3257

    
3258
    # os hypervisor parameters
3259
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3260
    if self.op.os_hvp:
3261
      for os_name, hvs in self.op.os_hvp.items():
3262
        if os_name not in self.new_os_hvp:
3263
          self.new_os_hvp[os_name] = hvs
3264
        else:
3265
          for hv_name, hv_dict in hvs.items():
3266
            if hv_name not in self.new_os_hvp[os_name]:
3267
              self.new_os_hvp[os_name][hv_name] = hv_dict
3268
            else:
3269
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
3270

    
3271
    # os parameters
3272
    self.new_osp = objects.FillDict(cluster.osparams, {})
3273
    if self.op.osparams:
3274
      for os_name, osp in self.op.osparams.items():
3275
        if os_name not in self.new_osp:
3276
          self.new_osp[os_name] = {}
3277

    
3278
        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3279
                                                  use_none=True)
3280

    
3281
        if not self.new_osp[os_name]:
3282
          # we removed all parameters
3283
          del self.new_osp[os_name]
3284
        else:
3285
          # check the parameter validity (remote check)
3286
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3287
                         os_name, self.new_osp[os_name])
3288

    
3289
    # changes to the hypervisor list
3290
    if self.op.enabled_hypervisors is not None:
3291
      self.hv_list = self.op.enabled_hypervisors
3292
      for hv in self.hv_list:
3293
        # if the hypervisor doesn't already exist in the cluster
3294
        # hvparams, we initialize it to empty, and then (in both
3295
        # cases) we make sure to fill the defaults, as we might not
3296
        # have a complete defaults list if the hypervisor wasn't
3297
        # enabled before
3298
        if hv not in new_hvp:
3299
          new_hvp[hv] = {}
3300
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3301
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3302
    else:
3303
      self.hv_list = cluster.enabled_hypervisors
3304

    
3305
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
3306
      # either the enabled list has changed, or the parameters have, validate
3307
      for hv_name, hv_params in self.new_hvparams.items():
3308
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
3309
            (self.op.enabled_hypervisors and
3310
             hv_name in self.op.enabled_hypervisors)):
3311
          # either this is a new hypervisor, or its parameters have changed
3312
          hv_class = hypervisor.GetHypervisor(hv_name)
3313
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3314
          hv_class.CheckParameterSyntax(hv_params)
3315
          _CheckHVParams(self, node_list, hv_name, hv_params)
3316

    
3317
    if self.op.os_hvp:
3318
      # no need to check any newly-enabled hypervisors, since the
3319
      # defaults have already been checked in the above code-block
3320
      for os_name, os_hvp in self.new_os_hvp.items():
3321
        for hv_name, hv_params in os_hvp.items():
3322
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3323
          # we need to fill in the new os_hvp on top of the actual hv_p
3324
          cluster_defaults = self.new_hvparams.get(hv_name, {})
3325
          new_osp = objects.FillDict(cluster_defaults, hv_params)
3326
          hv_class = hypervisor.GetHypervisor(hv_name)
3327
          hv_class.CheckParameterSyntax(new_osp)
3328
          _CheckHVParams(self, node_list, hv_name, new_osp)
3329

    
3330
    if self.op.default_iallocator:
3331
      alloc_script = utils.FindFile(self.op.default_iallocator,
3332
                                    constants.IALLOCATOR_SEARCH_PATH,
3333
                                    os.path.isfile)
3334
      if alloc_script is None:
3335
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3336
                                   " specified" % self.op.default_iallocator,
3337
                                   errors.ECODE_INVAL)
3338

    
3339
  def Exec(self, feedback_fn):
3340
    """Change the parameters of the cluster.
3341

3342
    """
3343
    if self.op.vg_name is not None:
3344
      new_volume = self.op.vg_name
3345
      if not new_volume:
3346
        new_volume = None
3347
      if new_volume != self.cfg.GetVGName():
3348
        self.cfg.SetVGName(new_volume)
3349
      else:
3350
        feedback_fn("Cluster LVM configuration already in desired"
3351
                    " state, not changing")
3352
    if self.op.drbd_helper is not None:
3353
      new_helper = self.op.drbd_helper
3354
      if not new_helper:
3355
        new_helper = None
3356
      if new_helper != self.cfg.GetDRBDHelper():
3357
        self.cfg.SetDRBDHelper(new_helper)
3358
      else:
3359
        feedback_fn("Cluster DRBD helper already in desired state,"
3360
                    " not changing")
3361
    if self.op.hvparams:
3362
      self.cluster.hvparams = self.new_hvparams
3363
    if self.op.os_hvp:
3364
      self.cluster.os_hvp = self.new_os_hvp
3365
    if self.op.enabled_hypervisors is not None:
3366
      self.cluster.hvparams = self.new_hvparams
3367
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3368
    if self.op.beparams:
3369
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3370
    if self.op.nicparams:
3371
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3372
    if self.op.osparams:
3373
      self.cluster.osparams = self.new_osp
3374
    if self.op.ndparams:
3375
      self.cluster.ndparams = self.new_ndparams
3376

    
3377
    if self.op.candidate_pool_size is not None:
3378
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
3379
      # we need to update the pool size here, otherwise the save will fail
3380
      _AdjustCandidatePool(self, [])
3381

    
3382
    if self.op.maintain_node_health is not None:
3383
      self.cluster.maintain_node_health = self.op.maintain_node_health
3384

    
3385
    if self.op.prealloc_wipe_disks is not None:
3386
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3387

    
3388
    if self.op.add_uids is not None:
3389
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3390

    
3391
    if self.op.remove_uids is not None:
3392
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3393

    
3394
    if self.op.uid_pool is not None:
3395
      self.cluster.uid_pool = self.op.uid_pool
3396

    
3397
    if self.op.default_iallocator is not None:
3398
      self.cluster.default_iallocator = self.op.default_iallocator
3399

    
3400
    if self.op.reserved_lvs is not None:
3401
      self.cluster.reserved_lvs = self.op.reserved_lvs
3402

    
3403
    def helper_os(aname, mods, desc):
3404
      desc += " OS list"
3405
      lst = getattr(self.cluster, aname)
3406
      for key, val in mods:
3407
        if key == constants.DDM_ADD:
3408
          if val in lst:
3409
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3410
          else:
3411
            lst.append(val)
3412
        elif key == constants.DDM_REMOVE:
3413
          if val in lst:
3414
            lst.remove(val)
3415
          else:
3416
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3417
        else:
3418
          raise errors.ProgrammerError("Invalid modification '%s'" % key)
3419

    
3420
    if self.op.hidden_os:
3421
      helper_os("hidden_os", self.op.hidden_os, "hidden")
3422

    
3423
    if self.op.blacklisted_os:
3424
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3425

    
3426
    if self.op.master_netdev:
3427
      master = self.cfg.GetMasterNode()
3428
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
3429
                  self.cluster.master_netdev)
3430
      result = self.rpc.call_node_stop_master(master, False)
3431
      result.Raise("Could not disable the master ip")
3432
      feedback_fn("Changing master_netdev from %s to %s" %
3433
                  (self.cluster.master_netdev, self.op.master_netdev))
3434
      self.cluster.master_netdev = self.op.master_netdev
3435

    
3436
    self.cfg.Update(self.cluster, feedback_fn)
3437

    
3438
    if self.op.master_netdev:
3439
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
3440
                  self.op.master_netdev)
3441
      result = self.rpc.call_node_start_master(master, False, False)
3442
      if result.fail_msg:
3443
        self.LogWarning("Could not re-enable the master ip on"
3444
                        " the master, please restart manually: %s",
3445
                        result.fail_msg)
3446

    
3447

    
3448
def _UploadHelper(lu, nodes, fname):
3449
  """Helper for uploading a file and showing warnings.
3450

3451
  """
3452
  if os.path.exists(fname):
3453
    result = lu.rpc.call_upload_file(nodes, fname)
3454
    for to_node, to_result in result.items():
3455
      msg = to_result.fail_msg
3456
      if msg:
3457
        msg = ("Copy of file %s to node %s failed: %s" %
3458
               (fname, to_node, msg))
3459
        lu.proc.LogWarning(msg)
3460

    
3461

    
3462
def _ComputeAncillaryFiles(cluster, redist):
3463
  """Compute files external to Ganeti which need to be consistent.
3464

3465
  @type redist: boolean
3466
  @param redist: Whether to include files which need to be redistributed
3467

3468
  """
3469
  # Compute files for all nodes
3470
  files_all = set([
3471
    constants.SSH_KNOWN_HOSTS_FILE,
3472
    constants.CONFD_HMAC_KEY,
3473
    constants.CLUSTER_DOMAIN_SECRET_FILE,
3474
    ])
3475

    
3476
  if not redist:
3477
    files_all.update(constants.ALL_CERT_FILES)
3478
    files_all.update(ssconf.SimpleStore().GetFileList())
3479

    
3480
  if cluster.modify_etc_hosts:
3481
    files_all.add(constants.ETC_HOSTS)
3482

    
3483
  # Files which must either exist on all nodes or on none
3484
  files_all_opt = set([
3485
    constants.RAPI_USERS_FILE,
3486
    ])
3487

    
3488
  # Files which should only be on master candidates
3489
  files_mc = set()
3490
  if not redist:
3491
    files_mc.add(constants.CLUSTER_CONF_FILE)
3492

    
3493
  # Files which should only be on VM-capable nodes
3494
  files_vm = set(filename
3495
    for hv_name in cluster.enabled_hypervisors
3496
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())
3497

    
3498
  # Filenames must be unique
3499
  assert (len(files_all | files_all_opt | files_mc | files_vm) ==
3500
          sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
3501
         "Found file listed in more than one file list"
3502

    
3503
  return (files_all, files_all_opt, files_mc, files_vm)
3504

    
3505

    
3506
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3507
  """Distribute additional files which are part of the cluster configuration.
3508

3509
  ConfigWriter takes care of distributing the config and ssconf files, but
3510
  there are more files which should be distributed to all nodes. This function
3511
  makes sure those are copied.
3512

3513
  @param lu: calling logical unit
3514
  @param additional_nodes: list of nodes not in the config to distribute to
3515
  @type additional_vm: boolean
3516
  @param additional_vm: whether the additional nodes are vm-capable or not
3517

3518
  """
3519
  # Gather target nodes
3520
  cluster = lu.cfg.GetClusterInfo()
3521
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3522

    
3523
  online_nodes = lu.cfg.GetOnlineNodeList()
3524
  vm_nodes = lu.cfg.GetVmCapableNodeList()
3525

    
3526
  if additional_nodes is not None:
3527
    online_nodes.extend(additional_nodes)
3528
    if additional_vm:
3529
      vm_nodes.extend(additional_nodes)
3530

    
3531
  # Never distribute to master node
3532
  for nodelist in [online_nodes, vm_nodes]:
3533
    if master_info.name in nodelist:
3534
      nodelist.remove(master_info.name)
3535

    
3536
  # Gather file lists
3537
  (files_all, files_all_opt, files_mc, files_vm) = \
3538
    _ComputeAncillaryFiles(cluster, True)
3539

    
3540
  # Never re-distribute configuration file from here
3541
  assert not (constants.CLUSTER_CONF_FILE in files_all or
3542
              constants.CLUSTER_CONF_FILE in files_vm)
3543
  assert not files_mc, "Master candidates not handled in this function"
3544

    
3545
  filemap = [
3546
    (online_nodes, files_all),
3547
    (online_nodes, files_all_opt),
3548
    (vm_nodes, files_vm),
3549
    ]
3550

    
3551
  # Upload the files
3552
  for (node_list, files) in filemap:
3553
    for fname in files:
3554
      _UploadHelper(lu, node_list, fname)
3555

    
3556

    
3557
class LUClusterRedistConf(NoHooksLU):
3558
  """Force the redistribution of cluster configuration.
3559

3560
  This is a very simple LU.
3561

3562
  """
3563
  REQ_BGL = False
3564

    
3565
  def ExpandNames(self):
3566
    self.needed_locks = {
3567
      locking.LEVEL_NODE: locking.ALL_SET,
3568
    }
3569
    self.share_locks[locking.LEVEL_NODE] = 1
3570

    
3571
  def Exec(self, feedback_fn):
3572
    """Redistribute the configuration.
3573

3574
    """
3575
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3576
    _RedistributeAncillaryFiles(self)
3577

    
3578

    
3579
def _WaitForSync(lu, instance, disks=None, oneshot=False):
3580
  """Sleep and poll for an instance's disk to sync.
3581

3582
  """
3583
  if not instance.disks or disks is not None and not disks:
3584
    return True
3585

    
3586
  disks = _ExpandCheckDisks(instance, disks)
3587

    
3588
  if not oneshot:
3589
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3590

    
3591
  node = instance.primary_node
3592

    
3593
  for dev in disks:
3594
    lu.cfg.SetDiskID(dev, node)
3595

    
3596
  # TODO: Convert to utils.Retry
3597

    
3598
  retries = 0
3599
  degr_retries = 10 # in seconds, as we sleep 1 second each time
3600
  while True:
3601
    max_time = 0
3602
    done = True
3603
    cumul_degraded = False
3604
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3605
    msg = rstats.fail_msg
3606
    if msg:
3607
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3608
      retries += 1
3609
      if retries >= 10:
3610
        raise errors.RemoteError("Can't contact node %s for mirror data,"
3611
                                 " aborting." % node)
3612
      time.sleep(6)
3613
      continue
3614
    rstats = rstats.payload
3615
    retries = 0
3616
    for i, mstat in enumerate(rstats):
3617
      if mstat is None:
3618
        lu.LogWarning("Can't compute data for node %s/%s",
3619
                           node, disks[i].iv_name)
3620
        continue
3621

    
3622
      cumul_degraded = (cumul_degraded or
3623
                        (mstat.is_degraded and mstat.sync_percent is None))
3624
      if mstat.sync_percent is not None:
3625
        done = False
3626
        if mstat.estimated_time is not None:
3627
          rem_time = ("%s remaining (estimated)" %
3628
                      utils.FormatSeconds(mstat.estimated_time))
3629
          max_time = mstat.estimated_time
3630
        else:
3631
          rem_time = "no time estimate"
3632
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3633
                        (disks[i].iv_name, mstat.sync_percent, rem_time))
3634

    
3635
    # if we're done but degraded, let's do a few small retries, to
3636
    # make sure we see a stable and not transient situation; therefore
3637
    # we force restart of the loop
3638
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
3639
      logging.info("Degraded disks found, %d retries left", degr_retries)
3640
      degr_retries -= 1
3641
      time.sleep(1)
3642
      continue
3643

    
3644
    if done or oneshot:
3645
      break
3646

    
3647
    time.sleep(min(60, max_time))
3648

    
3649
  if done:
3650
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3651
  return not cumul_degraded
3652

    
3653

    
3654
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3655
  """Check that mirrors are not degraded.
3656

3657
  The ldisk parameter, if True, will change the test from the
3658
  is_degraded attribute (which represents overall non-ok status for
3659
  the device(s)) to the ldisk (representing the local storage status).
3660

3661
  """
3662
  lu.cfg.SetDiskID(dev, node)
3663

    
3664
  result = True
3665

    
3666
  if on_primary or dev.AssembleOnSecondary():
3667
    rstats = lu.rpc.call_blockdev_find(node, dev)
3668
    msg = rstats.fail_msg
3669
    if msg:
3670
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3671
      result = False
3672
    elif not rstats.payload:
3673
      lu.LogWarning("Can't find disk on node %s", node)
3674
      result = False
3675
    else:
3676
      if ldisk:
3677
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3678
      else:
3679
        result = result and not rstats.payload.is_degraded
3680

    
3681
  if dev.children:
3682
    for child in dev.children:
3683
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3684

    
3685
  return result
3686

    
3687

    
3688
class LUOobCommand(NoHooksLU):
3689
  """Logical unit for OOB handling.
3690

3691
  """
3692
  REG_BGL = False
3693
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
3694

    
3695
  def ExpandNames(self):
3696
    """Gather locks we need.
3697

3698
    """
3699
    if self.op.node_names:
3700
      self.op.node_names = _GetWantedNodes(self, self.op.node_names)
3701
      lock_names = self.op.node_names
3702
    else:
3703
      lock_names = locking.ALL_SET
3704

    
3705
    self.needed_locks = {
3706
      locking.LEVEL_NODE: lock_names,
3707
      }
3708

    
3709
  def CheckPrereq(self):
3710
    """Check prerequisites.
3711

3712
    This checks:
3713
     - the node exists in the configuration
3714
     - OOB is supported
3715

3716
    Any errors are signaled by raising errors.OpPrereqError.
3717

3718
    """
3719
    self.nodes = []
3720
    self.master_node = self.cfg.GetMasterNode()
3721

    
3722
    assert self.op.power_delay >= 0.0
3723

    
3724
    if self.op.node_names:
3725
      if (self.op.command in self._SKIP_MASTER and
3726
          self.master_node in self.op.node_names):
3727
        master_node_obj = self.cfg.GetNodeInfo(self.master_node)
3728
        master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
3729

    
3730
        if master_oob_handler:
3731
          additional_text = ("run '%s %s %s' if you want to operate on the"
3732
                             " master regardless") % (master_oob_handler,
3733
                                                      self.op.command,
3734
                                                      self.master_node)
3735
        else:
3736
          additional_text = "it does not support out-of-band operations"
3737

    
3738
        raise errors.OpPrereqError(("Operating on the master node %s is not"
3739
                                    " allowed for %s; %s") %
3740
                                   (self.master_node, self.op.command,
3741
                                    additional_text), errors.ECODE_INVAL)
3742
    else:
3743
      self.op.node_names = self.cfg.GetNodeList()
3744
      if self.op.command in self._SKIP_MASTER:
3745
        self.op.node_names.remove(self.master_node)
3746

    
3747
    if self.op.command in self._SKIP_MASTER:
3748
      assert self.master_node not in self.op.node_names
3749

    
3750
    for node_name in self.op.node_names:
3751
      node = self.cfg.GetNodeInfo(node_name)
3752

    
3753
      if node is None:
3754
        raise errors.OpPrereqError("Node %s not found" % node_name,
3755
                                   errors.ECODE_NOENT)
3756
      else:
3757
        self.nodes.append(node)
3758

    
3759
      if (not self.op.ignore_status and
3760
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
3761
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
3762
                                    " not marked offline") % node_name,
3763
                                   errors.ECODE_STATE)
3764

    
3765
  def Exec(self, feedback_fn):
3766
    """Execute OOB and return result if we expect any.
3767

3768
    """
3769
    master_node = self.master_node
3770
    ret = []
3771

    
3772
    for idx, node in enumerate(utils.NiceSort(self.nodes,
3773
                                              key=lambda node: node.name)):
3774
      node_entry = [(constants.RS_NORMAL, node.name)]
3775
      ret.append(node_entry)
3776

    
3777
      oob_program = _SupportsOob(self.cfg, node)
3778

    
3779
      if not oob_program:
3780
        node_entry.append((constants.RS_UNAVAIL, None))
3781
        continue
3782

    
3783
      logging.info("Executing out-of-band command '%s' using '%s' on %s",
3784
                   self.op.command, oob_program, node.name)
3785
      result = self.rpc.call_run_oob(master_node, oob_program,
3786
                                     self.op.command, node.name,
3787
                                     self.op.timeout)
3788

    
3789
      if result.fail_msg:
3790
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
3791
                        node.name, result.fail_msg)
3792
        node_entry.append((constants.RS_NODATA, None))
3793
      else:
3794
        try:
3795
          self._CheckPayload(result)
3796
        except errors.OpExecError, err:
3797
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
3798
                          node.name, err)
3799
          node_entry.append((constants.RS_NODATA, None))
3800
        else:
3801
          if self.op.command == constants.OOB_HEALTH:
3802
            # For health we should log important events
3803
            for item, status in result.payload:
3804
              if status in [constants.OOB_STATUS_WARNING,
3805
                            constants.OOB_STATUS_CRITICAL]:
3806
                self.LogWarning("Item '%s' on node '%s' has status '%s'",
3807
                                item, node.name, status)
3808

    
3809
          if self.op.command == constants.OOB_POWER_ON:
3810
            node.powered = True
3811
          elif self.op.command == constants.OOB_POWER_OFF:
3812
            node.powered = False
3813
          elif self.op.command == constants.OOB_POWER_STATUS:
3814
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
3815
            if powered != node.powered:
3816
              logging.warning(("Recorded power state (%s) of node '%s' does not"
3817
                               " match actual power state (%s)"), node.powered,
3818
                              node.name, powered)
3819

    
3820
          # For configuration changing commands we should update the node
3821
          if self.op.command in (constants.OOB_POWER_ON,
3822
                                 constants.OOB_POWER_OFF):
3823
            self.cfg.Update(node, feedback_fn)
3824

    
3825
          node_entry.append((constants.RS_NORMAL, result.payload))
3826

    
3827
          if (self.op.command == constants.OOB_POWER_ON and
3828
              idx < len(self.nodes) - 1):
3829
            time.sleep(self.op.power_delay)
3830

    
3831
    return ret
3832

    
3833
  def _CheckPayload(self, result):
3834
    """Checks if the payload is valid.
3835

3836
    @param result: RPC result
3837
    @raises errors.OpExecError: If payload is not valid
3838

3839
    """
3840
    errs = []
3841
    if self.op.command == constants.OOB_HEALTH:
3842
      if not isinstance(result.payload, list):
3843
        errs.append("command 'health' is expected to return a list but got %s" %
3844
                    type(result.payload))
3845
      else:
3846
        for item, status in result.payload:
3847
          if status not in constants.OOB_STATUSES:
3848
            errs.append("health item '%s' has invalid status '%s'" %
3849
                        (item, status))
3850

    
3851
    if self.op.command == constants.OOB_POWER_STATUS:
3852
      if not isinstance(result.payload, dict):
3853
        errs.append("power-status is expected to return a dict but got %s" %
3854
                    type(result.payload))
3855

    
3856
    if self.op.command in [
3857
        constants.OOB_POWER_ON,
3858
        constants.OOB_POWER_OFF,
3859
        constants.OOB_POWER_CYCLE,
3860
        ]:
3861
      if result.payload is not None:
3862
        errs.append("%s is expected to not return payload but got '%s'" %
3863
                    (self.op.command, result.payload))
3864

    
3865
    if errs:
3866
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
3867
                               utils.CommaJoin(errs))
3868

    
3869
class _OsQuery(_QueryBase):
3870
  FIELDS = query.OS_FIELDS
3871

    
3872
  def ExpandNames(self, lu):
3873
    # Lock all nodes in shared mode
3874
    # Temporary removal of locks, should be reverted later
3875
    # TODO: reintroduce locks when they are lighter-weight
3876
    lu.needed_locks = {}
3877
    #self.share_locks[locking.LEVEL_NODE] = 1
3878
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3879

    
3880
    # The following variables interact with _QueryBase._GetNames
3881
    if self.names:
3882
      self.wanted = self.names
3883
    else:
3884
      self.wanted = locking.ALL_SET
3885

    
3886
    self.do_locking = self.use_locking
3887

    
3888
  def DeclareLocks(self, lu, level):
3889
    pass
3890

    
3891
  @staticmethod
3892
  def _DiagnoseByOS(rlist):
3893
    """Remaps a per-node return list into an a per-os per-node dictionary
3894

3895
    @param rlist: a map with node names as keys and OS objects as values
3896

3897
    @rtype: dict
3898
    @return: a dictionary with osnames as keys and as value another
3899
        map, with nodes as keys and tuples of (path, status, diagnose,
3900
        variants, parameters, api_versions) as values, eg::
3901

3902
          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3903
                                     (/srv/..., False, "invalid api")],
3904
                           "node2": [(/srv/..., True, "", [], [])]}
3905
          }
3906

3907
    """
3908
    all_os = {}
3909
    # we build here the list of nodes that didn't fail the RPC (at RPC
3910
    # level), so that nodes with a non-responding node daemon don't
3911
    # make all OSes invalid
3912
    good_nodes = [node_name for node_name in rlist
3913
                  if not rlist[node_name].fail_msg]
3914
    for node_name, nr in rlist.items():
3915
      if nr.fail_msg or not nr.payload:
3916
        continue
3917
      for (name, path, status, diagnose, variants,
3918
           params, api_versions) in nr.payload:
3919
        if name not in all_os:
3920
          # build a list of nodes for this os containing empty lists
3921
          # for each node in node_list
3922
          all_os[name] = {}
3923
          for nname in good_nodes:
3924
            all_os[name][nname] = []
3925
        # convert params from [name, help] to (name, help)
3926
        params = [tuple(v) for v in params]
3927
        all_os[name][node_name].append((path, status, diagnose,
3928
                                        variants, params, api_versions))
3929
    return all_os
3930

    
3931
  def _GetQueryData(self, lu):
3932
    """Computes the list of nodes and their attributes.
3933

3934
    """
3935
    # Locking is not used
3936
    assert not (compat.any(lu.glm.is_owned(level)
3937
                           for level in locking.LEVELS
3938
                           if level != locking.LEVEL_CLUSTER) or
3939
                self.do_locking or self.use_locking)
3940

    
3941
    valid_nodes = [node.name
3942
                   for node in lu.cfg.GetAllNodesInfo().values()
3943
                   if not node.offline and node.vm_capable]
3944
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
3945
    cluster = lu.cfg.GetClusterInfo()
3946

    
3947
    data = {}
3948

    
3949
    for (os_name, os_data) in pol.items():
3950
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
3951
                          hidden=(os_name in cluster.hidden_os),
3952
                          blacklisted=(os_name in cluster.blacklisted_os))
3953

    
3954
      variants = set()
3955
      parameters = set()
3956
      api_versions = set()
3957

    
3958
      for idx, osl in enumerate(os_data.values()):
3959
        info.valid = bool(info.valid and osl and osl[0][1])
3960
        if not info.valid:
3961
          break
3962

    
3963
        (node_variants, node_params, node_api) = osl[0][3:6]
3964
        if idx == 0:
3965
          # First entry
3966
          variants.update(node_variants)
3967
          parameters.update(node_params)
3968
          api_versions.update(node_api)
3969
        else:
3970
          # Filter out inconsistent values
3971
          variants.intersection_update(node_variants)
3972
          parameters.intersection_update(node_params)
3973
          api_versions.intersection_update(node_api)
3974

    
3975
      info.variants = list(variants)
3976
      info.parameters = list(parameters)
3977
      info.api_versions = list(api_versions)
3978

    
3979
      data[os_name] = info
3980

    
3981
    # Prepare data in requested order
3982
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
3983
            if name in data]
3984

    
3985

    
3986
class LUOsDiagnose(NoHooksLU):
3987
  """Logical unit for OS diagnose/query.
3988

3989
  """
3990
  REQ_BGL = False
3991

    
3992
  @staticmethod
3993
  def _BuildFilter(fields, names):
3994
    """Builds a filter for querying OSes.
3995

3996
    """
3997
    name_filter = qlang.MakeSimpleFilter("name", names)
3998

    
3999
    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4000
    # respective field is not requested
4001
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4002
                     for fname in ["hidden", "blacklisted"]
4003
                     if fname not in fields]
4004
    if "valid" not in fields:
4005
      status_filter.append([qlang.OP_TRUE, "valid"])
4006

    
4007
    if status_filter:
4008
      status_filter.insert(0, qlang.OP_AND)
4009
    else:
4010
      status_filter = None
4011

    
4012
    if name_filter and status_filter:
4013
      return [qlang.OP_AND, name_filter, status_filter]
4014
    elif name_filter:
4015
      return name_filter
4016
    else:
4017
      return status_filter
4018

    
4019
  def CheckArguments(self):
4020
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4021
                       self.op.output_fields, False)
4022

    
4023
  def ExpandNames(self):
4024
    self.oq.ExpandNames(self)
4025

    
4026
  def Exec(self, feedback_fn):
4027
    return self.oq.OldStyleQuery(self)
4028

    
4029

    
4030
class LUNodeRemove(LogicalUnit):
4031
  """Logical unit for removing a node.
4032

4033
  """
4034
  HPATH = "node-remove"
4035
  HTYPE = constants.HTYPE_NODE
4036

    
4037
  def BuildHooksEnv(self):
4038
    """Build hooks env.
4039

4040
    This doesn't run on the target node in the pre phase as a failed
4041
    node would then be impossible to remove.
4042

4043
    """
4044
    return {
4045
      "OP_TARGET": self.op.node_name,
4046
      "NODE_NAME": self.op.node_name,
4047
      }
4048

    
4049
  def BuildHooksNodes(self):
4050
    """Build hooks nodes.
4051

4052
    """
4053
    all_nodes = self.cfg.GetNodeList()
4054
    try:
4055
      all_nodes.remove(self.op.node_name)
4056
    except ValueError:
4057
      logging.warning("Node '%s', which is about to be removed, was not found"
4058
                      " in the list of all nodes", self.op.node_name)
4059
    return (all_nodes, all_nodes)
4060

    
4061
  def CheckPrereq(self):
4062
    """Check prerequisites.
4063

4064
    This checks:
4065
     - the node exists in the configuration
4066
     - it does not have primary or secondary instances
4067
     - it's not the master
4068

4069
    Any errors are signaled by raising errors.OpPrereqError.
4070

4071
    """
4072
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4073
    node = self.cfg.GetNodeInfo(self.op.node_name)
4074
    assert node is not None
4075

    
4076
    instance_list = self.cfg.GetInstanceList()
4077

    
4078
    masternode = self.cfg.GetMasterNode()
4079
    if node.name == masternode:
4080
      raise errors.OpPrereqError("Node is the master node, failover to another"
4081
                                 " node is required", errors.ECODE_INVAL)
4082

    
4083
    for instance_name in instance_list:
4084
      instance = self.cfg.GetInstanceInfo(instance_name)
4085
      if node.name in instance.all_nodes:
4086
        raise errors.OpPrereqError("Instance %s is still running on the node,"
4087
                                   " please remove first" % instance_name,
4088
                                   errors.ECODE_INVAL)
4089
    self.op.node_name = node.name
4090
    self.node = node
4091

    
4092
  def Exec(self, feedback_fn):
4093
    """Removes the node from the cluster.
4094

4095
    """
4096
    node = self.node
4097
    logging.info("Stopping the node daemon and removing configs from node %s",
4098
                 node.name)
4099

    
4100
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4101

    
4102
    # Promote nodes to master candidate as needed
4103
    _AdjustCandidatePool(self, exceptions=[node.name])
4104
    self.context.RemoveNode(node.name)
4105

    
4106
    # Run post hooks on the node before it's removed
4107
    _RunPostHook(self, node.name)
4108

    
4109
    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4110
    msg = result.fail_msg
4111
    if msg:
4112
      self.LogWarning("Errors encountered on the remote node while leaving"
4113
                      " the cluster: %s", msg)
4114

    
4115
    # Remove node from our /etc/hosts
4116
    if self.cfg.GetClusterInfo().modify_etc_hosts:
4117
      master_node = self.cfg.GetMasterNode()
4118
      result = self.rpc.call_etc_hosts_modify(master_node,
4119
                                              constants.ETC_HOSTS_REMOVE,
4120
                                              node.name, None)
4121
      result.Raise("Can't update hosts file with new host data")
4122
      _RedistributeAncillaryFiles(self)
4123

    
4124

    
4125
class _NodeQuery(_QueryBase):
4126
  FIELDS = query.NODE_FIELDS
4127

    
4128
  def ExpandNames(self, lu):
4129
    lu.needed_locks = {}
4130
    lu.share_locks[locking.LEVEL_NODE] = 1
4131

    
4132
    if self.names:
4133
      self.wanted = _GetWantedNodes(lu, self.names)
4134
    else:
4135
      self.wanted = locking.ALL_SET
4136

    
4137
    self.do_locking = (self.use_locking and
4138
                       query.NQ_LIVE in self.requested_data)
4139

    
4140
    if self.do_locking:
4141
      # if we don't request only static fields, we need to lock the nodes
4142
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4143

    
4144
  def DeclareLocks(self, lu, level):
4145
    pass
4146

    
4147
  def _GetQueryData(self, lu):
4148
    """Computes the list of nodes and their attributes.
4149

4150
    """
4151
    all_info = lu.cfg.GetAllNodesInfo()
4152

    
4153
    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4154

    
4155
    # Gather data as requested
4156
    if query.NQ_LIVE in self.requested_data:
4157
      # filter out non-vm_capable nodes
4158
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4159

    
4160
      node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
4161
                                        lu.cfg.GetHypervisorType())
4162
      live_data = dict((name, nresult.payload)
4163
                       for (name, nresult) in node_data.items()
4164
                       if not nresult.fail_msg and nresult.payload)
4165
    else:
4166
      live_data = None
4167

    
4168
    if query.NQ_INST in self.requested_data:
4169
      node_to_primary = dict([(name, set()) for name in nodenames])
4170
      node_to_secondary = dict([(name, set()) for name in nodenames])
4171

    
4172
      inst_data = lu.cfg.GetAllInstancesInfo()
4173

    
4174
      for inst in inst_data.values():
4175
        if inst.primary_node in node_to_primary:
4176
          node_to_primary[inst.primary_node].add(inst.name)
4177
        for secnode in inst.secondary_nodes:
4178
          if secnode in node_to_secondary:
4179
            node_to_secondary[secnode].add(inst.name)
4180
    else:
4181
      node_to_primary = None
4182
      node_to_secondary = None
4183

    
4184
    if query.NQ_OOB in self.requested_data:
4185
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4186
                         for name, node in all_info.iteritems())
4187
    else:
4188
      oob_support = None
4189

    
4190
    if query.NQ_GROUP in self.requested_data:
4191
      groups = lu.cfg.GetAllNodeGroupsInfo()
4192
    else:
4193
      groups = {}
4194

    
4195
    return query.NodeQueryData([all_info[name] for name in nodenames],
4196
                               live_data, lu.cfg.GetMasterNode(),
4197
                               node_to_primary, node_to_secondary, groups,
4198
                               oob_support, lu.cfg.GetClusterInfo())
4199

    
4200

    
4201
class LUNodeQuery(NoHooksLU):
4202
  """Logical unit for querying nodes.
4203

4204
  """
4205
  # pylint: disable-msg=W0142
4206
  REQ_BGL = False
4207

    
4208
  def CheckArguments(self):
4209
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4210
                         self.op.output_fields, self.op.use_locking)
4211

    
4212
  def ExpandNames(self):
4213
    self.nq.ExpandNames(self)
4214

    
4215
  def Exec(self, feedback_fn):
4216
    return self.nq.OldStyleQuery(self)
4217

    
4218

    
4219
class LUNodeQueryvols(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Computes the list of volumes and their attributes.

    """
    nodenames = self.glm.list_owned(locking.LEVEL_NODE)
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = [self.cfg.GetInstanceInfo(iname) for iname
             in self.cfg.GetInstanceList()]

    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = nresult.payload[:]
      node_vols.sort(key=lambda vol: vol['dev'])

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol['dev']
          elif field == "vg":
            val = vol['vg']
          elif field == "name":
            val = vol['name']
          elif field == "size":
            val = int(float(vol['size']))
          elif field == "instance":
            for inst in ilist:
              if node not in lv_by_node[inst]:
                continue
              if vol['name'] in lv_by_node[inst][node]:
                val = inst.name
                break
            else:
              val = '-'
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output


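# Illustrative example (hypothetical values): LUNodeQueryvols.Exec above
# returns one row of stringified values per logical volume, ordered by the
# requested output_fields.  For output_fields=["node", "name", "size",
# "instance"] the result could look like
#   [["node1.example.com", "disk0", "10240", "instance1"],
#    ["node1.example.com", "disk1", "2048", "-"]]
# where "-" marks a volume that is not mapped to any instance.

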
class LUNodeQueryStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  REQ_BGL = False

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Computes the list of storage units and their attributes.

    """
    self.nodes = self.glm.list_owned(locking.LEVEL_NODE)

    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result


class _InstanceQuery(_QueryBase):
  FIELDS = query.INSTANCE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks[locking.LEVEL_INSTANCE] = 1
    lu.share_locks[locking.LEVEL_NODE] = 1

    if self.names:
      self.wanted = _GetWantedInstances(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.IQ_LIVE in self.requested_data)
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      lu.needed_locks[locking.LEVEL_NODE] = []
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, lu, level):
    if level == locking.LEVEL_NODE and self.do_locking:
      lu._LockInstancesNodes() # pylint: disable-msg=W0212

  def _GetQueryData(self, lu):
    """Computes the list of instances and their attributes.

    """
    cluster = lu.cfg.GetClusterInfo()
    all_info = lu.cfg.GetAllInstancesInfo()

    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)

    instance_list = [all_info[name] for name in instance_names]
    nodes = frozenset(itertools.chain(*(inst.all_nodes
                                        for inst in instance_list)))
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
    bad_nodes = []
    offline_nodes = []
    wrongnode_inst = set()

    # Gather data as requested
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
      live_data = {}
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          assert result.fail_msg
          offline_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        elif result.payload:
          for inst in result.payload:
            if inst in all_info:
              if all_info[inst].primary_node == name:
                live_data.update(result.payload)
              else:
                wrongnode_inst.add(inst)
            else:
              # orphan instance; we don't list it here as we don't
              # handle this case yet in the output of instance listing
              logging.warning("Orphan instance '%s' found on node %s",
                              inst, name)
        # else no instance is alive
    else:
      live_data = {}

    if query.IQ_DISKUSAGE in self.requested_data:
      disk_usage = dict((inst.name,
                         _ComputeDiskSize(inst.disk_template,
                                          [{constants.IDISK_SIZE: disk.size}
                                           for disk in inst.disks]))
                        for inst in instance_list)
    else:
      disk_usage = None

    if query.IQ_CONSOLE in self.requested_data:
      consinfo = {}
      for inst in instance_list:
        if inst.name in live_data:
          # Instance is running
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
        else:
          consinfo[inst.name] = None
      assert set(consinfo.keys()) == set(instance_names)
    else:
      consinfo = None

    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
                                   disk_usage, offline_nodes, bad_nodes,
                                   live_data, wrongnode_inst, consinfo)


class LUQuery(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    qcls = _GetQueryImplementation(self.op.what)

    self.impl = qcls(self.op.filter, self.op.fields, False)

  def ExpandNames(self):
    self.impl.ExpandNames(self)

  def DeclareLocks(self, level):
    self.impl.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.impl.NewStyleQuery(self)


class LUQueryFields(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.qcls = _GetQueryImplementation(self.op.what)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)


class LUNodeModifyStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def Exec(self, feedback_fn):
    """Modifies the storage unit on the node.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


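# Illustrative example (hypothetical values): the "changes" dict accepted by
# LUNodeModifyStorage above may only touch the fields listed in
# constants.MODIFIABLE_STORAGE_FIELDS for the given storage type; for
# example, an opcode carrying something like
#   changes={constants.SF_ALLOCATABLE: False}
# would be accepted only for a storage type whose modifiable field set
# includes the allocatable flag; any other key ends up in "diff" and is
# rejected with ECODE_INVAL.

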
class LUNodeAdd(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _NFLAGS = ["master_capable", "vm_capable"]

  def CheckArguments(self):
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
    # validate/normalize the node name
    self.hostname = netutils.GetHostname(name=self.op.node_name,
                                         family=self.primary_ip_family)
    self.op.node_name = self.hostname.name

    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("Cannot readd the master node",
                                 errors.ECODE_STATE)

    if self.op.readd and self.op.group:
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
                                 " being readded", errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # Exclude added node
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
    post_nodes = pre_nodes + [self.op.node_name, ]

    return (pre_nodes, post_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) match the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg
    hostname = self.hostname
    node = hostname.name
    primary_ip = self.op.primary_ip = hostname.ip
    if self.op.secondary_ip is None:
      if self.primary_ip_family == netutils.IP6Address.family:
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
                                   " IPv4 address must be given as secondary",
                                   errors.ECODE_INVAL)
      self.op.secondary_ip = primary_ip

    secondary_ip = self.op.secondary_ip
    if not netutils.IP4Address.IsValid(secondary_ip):
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                 " address" % secondary_ip, errors.ECODE_INVAL)

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    self.changed_primary_ip = False

    for existing_node_name in node_list:
      existing_node = cfg.GetNodeInfo(existing_node_name)

      if self.op.readd and node == existing_node_name:
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # After this 'if' block, None is no longer a valid value for the
    # _capable op attributes
    if self.op.readd:
      old_node = self.cfg.GetNodeInfo(node)
      assert old_node is not None, "Can't retrieve locked node %s" % node
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, getattr(old_node, attr))
    else:
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, True)

    if self.op.readd and not self.op.vm_capable:
      pri, sec = cfg.GetNodeInstances(node)
      if pri or sec:
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
                                   " flag set to false, but it already holds"
                                   " instances" % node,
                                   errors.ECODE_STATE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no secondary ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a secondary ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                           source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to node daemon port",
                                   errors.ECODE_ENVIRON)

    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    if self.op.master_capable:
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
    else:
      self.master_candidate = False

    if self.op.readd:
      self.new_node = old_node
    else:
      node_group = cfg.LookupNodeGroup(self.op.group)
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False,
                                   group=node_group)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    # We are adding a new node, so we assume it is powered
    new_node.powered = True

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # copy the master/vm_capable flags
    for attr in self._NFLAGS:
      setattr(new_node, attr, getattr(self.op, attr))

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    if self.op.ndparams:
      new_node.ndparams = self.op.ndparams
    else:
      new_node.ndparams = {}

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_ADD,
                                              self.hostname.name,
                                              self.hostname.ip)
      result.Raise("Can't update hosts file with new host data")

    if new_node.secondary_ip != new_node.primary_ip:
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
                               False)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: [node],
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
                                  additional_vm=self.op.vm_capable)
      self.context.AddNode(new_node, self.proc.GetECId())


class LUNodeSetParams(LogicalUnit):
  """Modifies the parameters of a node.

  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
      to the node role (as _ROLE_*)
  @cvar _R2F: a dictionary from node role to tuples of flags
  @cvar _FLAGS: a list of attribute names corresponding to the flags

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
  _FLAGS = ["master_candidate", "drained", "offline"]

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
                self.op.master_capable, self.op.vm_capable,
                self.op.secondary_ip, self.op.ndparams]
    if all_mods.count(None) == len(all_mods):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we might be demoting from MC
    self.might_demote = (self.op.master_candidate == False or
                         self.op.offline == True or
                         self.op.drained == True or
                         self.op.master_capable == False)

    if self.op.secondary_ip:
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                   " address" % self.op.secondary_ip,
                                   errors.ECODE_INVAL)

    self.lock_all = self.op.auto_promote and self.might_demote
    self.lock_instances = self.op.secondary_ip is not None

  def ExpandNames(self):
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

    if self.lock_instances:
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET

  def DeclareLocks(self, level):
    # If we have locked all instances, before waiting to lock nodes, release
    # all the ones living on nodes unrelated to the current operation.
    if level == locking.LEVEL_NODE and self.lock_instances:
      self.affected_instances = []
      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
        instances_keep = []

        # Build list of instances to release
        for instance_name in self.glm.list_owned(locking.LEVEL_INSTANCE):
          instance = self.context.cfg.GetInstanceInfo(instance_name)
          if (instance.disk_template in constants.DTS_INT_MIRROR and
              self.op.node_name in instance.all_nodes):
            instances_keep.append(instance_name)
            self.affected_instances.append(instance)

        _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)

        assert (set(self.glm.list_owned(locking.LEVEL_INSTANCE)) ==
                set(instances_keep))

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via master-failover",
                                   errors.ECODE_INVAL)

    if self.op.master_candidate and not node.master_capable:
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
                                 " it a master candidate" % node.name,
                                 errors.ECODE_STATE)

    if self.op.vm_capable == False:
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
      if ipri or isec:
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
                                   " the vm_capable flag" % node.name,
                                   errors.ECODE_STATE)

    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto promote option to allow"
                                   " promotion", errors.ECODE_STATE)

    self.old_flags = old_flags = (node.master_candidate,
                                  node.drained, node.offline)
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
    self.old_role = old_role = self._F2R[old_flags]

    # Check for ineffective changes
    for attr in self._FLAGS:
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
        setattr(self.op, attr, None)

    # Past this point, any flag change to False means a transition
    # away from the respective state, as only real changes are kept

    # TODO: We might query the real power state if it supports OOB
    if _SupportsOob(self.cfg, node):
      if self.op.offline is False and not (node.powered or
                                           self.op.powered == True):
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
                                    " offline status can be reset") %
                                   self.op.node_name)
    elif self.op.powered is not None:
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
                                  " as it does not support out-of-band"
                                  " handling") % self.op.node_name)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.op.drained == False or self.op.offline == False or
        (self.op.master_capable and not node.master_capable)):
      if _DecideSelfPromotion(self):
        self.op.master_candidate = True
        self.LogInfo("Auto-promoting node to master candidate")

    # If we're no longer master capable, we'll demote ourselves from MC
    if self.op.master_capable == False and node.master_candidate:
      self.LogInfo("Demoting from master candidate")
      self.op.master_candidate = False

    # Compute new role
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
    if self.op.master_candidate:
      new_role = self._ROLE_CANDIDATE
    elif self.op.drained:
      new_role = self._ROLE_DRAINED
    elif self.op.offline:
      new_role = self._ROLE_OFFLINE
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # only) True flag
      new_role = self._ROLE_REGULAR
    else: # no new flags, nothing, keep old role
      new_role = old_role

    self.new_role = new_role

    if old_role == self._ROLE_OFFLINE and new_role != old_role:
      # Trying to transition out of offline status
      result = self.rpc.call_version([node.name])[node.name]
      if result.fail_msg:
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                   " to report its version: %s" %
                                   (node.name, result.fail_msg),
                                   errors.ECODE_STATE)
      else:
        self.LogWarning("Transitioning node from offline to online state"
                        " without using re-add. Please make sure the node"
                        " is healthy!")

    if self.op.secondary_ip:
      # Ok even without locking, because this can't be changed by any LU
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
      master_singlehomed = master.secondary_ip == master.primary_ip
      if master_singlehomed and self.op.secondary_ip:
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
                                   " homed cluster", errors.ECODE_INVAL)

      if node.offline:
        if self.affected_instances:
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
                                     " node has instances (%s) configured"
                                     " to use it" % self.affected_instances)
      else:
        # On online nodes, check that no instances are running, and that
        # the node has the new ip and we can reach it.
        for instance in self.affected_instances:
          _CheckInstanceDown(self, instance, "cannot change secondary ip")

        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
        if master.name != node.name:
          # check reachability from master secondary ip to new secondary ip
          if not netutils.TcpPing(self.op.secondary_ip,
                                  constants.DEFAULT_NODED_PORT,
                                  source=master.secondary_ip):
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                       " based ping to node daemon port",
                                       errors.ECODE_ENVIRON)

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node
    old_role = self.old_role
    new_role = self.new_role

    result = []

    if self.op.ndparams:
      node.ndparams = self.new_ndparams

    if self.op.powered is not None:
      node.powered = self.op.powered

    for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))

    if new_role != old_role:
      # Tell the node to demote itself, if no longer MC and not offline
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s", msg)

      new_flags = self._R2F[new_role]
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
        if of != nf:
          result.append((desc, str(nf)))
      (node.master_candidate, node.drained, node.offline) = new_flags

      # we locked all nodes, we adjust the CP before updating this node
      if self.lock_all:
        _AdjustCandidatePool(self, [node.name])

    if self.op.secondary_ip:
      node.secondary_ip = self.op.secondary_ip
      result.append(("secondary_ip", self.op.secondary_ip))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup if the mc
    # flag changed
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)

    return result


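# Illustrative example: LUNodeSetParams._F2R above encodes the node role as a
# (master_candidate, drained, offline) flag tuple.  A node with flags
# (True, False, False) maps to _ROLE_CANDIDATE; clearing that single True
# flag (master_candidate=False) moves it to _ROLE_REGULAR, which is exactly
# the "False is still in new flags" branch of CheckPrereq, while setting
# offline=True instead would select _ROLE_OFFLINE.

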
class LUNodePowercycle(NoHooksLU):
  """Powercycles a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload


class LUClusterQuery(NoHooksLU):
  """Query cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}

    # Filter just for enabled hypervisors
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = {}
      for hv_name, hv_params in hv_dict.items():
        if hv_name in cluster.enabled_hypervisors:
          os_hvp[os_name][hv_name] = hv_params

    # Convert ip_family to ip_version
    primary_ip_version = constants.IP4_VERSION
    if cluster.primary_ip_family == netutils.IP6Address.family:
      primary_ip_version = constants.IP6_VERSION

    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.enabled_hypervisors[0],
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "nicparams": cluster.nicparams,
      "ndparams": cluster.ndparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      "primary_ip_version": primary_ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
      "hidden_os": cluster.hidden_os,
      "blacklisted_os": cluster.blacklisted_os,
      }

    return result


class LUClusterConfigQuery(NoHooksLU):
  """Return configuration values.

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause", "volume_group_name")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Dump a representation of the cluster config to the standard output.

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      elif field == "volume_group_name":
        entry = self.cfg.GetVGName()
      else:
        raise errors.ParameterError(field)
      values.append(entry)
    return values


class LUInstanceActivateDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
              _AssembleInstanceDisks(self, self.instance,
                                     ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info


def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: False if the operation failed, otherwise a list of
      (host, instance_visible_name, node_visible_name)
      with the mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for idx, inst_disk in enumerate(disks):
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for idx, inst_disk in enumerate(disks):
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info


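# Illustrative example (hypothetical values): on success
# _AssembleInstanceDisks above returns (True, device_info), where device_info
# holds one (primary_node, iv_name, dev_path) tuple per disk, e.g.
#   ("node1.example.com", "disk/0", "/dev/drbd0")
# If assembling any disk failed and the failure was not ignored, the first
# element of the returned tuple is False instead.

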
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                           ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")


class LUInstanceDeactivateDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    """
    instance = self.instance
    if self.op.force:
      _ShutdownInstanceDisks(self, instance)
    else:
      _SafeShutdownInstanceDisks(self, instance)


def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks if an instance is running, before calling
  _ShutdownInstanceDisks.

  """
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)


def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on

  """
  if disks is None:
    return instance.disks
  else:
    if not set(disks).issubset(instance.disks):
      raise errors.ProgrammerError("Can only act on disks belonging to the"
                                   " target instance")
    return disks


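# Illustrative example: callers of _ExpandCheckDisks above either pass
# disks=None to act on every disk of the instance, or a subset of
# instance.disks, e.g. the hypothetical call
#   _ExpandCheckDisks(instance, instance.disks[:1])
# which returns just the first disk; passing disk objects that do not belong
# to the instance raises ProgrammerError.

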
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If ignore_primary is false, errors on the primary node are not
  ignored.

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if ((node == instance.primary_node and not ignore_primary) or
            (node != instance.primary_node and not result.offline)):
          all_result = False
  return all_result


def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  free_mem = nodeinfo[node].payload.get('memory_free', None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)


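# Illustrative example (hypothetical call, assuming the usual backend
# parameter constant constants.BE_MEMORY and a filled beparams dict "bep"):
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MEMORY], instance.hypervisor)
# i.e. "requested" is the instance's backend memory in MiB and a shortfall
# produces "Not enough memory on node ... for starting instance ...".

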
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Checks if nodes have enough free disk space in all the VGs.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type req_sizes: C{dict}
  @param req_sizes: the hash of vg and corresponding amount of disk in
      MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  for vg, req_size in req_sizes.items():
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)


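# Illustrative example (hypothetical values): req_sizes maps each volume
# group to the total space required on it, in MiB.  A call such as
#   _CheckNodesFreeDiskPerVG(self, ["node1", "node2"],
#                            {"xenvg": 2 * 10240, "fastvg": 4096})
# checks for 20 GiB of free space in "xenvg" and 4 GiB in "fastvg" on both
# nodes, issuing one _CheckNodesFreeDiskOnVG call per volume group.

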
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
  """Checks if nodes have enough free disk space in the specified VG.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    vg_free = info.payload.get("vg_free", None)
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node"
                                 " %s for vg %s, result was '%s'" %
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s"
                                 " vg %s: required %d MiB, available %d MiB" %
                                 (node, vg, requested, vg_free),
                                 errors.ECODE_NORES)


class LUInstanceStartup(LogicalUnit):
5599
  """Starts an instance.
5600

5601
  """
5602
  HPATH = "instance-start"
5603
  HTYPE = constants.HTYPE_INSTANCE
5604
  REQ_BGL = False
5605

    
5606
  def CheckArguments(self):
5607
    # extra beparams
5608
    if self.op.beparams:
5609
      # fill the beparams dict
5610
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5611

    
5612
  def ExpandNames(self):
5613
    self._ExpandAndLockInstance()
5614

    
5615
  def BuildHooksEnv(self):
5616
    """Build hooks env.
5617

5618
    This runs on master, primary and secondary nodes of the instance.
5619

5620
    """
5621
    env = {
5622
      "FORCE": self.op.force,
5623
      }
5624

    
5625
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5626

    
5627
    return env
5628

    
5629
  def BuildHooksNodes(self):
5630
    """Build hooks nodes.
5631

5632
    """
5633
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5634
    return (nl, nl)
5635

    
5636
  def CheckPrereq(self):
5637
    """Check prerequisites.
5638

5639
    This checks that the instance is in the cluster.
5640

5641
    """
5642
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5643
    assert self.instance is not None, \
5644
      "Cannot retrieve locked instance %s" % self.op.instance_name
5645

    
5646
    # extra hvparams
5647
    if self.op.hvparams:
5648
      # check hypervisor parameter syntax (locally)
5649
      cluster = self.cfg.GetClusterInfo()
5650
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5651
      filled_hvp = cluster.FillHV(instance)
5652
      filled_hvp.update(self.op.hvparams)
5653
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5654
      hv_type.CheckParameterSyntax(filled_hvp)
5655
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
5656

    
5657
    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5658

    
5659
    if self.primary_offline and self.op.ignore_offline_nodes:
5660
      self.proc.LogWarning("Ignoring offline primary node")
5661

    
5662
      if self.op.hvparams or self.op.beparams:
5663
        self.proc.LogWarning("Overridden parameters are ignored")
5664
    else:
5665
      _CheckNodeOnline(self, instance.primary_node)
5666

    
5667
      bep = self.cfg.GetClusterInfo().FillBE(instance)
5668

    
5669
      # check bridges existence
5670
      _CheckInstanceBridgesExist(self, instance)
5671

    
5672
      remote_info = self.rpc.call_instance_info(instance.primary_node,
5673
                                                instance.name,
5674
                                                instance.hypervisor)
5675
      remote_info.Raise("Error checking node %s" % instance.primary_node,
5676
                        prereq=True, ecode=errors.ECODE_ENVIRON)
5677
      if not remote_info.payload: # not running already
5678
        _CheckNodeFreeMemory(self, instance.primary_node,
5679
                             "starting instance %s" % instance.name,
5680
                             bep[constants.BE_MEMORY], instance.hypervisor)
5681
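  # Note: self.op.hvparams (e.g. a hypothetical {"kernel_args": "single"}) is
  # only layered on top of the instance's filled hypervisor parameters for
  # the syntax and per-node checks above; Exec passes it to the start RPC
  # without writing it back to the configuration.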

    
5682
  def Exec(self, feedback_fn):
5683
    """Start the instance.
5684

5685
    """
5686
    instance = self.instance
5687
    force = self.op.force
5688

    
5689
    if not self.op.no_remember:
5690
      self.cfg.MarkInstanceUp(instance.name)
5691

    
5692
    if self.primary_offline:
5693
      assert self.op.ignore_offline_nodes
5694
      self.proc.LogInfo("Primary node offline, marked instance as started")
5695
    else:
5696
      node_current = instance.primary_node
5697

    
5698
      _StartInstanceDisks(self, instance, force)
5699

    
5700
      result = self.rpc.call_instance_start(node_current, instance,
5701
                                            self.op.hvparams, self.op.beparams)
5702
      msg = result.fail_msg
5703
      if msg:
5704
        _ShutdownInstanceDisks(self, instance)
5705
        raise errors.OpExecError("Could not start instance: %s" % msg)
5706

    
5707

    
5708
class LUInstanceReboot(LogicalUnit):
5709
  """Reboot an instance.
5710

5711
  """
5712
  HPATH = "instance-reboot"
5713
  HTYPE = constants.HTYPE_INSTANCE
5714
  REQ_BGL = False
5715

    
5716
  def ExpandNames(self):
5717
    self._ExpandAndLockInstance()
5718

    
5719
  def BuildHooksEnv(self):
5720
    """Build hooks env.
5721

5722
    This runs on master, primary and secondary nodes of the instance.
5723

5724
    """
5725
    env = {
5726
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
5727
      "REBOOT_TYPE": self.op.reboot_type,
5728
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5729
      }
5730

    
5731
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5732

    
5733
    return env
5734

    
5735
  def BuildHooksNodes(self):
5736
    """Build hooks nodes.
5737

5738
    """
5739
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5740
    return (nl, nl)
5741

    
5742
  def CheckPrereq(self):
5743
    """Check prerequisites.
5744

5745
    This checks that the instance is in the cluster.
5746

5747
    """
5748
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5749
    assert self.instance is not None, \
5750
      "Cannot retrieve locked instance %s" % self.op.instance_name
5751

    
5752
    _CheckNodeOnline(self, instance.primary_node)
5753

    
5754
    # check bridges existence
5755
    _CheckInstanceBridgesExist(self, instance)
5756

    
5757
  def Exec(self, feedback_fn):
5758
    """Reboot the instance.
5759

5760
    """
5761
    instance = self.instance
5762
    ignore_secondaries = self.op.ignore_secondaries
5763
    reboot_type = self.op.reboot_type
5764

    
5765
    remote_info = self.rpc.call_instance_info(instance.primary_node,
5766
                                              instance.name,
5767
                                              instance.hypervisor)
5768
    remote_info.Raise("Error checking node %s" % instance.primary_node)
5769
    instance_running = bool(remote_info.payload)
5770

    
5771
    node_current = instance.primary_node
5772

    
5773
    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
5774
                                            constants.INSTANCE_REBOOT_HARD]:
5775
      for disk in instance.disks:
5776
        self.cfg.SetDiskID(disk, node_current)
5777
      result = self.rpc.call_instance_reboot(node_current, instance,
5778
                                             reboot_type,
5779
                                             self.op.shutdown_timeout)
5780
      result.Raise("Could not reboot instance")
5781
    else:
5782
      if instance_running:
5783
        result = self.rpc.call_instance_shutdown(node_current, instance,
5784
                                                 self.op.shutdown_timeout)
5785
        result.Raise("Could not shutdown instance for full reboot")
5786
        _ShutdownInstanceDisks(self, instance)
5787
      else:
5788
        self.LogInfo("Instance %s was already stopped, starting now",
5789
                     instance.name)
5790
      _StartInstanceDisks(self, instance, ignore_secondaries)
5791
      result = self.rpc.call_instance_start(node_current, instance, None, None)
5792
      msg = result.fail_msg
5793
      if msg:
5794
        _ShutdownInstanceDisks(self, instance)
5795
        raise errors.OpExecError("Could not start instance for"
5796
                                 " full reboot: %s" % msg)
5797

    
5798
    self.cfg.MarkInstanceUp(instance.name)
5799
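  # Summary of Exec above: soft and hard reboots of a running instance are
  # delegated to the hypervisor via call_instance_reboot; otherwise the
  # reboot is emulated by shutting the instance and its disks down (if it
  # was running) and then re-activating the disks and starting it again.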

    
5800

    
5801
class LUInstanceShutdown(LogicalUnit):
5802
  """Shutdown an instance.
5803

5804
  """
5805
  HPATH = "instance-stop"
5806
  HTYPE = constants.HTYPE_INSTANCE
5807
  REQ_BGL = False
5808

    
5809
  def ExpandNames(self):
5810
    self._ExpandAndLockInstance()
5811

    
5812
  def BuildHooksEnv(self):
5813
    """Build hooks env.
5814

5815
    This runs on master, primary and secondary nodes of the instance.
5816

5817
    """
5818
    env = _BuildInstanceHookEnvByObject(self, self.instance)
5819
    env["TIMEOUT"] = self.op.timeout
5820
    return env
5821

    
5822
  def BuildHooksNodes(self):
5823
    """Build hooks nodes.
5824

5825
    """
5826
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5827
    return (nl, nl)
5828

    
5829
  def CheckPrereq(self):
5830
    """Check prerequisites.
5831

5832
    This checks that the instance is in the cluster.
5833

5834
    """
5835
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5836
    assert self.instance is not None, \
5837
      "Cannot retrieve locked instance %s" % self.op.instance_name
5838

    
5839
    self.primary_offline = \
5840
      self.cfg.GetNodeInfo(self.instance.primary_node).offline
5841

    
5842
    if self.primary_offline and self.op.ignore_offline_nodes:
5843
      self.proc.LogWarning("Ignoring offline primary node")
5844
    else:
5845
      _CheckNodeOnline(self, self.instance.primary_node)
5846

    
5847
  def Exec(self, feedback_fn):
5848
    """Shutdown the instance.
5849

5850
    """
5851
    instance = self.instance
5852
    node_current = instance.primary_node
5853
    timeout = self.op.timeout
5854

    
5855
    if not self.op.no_remember:
5856
      self.cfg.MarkInstanceDown(instance.name)
5857

    
5858
    if self.primary_offline:
5859
      assert self.op.ignore_offline_nodes
5860
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
5861
    else:
5862
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
5863
      msg = result.fail_msg
5864
      if msg:
5865
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)
5866

    
5867
      _ShutdownInstanceDisks(self, instance)
5868

    
5869

    
5870
class LUInstanceReinstall(LogicalUnit):
5871
  """Reinstall an instance.
5872

5873
  """
5874
  HPATH = "instance-reinstall"
5875
  HTYPE = constants.HTYPE_INSTANCE
5876
  REQ_BGL = False
5877

    
5878
  def ExpandNames(self):
5879
    self._ExpandAndLockInstance()
5880

    
5881
  def BuildHooksEnv(self):
5882
    """Build hooks env.
5883

5884
    This runs on master, primary and secondary nodes of the instance.
5885

5886
    """
5887
    return _BuildInstanceHookEnvByObject(self, self.instance)
5888

    
5889
  def BuildHooksNodes(self):
5890
    """Build hooks nodes.
5891

5892
    """
5893
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5894
    return (nl, nl)
5895

    
5896
  def CheckPrereq(self):
5897
    """Check prerequisites.
5898

5899
    This checks that the instance is in the cluster and is not running.
5900

5901
    """
5902
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5903
    assert instance is not None, \
5904
      "Cannot retrieve locked instance %s" % self.op.instance_name
5905
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
5906
                     " offline, cannot reinstall")
5907
    for node in instance.secondary_nodes:
5908
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
5909
                       " cannot reinstall")
5910

    
5911
    if instance.disk_template == constants.DT_DISKLESS:
5912
      raise errors.OpPrereqError("Instance '%s' has no disks" %
5913
                                 self.op.instance_name,
5914
                                 errors.ECODE_INVAL)
5915
    _CheckInstanceDown(self, instance, "cannot reinstall")
5916

    
5917
    if self.op.os_type is not None:
5918
      # OS verification
5919
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
5920
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
5921
      instance_os = self.op.os_type
5922
    else:
5923
      instance_os = instance.os
5924

    
5925
    nodelist = list(instance.all_nodes)
5926

    
5927
    if self.op.osparams:
5928
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
5929
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
5930
      self.os_inst = i_osdict # the new dict (without defaults)
5931
    else:
5932
      self.os_inst = None
5933

    
5934
    self.instance = instance
5935

    
5936
  def Exec(self, feedback_fn):
5937
    """Reinstall the instance.
5938

5939
    """
5940
    inst = self.instance
5941

    
5942
    if self.op.os_type is not None:
5943
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
5944
      inst.os = self.op.os_type
5945
      # Write to configuration
5946
      self.cfg.Update(inst, feedback_fn)
5947

    
5948
    _StartInstanceDisks(self, inst, None)
5949
    try:
5950
      feedback_fn("Running the instance OS create scripts...")
5951
      # FIXME: pass debug option from opcode to backend
5952
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
5953
                                             self.op.debug_level,
5954
                                             osparams=self.os_inst)
5955
      result.Raise("Could not install OS for instance %s on node %s" %
5956
                   (inst.name, inst.primary_node))
5957
    finally:
5958
      _ShutdownInstanceDisks(self, inst)
5959

    
5960

    
5961
class LUInstanceRecreateDisks(LogicalUnit):
5962
  """Recreate an instance's missing disks.
5963

5964
  """
5965
  HPATH = "instance-recreate-disks"
5966
  HTYPE = constants.HTYPE_INSTANCE
5967
  REQ_BGL = False
5968

    
5969
  def CheckArguments(self):
5970
    # normalise the disk list
5971
    self.op.disks = sorted(frozenset(self.op.disks))
5972

    
5973
  def ExpandNames(self):
5974
    self._ExpandAndLockInstance()
5975
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5976
    if self.op.nodes:
5977
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
5978
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
5979
    else:
5980
      self.needed_locks[locking.LEVEL_NODE] = []
5981

    
5982
  def DeclareLocks(self, level):
5983
    if level == locking.LEVEL_NODE:
5984
      # if we replace the nodes, we only need to lock the old primary,
5985
      # otherwise we need to lock all nodes for disk re-creation
5986
      primary_only = bool(self.op.nodes)
5987
      self._LockInstancesNodes(primary_only=primary_only)
5988

    
5989
  def BuildHooksEnv(self):
5990
    """Build hooks env.
5991

5992
    This runs on master, primary and secondary nodes of the instance.
5993

5994
    """
5995
    return _BuildInstanceHookEnvByObject(self, self.instance)
5996

    
5997
  def BuildHooksNodes(self):
5998
    """Build hooks nodes.
5999

6000
    """
6001
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6002
    return (nl, nl)
6003

    
6004
  def CheckPrereq(self):
6005
    """Check prerequisites.
6006

6007
    This checks that the instance is in the cluster and is not running.
6008

6009
    """
6010
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6011
    assert instance is not None, \
6012
      "Cannot retrieve locked instance %s" % self.op.instance_name
6013
    if self.op.nodes:
6014
      if len(self.op.nodes) != len(instance.all_nodes):
6015
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6016
                                   " %d replacement nodes were specified" %
6017
                                   (instance.name, len(instance.all_nodes),
6018
                                    len(self.op.nodes)),
6019
                                   errors.ECODE_INVAL)
6020
      assert instance.disk_template != constants.DT_DRBD8 or \
6021
          len(self.op.nodes) == 2
6022
      assert instance.disk_template != constants.DT_PLAIN or \
6023
          len(self.op.nodes) == 1
6024
      primary_node = self.op.nodes[0]
6025
    else:
6026
      primary_node = instance.primary_node
6027
    _CheckNodeOnline(self, primary_node)
6028

    
6029
    if instance.disk_template == constants.DT_DISKLESS:
6030
      raise errors.OpPrereqError("Instance '%s' has no disks" %
6031
                                 self.op.instance_name, errors.ECODE_INVAL)
6032
    # if we replace nodes *and* the old primary is offline, we don't
6033
    # check
6034
    assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
6035
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6036
    if not (self.op.nodes and old_pnode.offline):
6037
      _CheckInstanceDown(self, instance, "cannot recreate disks")
6038

    
6039
    if not self.op.disks:
6040
      self.op.disks = range(len(instance.disks))
6041
    else:
6042
      for idx in self.op.disks:
6043
        if idx >= len(instance.disks):
6044
          raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6045
                                     errors.ECODE_INVAL)
6046
    if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6047
      raise errors.OpPrereqError("Can't recreate disks partially and"
6048
                                 " change the nodes at the same time",
6049
                                 errors.ECODE_INVAL)
6050
    self.instance = instance
6051

    
6052
  def Exec(self, feedback_fn):
6053
    """Recreate the disks.
6054

6055
    """
6056
    # change primary node, if needed
6057
    if self.op.nodes:
6058
      self.instance.primary_node = self.op.nodes[0]
6059
      self.LogWarning("Changing the instance's nodes, you will have to"
6060
                      " remove any disks left on the older nodes manually")
6061

    
6062
    to_skip = []
6063
    for idx, disk in enumerate(self.instance.disks):
6064
      if idx not in self.op.disks: # disk idx has not been passed in
6065
        to_skip.append(idx)
6066
        continue
6067
      # update secondaries for disks, if needed
6068
      if self.op.nodes:
6069
        if disk.dev_type == constants.LD_DRBD8:
6070
          # need to update the nodes
6071
          assert len(self.op.nodes) == 2
6072
          logical_id = list(disk.logical_id)
6073
          logical_id[0] = self.op.nodes[0]
6074
          logical_id[1] = self.op.nodes[1]
6075
          disk.logical_id = tuple(logical_id)
6076

    
6077
    if self.op.nodes:
6078
      self.cfg.Update(self.instance, feedback_fn)
6079

    
6080
    _CreateDisks(self, self.instance, to_skip=to_skip)
6081
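  # Illustrative opcode (hypothetical instance name, assuming the matching
  # OpInstanceRecreateDisks opcode): recreate only the first disk in place:
  #
  #   opcodes.OpInstanceRecreateDisks(instance_name="inst1.example.com",
  #                                   disks=[0])
  #
  # Passing nodes=[...] instead relocates all disks to new nodes; CheckPrereq
  # above rejects combining a partial disk list with a node change.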

    
6082

    
6083
class LUInstanceRename(LogicalUnit):
6084
  """Rename an instance.
6085

6086
  """
6087
  HPATH = "instance-rename"
6088
  HTYPE = constants.HTYPE_INSTANCE
6089

    
6090
  def CheckArguments(self):
6091
    """Check arguments.
6092

6093
    """
6094
    if self.op.ip_check and not self.op.name_check:
6095
      # TODO: make the ip check more flexible and not depend on the name check
6096
      raise errors.OpPrereqError("IP address check requires a name check",
6097
                                 errors.ECODE_INVAL)
6098

    
6099
  def BuildHooksEnv(self):
6100
    """Build hooks env.
6101

6102
    This runs on master, primary and secondary nodes of the instance.
6103

6104
    """
6105
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6106
    env["INSTANCE_NEW_NAME"] = self.op.new_name
6107
    return env
6108

    
6109
  def BuildHooksNodes(self):
6110
    """Build hooks nodes.
6111

6112
    """
6113
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6114
    return (nl, nl)
6115

    
6116
  def CheckPrereq(self):
6117
    """Check prerequisites.
6118

6119
    This checks that the instance is in the cluster and is not running.
6120

6121
    """
6122
    self.op.instance_name = _ExpandInstanceName(self.cfg,
6123
                                                self.op.instance_name)
6124
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6125
    assert instance is not None
6126
    _CheckNodeOnline(self, instance.primary_node)
6127
    _CheckInstanceDown(self, instance, "cannot rename")
6128
    self.instance = instance
6129

    
6130
    new_name = self.op.new_name
6131
    if self.op.name_check:
6132
      hostname = netutils.GetHostname(name=new_name)
6133
      if hostname != new_name:
6134
        self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6135
                     hostname.name)
6136
      if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6137
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6138
                                    " same as given hostname '%s'") %
6139
                                    (hostname.name, self.op.new_name),
6140
                                    errors.ECODE_INVAL)
6141
      new_name = self.op.new_name = hostname.name
6142
      if (self.op.ip_check and
6143
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6144
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
6145
                                   (hostname.ip, new_name),
6146
                                   errors.ECODE_NOTUNIQUE)
6147

    
6148
    instance_list = self.cfg.GetInstanceList()
6149
    if new_name in instance_list and new_name != instance.name:
6150
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6151
                                 new_name, errors.ECODE_EXISTS)
6152

    
6153
  def Exec(self, feedback_fn):
6154
    """Rename the instance.
6155

6156
    """
6157
    inst = self.instance
6158
    old_name = inst.name
6159

    
6160
    rename_file_storage = False
6161
    if (inst.disk_template in constants.DTS_FILEBASED and
6162
        self.op.new_name != inst.name):
6163
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6164
      rename_file_storage = True
6165

    
6166
    self.cfg.RenameInstance(inst.name, self.op.new_name)
6167
    # Change the instance lock. This is definitely safe while we hold the BGL.
6168
    # Otherwise the new lock would have to be added in acquired mode.
6169
    assert self.REQ_BGL
6170
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6171
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6172

    
6173
    # re-read the instance from the configuration after rename
6174
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
6175

    
6176
    if rename_file_storage:
6177
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6178
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6179
                                                     old_file_storage_dir,
6180
                                                     new_file_storage_dir)
6181
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
6182
                   " (but the instance has been renamed in Ganeti)" %
6183
                   (inst.primary_node, old_file_storage_dir,
6184
                    new_file_storage_dir))
6185

    
6186
    _StartInstanceDisks(self, inst, None)
6187
    try:
6188
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6189
                                                 old_name, self.op.debug_level)
6190
      msg = result.fail_msg
6191
      if msg:
6192
        msg = ("Could not run OS rename script for instance %s on node %s"
6193
               " (but the instance has been renamed in Ganeti): %s" %
6194
               (inst.name, inst.primary_node, msg))
6195
        self.proc.LogWarning(msg)
6196
    finally:
6197
      _ShutdownInstanceDisks(self, inst)
6198

    
6199
    return inst.name
6200

    
6201

    
6202
class LUInstanceRemove(LogicalUnit):
6203
  """Remove an instance.
6204

6205
  """
6206
  HPATH = "instance-remove"
6207
  HTYPE = constants.HTYPE_INSTANCE
6208
  REQ_BGL = False
6209

    
6210
  def ExpandNames(self):
6211
    self._ExpandAndLockInstance()
6212
    self.needed_locks[locking.LEVEL_NODE] = []
6213
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6214

    
6215
  def DeclareLocks(self, level):
6216
    if level == locking.LEVEL_NODE:
6217
      self._LockInstancesNodes()
6218

    
6219
  def BuildHooksEnv(self):
6220
    """Build hooks env.
6221

6222
    This runs on master, primary and secondary nodes of the instance.
6223

6224
    """
6225
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6226
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6227
    return env
6228

    
6229
  def BuildHooksNodes(self):
6230
    """Build hooks nodes.
6231

6232
    """
6233
    nl = [self.cfg.GetMasterNode()]
6234
    nl_post = list(self.instance.all_nodes) + nl
6235
    return (nl, nl_post)
6236

    
6237
  def CheckPrereq(self):
6238
    """Check prerequisites.
6239

6240
    This checks that the instance is in the cluster.
6241

6242
    """
6243
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6244
    assert self.instance is not None, \
6245
      "Cannot retrieve locked instance %s" % self.op.instance_name
6246

    
6247
  def Exec(self, feedback_fn):
6248
    """Remove the instance.
6249

6250
    """
6251
    instance = self.instance
6252
    logging.info("Shutting down instance %s on node %s",
6253
                 instance.name, instance.primary_node)
6254

    
6255
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6256
                                             self.op.shutdown_timeout)
6257
    msg = result.fail_msg
6258
    if msg:
6259
      if self.op.ignore_failures:
6260
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
6261
      else:
6262
        raise errors.OpExecError("Could not shutdown instance %s on"
6263
                                 " node %s: %s" %
6264
                                 (instance.name, instance.primary_node, msg))
6265

    
6266
    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6267

    
6268

    
6269
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6270
  """Utility function to remove an instance.
6271

6272
  """
6273
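  # Removal order: block devices first (optionally tolerating failures), then
  # the instance's entry in the cluster configuration, and finally the
  # instance lock is scheduled for removal once the LU releases its locks.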
  logging.info("Removing block devices for instance %s", instance.name)
6274

    
6275
  if not _RemoveDisks(lu, instance):
6276
    if not ignore_failures:
6277
      raise errors.OpExecError("Can't remove instance's disks")
6278
    feedback_fn("Warning: can't remove instance's disks")
6279

    
6280
  logging.info("Removing instance %s out of cluster config", instance.name)
6281

    
6282
  lu.cfg.RemoveInstance(instance.name)
6283

    
6284
  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6285
    "Instance lock removal conflict"
6286

    
6287
  # Remove lock for the instance
6288
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6289

    
6290

    
6291
class LUInstanceQuery(NoHooksLU):
6292
  """Logical unit for querying instances.
6293

6294
  """
6295
  # pylint: disable-msg=W0142
6296
  REQ_BGL = False
6297

    
6298
  def CheckArguments(self):
6299
    self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6300
                             self.op.output_fields, self.op.use_locking)
6301

    
6302
  def ExpandNames(self):
6303
    self.iq.ExpandNames(self)
6304

    
6305
  def DeclareLocks(self, level):
6306
    self.iq.DeclareLocks(self, level)
6307

    
6308
  def Exec(self, feedback_fn):
6309
    return self.iq.OldStyleQuery(self)
6310

    
6311

    
6312
class LUInstanceFailover(LogicalUnit):
6313
  """Failover an instance.
6314

6315
  """
6316
  HPATH = "instance-failover"
6317
  HTYPE = constants.HTYPE_INSTANCE
6318
  REQ_BGL = False
6319

    
6320
  def CheckArguments(self):
6321
    """Check the arguments.
6322

6323
    """
6324
    self.iallocator = getattr(self.op, "iallocator", None)
6325
    self.target_node = getattr(self.op, "target_node", None)
6326

    
6327
  def ExpandNames(self):
6328
    self._ExpandAndLockInstance()
6329

    
6330
    if self.op.target_node is not None:
6331
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6332

    
6333
    self.needed_locks[locking.LEVEL_NODE] = []
6334
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6335

    
6336
    ignore_consistency = self.op.ignore_consistency
6337
    shutdown_timeout = self.op.shutdown_timeout
6338
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6339
                                       cleanup=False,
6340
                                       failover=True,
6341
                                       ignore_consistency=ignore_consistency,
6342
                                       shutdown_timeout=shutdown_timeout)
6343
    self.tasklets = [self._migrater]
6344

    
6345
  def DeclareLocks(self, level):
6346
    if level == locking.LEVEL_NODE:
6347
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6348
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6349
        if self.op.target_node is None:
6350
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6351
        else:
6352
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6353
                                                   self.op.target_node]
6354
        del self.recalculate_locks[locking.LEVEL_NODE]
6355
      else:
6356
        self._LockInstancesNodes()
6357

    
6358
  def BuildHooksEnv(self):
6359
    """Build hooks env.
6360

6361
    This runs on master, primary and secondary nodes of the instance.
6362

6363
    """
6364
    instance = self._migrater.instance
6365
    source_node = instance.primary_node
6366
    target_node = self.op.target_node
6367
    env = {
6368
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6369
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6370
      "OLD_PRIMARY": source_node,
6371
      "NEW_PRIMARY": target_node,
6372
      }
6373

    
6374
    if instance.disk_template in constants.DTS_INT_MIRROR:
6375
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6376
      env["NEW_SECONDARY"] = source_node
6377
    else:
6378
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6379

    
6380
    env.update(_BuildInstanceHookEnvByObject(self, instance))
6381

    
6382
    return env
6383

    
6384
  def BuildHooksNodes(self):
6385
    """Build hooks nodes.
6386

6387
    """
6388
    instance = self._migrater.instance
6389
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6390
    return (nl, nl + [instance.primary_node])
6391

    
6392

    
6393
class LUInstanceMigrate(LogicalUnit):
6394
  """Migrate an instance.
6395

6396
  This is migration without shutting down, compared to the failover,
6397
  which is done with shutdown.
6398

6399
  """
6400
  HPATH = "instance-migrate"
6401
  HTYPE = constants.HTYPE_INSTANCE
6402
  REQ_BGL = False
6403

    
6404
  def ExpandNames(self):
6405
    self._ExpandAndLockInstance()
6406

    
6407
    if self.op.target_node is not None:
6408
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6409

    
6410
    self.needed_locks[locking.LEVEL_NODE] = []
6411
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6412

    
6413
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6414
                                       cleanup=self.op.cleanup,
6415
                                       failover=False,
6416
                                       fallback=self.op.allow_failover)
6417
    self.tasklets = [self._migrater]
6418

    
6419
  def DeclareLocks(self, level):
6420
    if level == locking.LEVEL_NODE:
6421
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6422
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6423
        if self.op.target_node is None:
6424
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6425
        else:
6426
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6427
                                                   self.op.target_node]
6428
        del self.recalculate_locks[locking.LEVEL_NODE]
6429
      else:
6430
        self._LockInstancesNodes()
6431

    
6432
  def BuildHooksEnv(self):
6433
    """Build hooks env.
6434

6435
    This runs on master, primary and secondary nodes of the instance.
6436

6437
    """
6438
    instance = self._migrater.instance
6439
    source_node = instance.primary_node
6440
    target_node = self.op.target_node
6441
    env = _BuildInstanceHookEnvByObject(self, instance)
6442
    env.update({
6443
      "MIGRATE_LIVE": self._migrater.live,
6444
      "MIGRATE_CLEANUP": self.op.cleanup,
6445
      "OLD_PRIMARY": source_node,
6446
      "NEW_PRIMARY": target_node,
6447
      })
6448

    
6449
    if instance.disk_template in constants.DTS_INT_MIRROR:
6450
      env["OLD_SECONDARY"] = target_node
6451
      env["NEW_SECONDARY"] = source_node
6452
    else:
6453
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6454

    
6455
    return env
6456

    
6457
  def BuildHooksNodes(self):
6458
    """Build hooks nodes.
6459

6460
    """
6461
    instance = self._migrater.instance
6462
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6463
    return (nl, nl + [instance.primary_node])
6464

    
6465

    
6466
class LUInstanceMove(LogicalUnit):
6467
  """Move an instance by data-copying.
6468

6469
  """
6470
  HPATH = "instance-move"
6471
  HTYPE = constants.HTYPE_INSTANCE
6472
  REQ_BGL = False
6473

    
6474
  def ExpandNames(self):
6475
    self._ExpandAndLockInstance()
6476
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6477
    self.op.target_node = target_node
6478
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
6479
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6480

    
6481
  def DeclareLocks(self, level):
6482
    if level == locking.LEVEL_NODE:
6483
      self._LockInstancesNodes(primary_only=True)
6484

    
6485
  def BuildHooksEnv(self):
6486
    """Build hooks env.
6487

6488
    This runs on master, primary and secondary nodes of the instance.
6489

6490
    """
6491
    env = {
6492
      "TARGET_NODE": self.op.target_node,
6493
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6494
      }
6495
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6496
    return env
6497

    
6498
  def BuildHooksNodes(self):
6499
    """Build hooks nodes.
6500

6501
    """
6502
    nl = [
6503
      self.cfg.GetMasterNode(),
6504
      self.instance.primary_node,
6505
      self.op.target_node,
6506
      ]
6507
    return (nl, nl)
6508

    
6509
  def CheckPrereq(self):
6510
    """Check prerequisites.
6511

6512
    This checks that the instance is in the cluster.
6513

6514
    """
6515
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6516
    assert self.instance is not None, \
6517
      "Cannot retrieve locked instance %s" % self.op.instance_name
6518

    
6519
    node = self.cfg.GetNodeInfo(self.op.target_node)
6520
    assert node is not None, \
6521
      "Cannot retrieve locked node %s" % self.op.target_node
6522

    
6523
    self.target_node = target_node = node.name
6524

    
6525
    if target_node == instance.primary_node:
6526
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
6527
                                 (instance.name, target_node),
6528
                                 errors.ECODE_STATE)
6529

    
6530
    bep = self.cfg.GetClusterInfo().FillBE(instance)
6531

    
6532
    for idx, dsk in enumerate(instance.disks):
6533
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6534
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6535
                                   " cannot copy" % idx, errors.ECODE_STATE)
6536

    
6537
    _CheckNodeOnline(self, target_node)
6538
    _CheckNodeNotDrained(self, target_node)
6539
    _CheckNodeVmCapable(self, target_node)
6540

    
6541
    if instance.admin_up:
6542
      # check memory requirements on the target node
6543
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6544
                           instance.name, bep[constants.BE_MEMORY],
6545
                           instance.hypervisor)
6546
    else:
6547
      self.LogInfo("Not checking memory on the secondary node as"
6548
                   " instance will not be started")
6549

    
6550
    # check bridge existence
6551
    _CheckInstanceBridgesExist(self, instance, node=target_node)
6552

    
6553
  def Exec(self, feedback_fn):
6554
    """Move an instance.
6555

6556
    The move is done by shutting it down on its present node, copying
6557
    the data over (slow) and starting it on the new node.
6558

6559
    """
6560
    instance = self.instance
6561

    
6562
    source_node = instance.primary_node
6563
    target_node = self.target_node
6564

    
6565
    self.LogInfo("Shutting down instance %s on source node %s",
6566
                 instance.name, source_node)
6567

    
6568
    result = self.rpc.call_instance_shutdown(source_node, instance,
6569
                                             self.op.shutdown_timeout)
6570
    msg = result.fail_msg
6571
    if msg:
6572
      if self.op.ignore_consistency:
6573
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
6574
                             " Proceeding anyway. Please make sure node"
6575
                             " %s is down. Error details: %s",
6576
                             instance.name, source_node, source_node, msg)
6577
      else:
6578
        raise errors.OpExecError("Could not shutdown instance %s on"
6579
                                 " node %s: %s" %
6580
                                 (instance.name, source_node, msg))
6581

    
6582
    # create the target disks
6583
    try:
6584
      _CreateDisks(self, instance, target_node=target_node)
6585
    except errors.OpExecError:
6586
      self.LogWarning("Device creation failed, reverting...")
6587
      try:
6588
        _RemoveDisks(self, instance, target_node=target_node)
6589
      finally:
6590
        self.cfg.ReleaseDRBDMinors(instance.name)
6591
        raise
6592

    
6593
    cluster_name = self.cfg.GetClusterInfo().cluster_name
6594

    
6595
    errs = []
6596
    # activate, get path, copy the data over
6597
    for idx, disk in enumerate(instance.disks):
6598
      self.LogInfo("Copying data for disk %d", idx)
6599
      result = self.rpc.call_blockdev_assemble(target_node, disk,
6600
                                               instance.name, True, idx)
6601
      if result.fail_msg:
6602
        self.LogWarning("Can't assemble newly created disk %d: %s",
6603
                        idx, result.fail_msg)
6604
        errs.append(result.fail_msg)
6605
        break
6606
      dev_path = result.payload
6607
      result = self.rpc.call_blockdev_export(source_node, disk,
6608
                                             target_node, dev_path,
6609
                                             cluster_name)
6610
      if result.fail_msg:
6611
        self.LogWarning("Can't copy data over for disk %d: %s",
6612
                        idx, result.fail_msg)
6613
        errs.append(result.fail_msg)
6614
        break
6615

    
6616
    if errs:
6617
      self.LogWarning("Some disks failed to copy, aborting")
6618
      try:
6619
        _RemoveDisks(self, instance, target_node=target_node)
6620
      finally:
6621
        self.cfg.ReleaseDRBDMinors(instance.name)
6622
        raise errors.OpExecError("Errors during disk copy: %s" %
6623
                                 (",".join(errs),))
6624

    
6625
    instance.primary_node = target_node
6626
    self.cfg.Update(instance, feedback_fn)
6627

    
6628
    self.LogInfo("Removing the disks on the original node")
6629
    _RemoveDisks(self, instance, target_node=source_node)
6630

    
6631
    # Only start the instance if it's marked as up
6632
    if instance.admin_up:
6633
      self.LogInfo("Starting instance %s on node %s",
6634
                   instance.name, target_node)
6635

    
6636
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
6637
                                           ignore_secondaries=True)
6638
      if not disks_ok:
6639
        _ShutdownInstanceDisks(self, instance)
6640
        raise errors.OpExecError("Can't activate the instance's disks")
6641

    
6642
      result = self.rpc.call_instance_start(target_node, instance, None, None)
6643
      msg = result.fail_msg
6644
      if msg:
6645
        _ShutdownInstanceDisks(self, instance)
6646
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6647
                                 (instance.name, target_node, msg))
6648

    
6649

    
6650
class LUNodeMigrate(LogicalUnit):
6651
  """Migrate all instances from a node.
6652

6653
  """
6654
  HPATH = "node-migrate"
6655
  HTYPE = constants.HTYPE_NODE
6656
  REQ_BGL = False
6657

    
6658
  def CheckArguments(self):
6659
    pass
6660

    
6661
  def ExpandNames(self):
6662
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6663

    
6664
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
6665
    self.needed_locks = {
6666
      locking.LEVEL_NODE: [self.op.node_name],
6667
      }
6668

    
6669
  def BuildHooksEnv(self):
6670
    """Build hooks env.
6671

6672
    This runs on the master, the primary and all the secondaries.
6673

6674
    """
6675
    return {
6676
      "NODE_NAME": self.op.node_name,
6677
      }
6678

    
6679
  def BuildHooksNodes(self):
6680
    """Build hooks nodes.
6681

6682
    """
6683
    nl = [self.cfg.GetMasterNode()]
6684
    return (nl, nl)
6685

    
6686
  def CheckPrereq(self):
6687
    pass
6688

    
6689
  def Exec(self, feedback_fn):
6690
    # Prepare jobs for migration instances
6691
    jobs = [
6692
      [opcodes.OpInstanceMigrate(instance_name=inst.name,
6693
                                 mode=self.op.mode,
6694
                                 live=self.op.live,
6695
                                 iallocator=self.op.iallocator,
6696
                                 target_node=self.op.target_node)]
6697
      for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
6698
      ]
6699

    
6700
    # TODO: Run iallocator in this opcode and pass correct placement options to
6701
    # OpInstanceMigrate. Since other jobs can modify the cluster between
6702
    # running the iallocator and the actual migration, a good consistency model
6703
    # will have to be found.
6704

    
6705
    assert (frozenset(self.glm.list_owned(locking.LEVEL_NODE)) ==
6706
            frozenset([self.op.node_name]))
6707

    
6708
    return ResultWithJobs(jobs)
6709
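  # Note: this LU does not migrate anything itself; it builds one
  # single-opcode OpInstanceMigrate job per primary instance on the node and
  # returns them via ResultWithJobs so that they run as separate jobs.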

    
6710

    
6711
class TLMigrateInstance(Tasklet):
6712
  """Tasklet class for instance migration.
6713

6714
  @type live: boolean
6715
  @ivar live: whether the migration will be done live or non-live;
6716
      this variable is initialized only after CheckPrereq has run
6717
  @type cleanup: boolean
6718
  @ivar cleanup: Whether we clean up from a failed migration
6719
  @type iallocator: string
6720
  @ivar iallocator: The iallocator used to determine target_node
6721
  @type target_node: string
6722
  @ivar target_node: If given, the target_node to reallocate the instance to
6723
  @type failover: boolean
6724
  @ivar failover: Whether operation results in failover or migration
6725
  @type fallback: boolean
6726
  @ivar fallback: Whether fallback to failover is allowed if migration is not
6727
                  possible
6728
  @type ignore_consistency: boolean
6729
  @ivar ignore_consistency: Whether we should ignore consistency between source
6730
                            and target node
6731
  @type shutdown_timeout: int
6732
  @ivar shutdown_timeout: In case of failover, the timeout of the shutdown
6733

6734
  """
6735
  def __init__(self, lu, instance_name, cleanup=False,
6736
               failover=False, fallback=False,
6737
               ignore_consistency=False,
6738
               shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
6739
    """Initializes this class.
6740

6741
    """
6742
    Tasklet.__init__(self, lu)
6743

    
6744
    # Parameters
6745
    self.instance_name = instance_name
6746
    self.cleanup = cleanup
6747
    self.live = False # will be overridden later
6748
    self.failover = failover
6749
    self.fallback = fallback
6750
    self.ignore_consistency = ignore_consistency
6751
    self.shutdown_timeout = shutdown_timeout
6752
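  # Illustrative construction (mirroring LUInstanceFailover.ExpandNames
  # above, with the keyword values it passes through from its opcode):
  #
  #   TLMigrateInstance(self, self.op.instance_name, cleanup=False,
  #                     failover=True,
  #                     ignore_consistency=self.op.ignore_consistency,
  #                     shutdown_timeout=self.op.shutdown_timeout)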

    
6753
  def CheckPrereq(self):
6754
    """Check prerequisites.
6755

6756
    This checks that the instance is in the cluster.
6757

6758
    """
6759
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6760
    instance = self.cfg.GetInstanceInfo(instance_name)
6761
    assert instance is not None
6762
    self.instance = instance
6763

    
6764
    if (not self.cleanup and not instance.admin_up and not self.failover and
6765
        self.fallback):
6766
      self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
6767
                      " to failover")
6768
      self.failover = True
6769

    
6770
    if instance.disk_template not in constants.DTS_MIRRORED:
6771
      if self.failover:
6772
        text = "failovers"
6773
      else:
6774
        text = "migrations"
6775
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
6776
                                 " %s" % (instance.disk_template, text),
6777
                                 errors.ECODE_STATE)
6778

    
6779
    if instance.disk_template in constants.DTS_EXT_MIRROR:
6780
      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
6781

    
6782
      if self.lu.op.iallocator:
6783
        self._RunAllocator()
6784
      else:
6785
        # We set self.target_node as it is required by
6786
        # BuildHooksEnv
6787
        self.target_node = self.lu.op.target_node
6788

    
6789
      # self.target_node is already populated, either directly or by the
6790
      # iallocator run
6791
      target_node = self.target_node
6792
      if self.target_node == instance.primary_node:
6793
        raise errors.OpPrereqError("Cannot migrate instance %s"
6794
                                   " to its primary (%s)" %
6795
                                   (instance.name, instance.primary_node))
6796

    
6797
      if len(self.lu.tasklets) == 1:
6798
        # It is safe to release locks only when we're the only tasklet
6799
        # in the LU
6800
        _ReleaseLocks(self.lu, locking.LEVEL_NODE,
6801
                      keep=[instance.primary_node, self.target_node])
6802

    
6803
    else:
6804
      secondary_nodes = instance.secondary_nodes
6805
      if not secondary_nodes:
6806
        raise errors.ConfigurationError("No secondary node but using"
6807
                                        " %s disk template" %
6808
                                        instance.disk_template)
6809
      target_node = secondary_nodes[0]
6810
      if self.lu.op.iallocator or (self.lu.op.target_node and
6811
                                   self.lu.op.target_node != target_node):
6812
        if self.failover:
6813
          text = "failed over"
6814
        else:
6815
          text = "migrated"
6816
        raise errors.OpPrereqError("Instances with disk template %s cannot"
6817
                                   " be %s to arbitrary nodes"
6818
                                   " (neither an iallocator nor a target"
6819
                                   " node can be passed)" %
6820
                                   (instance.disk_template, text),
6821
                                   errors.ECODE_INVAL)
6822

    
6823
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
6824

    
6825
    # check memory requirements on the secondary node
6826
    if not self.failover or instance.admin_up:
6827
      _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
6828
                           instance.name, i_be[constants.BE_MEMORY],
6829
                           instance.hypervisor)
6830
    else:
6831
      self.lu.LogInfo("Not checking memory on the secondary node as"
6832
                      " instance will not be started")
6833

    
6834
    # check bridge existence
6835
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
6836

    
6837
    if not self.cleanup:
6838
      _CheckNodeNotDrained(self.lu, target_node)
6839
      if not self.failover:
6840
        result = self.rpc.call_instance_migratable(instance.primary_node,
6841
                                                   instance)
6842
        if result.fail_msg and self.fallback:
6843
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
6844
                          " failover")
6845
          self.failover = True
6846
        else:
6847
          result.Raise("Can't migrate, please use failover",
6848
                       prereq=True, ecode=errors.ECODE_STATE)
6849

    
6850
    assert not (self.failover and self.cleanup)
6851

    
6852
    if not self.failover:
6853
      if self.lu.op.live is not None and self.lu.op.mode is not None:
6854
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
6855
                                   " parameters are accepted",
6856
                                   errors.ECODE_INVAL)
6857
      if self.lu.op.live is not None:
6858
        if self.lu.op.live:
6859
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
6860
        else:
6861
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
6862
        # reset the 'live' parameter to None so that repeated
6863
        # invocations of CheckPrereq do not raise an exception
6864
        self.lu.op.live = None
6865
      elif self.lu.op.mode is None:
6866
        # read the default value from the hypervisor
6867
        i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
6868
                                                skip_globals=False)
6869
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
6870

    
6871
      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
6872
    else:
6873
      # Failover is never live
6874
      self.live = False
6875

    
6876
  def _RunAllocator(self):
6877
    """Run the allocator based on input opcode.
6878

6879
    """
6880
    ial = IAllocator(self.cfg, self.rpc,
6881
                     mode=constants.IALLOCATOR_MODE_RELOC,
6882
                     name=self.instance_name,
6883
                     # TODO See why hail breaks with a single node below
6884
                     relocate_from=[self.instance.primary_node,
6885
                                    self.instance.primary_node],
6886
                     )
6887

    
6888
    ial.Run(self.lu.op.iallocator)
6889

    
6890
    if not ial.success:
6891
      raise errors.OpPrereqError("Can't compute nodes using"
6892
                                 " iallocator '%s': %s" %
6893
                                 (self.lu.op.iallocator, ial.info),
6894
                                 errors.ECODE_NORES)
6895
    if len(ial.result) != ial.required_nodes:
6896
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6897
                                 " of nodes (%s), required %s" %
6898
                                 (self.lu.op.iallocator, len(ial.result),
6899
                                  ial.required_nodes), errors.ECODE_FAULT)
6900
    self.target_node = ial.result[0]
6901
    self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6902
                 self.instance_name, self.lu.op.iallocator,
6903
                 utils.CommaJoin(ial.result))
6904

    
6905
  def _WaitUntilSync(self):
6906
    """Poll with custom rpc for disk sync.
6907

6908
    This uses our own step-based rpc call.
6909

6910
    """
6911
    self.feedback_fn("* wait until resync is done")
6912
    all_done = False
6913
    while not all_done:
6914
      all_done = True
6915
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
6916
                                            self.nodes_ip,
6917
                                            self.instance.disks)
6918
      min_percent = 100
6919
      for node, nres in result.items():
6920
        nres.Raise("Cannot resync disks on node %s" % node)
6921
        node_done, node_percent = nres.payload
6922
        all_done = all_done and node_done
6923
        if node_percent is not None:
6924
          min_percent = min(min_percent, node_percent)
6925
      if not all_done:
6926
        if min_percent < 100:
6927
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
6928
        time.sleep(2)
6929
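  # _WaitUntilSync reports the progress of the slowest node and polls every
  # two seconds until every node reports its DRBD devices as fully synced.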

    
6930
  def _EnsureSecondary(self, node):
6931
    """Demote a node to secondary.
6932

6933
    """
6934
    self.feedback_fn("* switching node %s to secondary mode" % node)
6935

    
6936
    for dev in self.instance.disks:
6937
      self.cfg.SetDiskID(dev, node)
6938

    
6939
    result = self.rpc.call_blockdev_close(node, self.instance.name,
6940
                                          self.instance.disks)
6941
    result.Raise("Cannot change disk to secondary on node %s" % node)
6942

    
6943
  def _GoStandalone(self):
6944
    """Disconnect from the network.
6945

6946
    """
6947
    self.feedback_fn("* changing into standalone mode")
6948
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
6949
                                               self.instance.disks)
6950
    for node, nres in result.items():
6951
      nres.Raise("Cannot disconnect disks node %s" % node)
6952

    
6953
  def _GoReconnect(self, multimaster):
6954
    """Reconnect to the network.
6955

6956
    """
6957
    if multimaster:
6958
      msg = "dual-master"
6959
    else:
6960
      msg = "single-master"
6961
    self.feedback_fn("* changing disks into %s mode" % msg)
6962
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
6963
                                           self.instance.disks,
6964
                                           self.instance.name, multimaster)
6965
    for node, nres in result.items():
6966
      nres.Raise("Cannot change disks config on node %s" % node)
6967

    
6968
  def _ExecCleanup(self):
6969
    """Try to cleanup after a failed migration.
6970

6971
    The cleanup is done by:
6972
      - check that the instance is running only on one node
6973
        (and update the config if needed)
6974
      - change disks on its secondary node to secondary
6975
      - wait until disks are fully synchronized
6976
      - disconnect from the network
6977
      - change disks into single-master mode
6978
      - wait again until disks are fully synchronized
6979

6980
    """
6981
    instance = self.instance
6982
    target_node = self.target_node
6983
    source_node = self.source_node
6984

    
6985
    # check running on only one node
6986
    self.feedback_fn("* checking where the instance actually runs"
6987
                     " (if this hangs, the hypervisor might be in"
6988
                     " a bad state)")
6989
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6990
    for node, result in ins_l.items():
6991
      result.Raise("Can't contact node %s" % node)
6992

    
6993
    runningon_source = instance.name in ins_l[source_node].payload
6994
    runningon_target = instance.name in ins_l[target_node].payload
6995

    
6996
    if runningon_source and runningon_target:
6997
      raise errors.OpExecError("Instance seems to be running on two nodes,"
6998
                               " or the hypervisor is confused; you will have"
6999
                               " to ensure manually that it runs only on one"
7000
                               " and restart this operation")
7001

    
7002
    if not (runningon_source or runningon_target):
7003
      raise errors.OpExecError("Instance does not seem to be running at all;"
7004
                               " in this case it's safer to repair by"
7005
                               " running 'gnt-instance stop' to ensure disk"
7006
                               " shutdown, and then restarting it")
7007

    
7008
    if runningon_target:
7009
      # the migration has actually succeeded, we need to update the config
7010
      self.feedback_fn("* instance running on secondary node (%s),"
7011
                       " updating config" % target_node)
7012
      instance.primary_node = target_node
7013
      self.cfg.Update(instance, self.feedback_fn)
7014
      demoted_node = source_node
7015
    else:
7016
      self.feedback_fn("* instance confirmed to be running on its"
7017
                       " primary node (%s)" % source_node)
7018
      demoted_node = target_node
7019

    
7020
    if instance.disk_template in constants.DTS_INT_MIRROR:
7021
      self._EnsureSecondary(demoted_node)
7022
      try:
7023
        self._WaitUntilSync()
7024
      except errors.OpExecError:
7025
        # we ignore errors here, since if the device is standalone, it
        # won't be able to sync
7027
        pass
7028
      self._GoStandalone()
7029
      self._GoReconnect(False)
7030
      self._WaitUntilSync()
7031

    
7032
    self.feedback_fn("* done")
7033

    
7034
  def _RevertDiskStatus(self):
7035
    """Try to revert the disk status after a failed migration.
7036

7037
    """
7038
    target_node = self.target_node
7039
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7040
      return
7041

    
7042
    try:
7043
      self._EnsureSecondary(target_node)
7044
      self._GoStandalone()
7045
      self._GoReconnect(False)
7046
      self._WaitUntilSync()
7047
    except errors.OpExecError, err:
7048
      self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7049
                         " please try to recover the instance manually;"
7050
                         " error '%s'" % str(err))
7051

    
7052
  def _AbortMigration(self):
7053
    """Call the hypervisor code to abort a started migration.
7054

7055
    """
7056
    instance = self.instance
7057
    target_node = self.target_node
7058
    migration_info = self.migration_info
7059

    
7060
    abort_result = self.rpc.call_finalize_migration(target_node,
7061
                                                    instance,
7062
                                                    migration_info,
7063
                                                    False)
7064
    abort_msg = abort_result.fail_msg
7065
    if abort_msg:
7066
      logging.error("Aborting migration failed on target node %s: %s",
7067
                    target_node, abort_msg)
7068
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.

  def _ExecMigration(self):
7072
    """Migrate an instance.
7073

7074
    The migrate is done by:
7075
      - change the disks into dual-master mode
7076
      - wait until disks are fully synchronized again
7077
      - migrate the instance
7078
      - change disks on the new secondary node (the old primary) to secondary
7079
      - wait until disks are fully synchronized
7080
      - change disks into single-master mode
7081

7082
    """
7083
    instance = self.instance
7084
    target_node = self.target_node
7085
    source_node = self.source_node
7086

    
7087
    self.feedback_fn("* checking disk consistency between source and target")
7088
    for dev in instance.disks:
7089
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7090
        raise errors.OpExecError("Disk %s is degraded or not fully"
7091
                                 " synchronized on target node,"
7092
                                 " aborting migration" % dev.iv_name)
7093

    
7094
    # First get the migration information from the remote node
7095
    result = self.rpc.call_migration_info(source_node, instance)
7096
    msg = result.fail_msg
7097
    if msg:
7098
      log_err = ("Failed fetching source migration information from %s: %s" %
7099
                 (source_node, msg))
7100
      logging.error(log_err)
7101
      raise errors.OpExecError(log_err)
7102

    
7103
    self.migration_info = migration_info = result.payload
7104

    
7105
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7106
      # Then switch the disks to master/master mode
7107
      self._EnsureSecondary(target_node)
7108
      self._GoStandalone()
7109
      self._GoReconnect(True)
7110
      self._WaitUntilSync()
7111

    
7112
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
7113
    result = self.rpc.call_accept_instance(target_node,
7114
                                           instance,
7115
                                           migration_info,
7116
                                           self.nodes_ip[target_node])
7117

    
7118
    msg = result.fail_msg
7119
    if msg:
7120
      logging.error("Instance pre-migration failed, trying to revert"
7121
                    " disk status: %s", msg)
7122
      self.feedback_fn("Pre-migration failed, aborting")
7123
      self._AbortMigration()
7124
      self._RevertDiskStatus()
7125
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7126
                               (instance.name, msg))
7127

    
7128
    self.feedback_fn("* migrating instance to %s" % target_node)
7129
    result = self.rpc.call_instance_migrate(source_node, instance,
7130
                                            self.nodes_ip[target_node],
7131
                                            self.live)
7132
    msg = result.fail_msg
7133
    if msg:
7134
      logging.error("Instance migration failed, trying to revert"
7135
                    " disk status: %s", msg)
7136
      self.feedback_fn("Migration failed, aborting")
7137
      self._AbortMigration()
7138
      self._RevertDiskStatus()
7139
      raise errors.OpExecError("Could not migrate instance %s: %s" %
7140
                               (instance.name, msg))
7141

    
7142
    instance.primary_node = target_node
7143
    # distribute new instance config to the other nodes
7144
    self.cfg.Update(instance, self.feedback_fn)
7145

    
7146
    result = self.rpc.call_finalize_migration(target_node,
7147
                                              instance,
7148
                                              migration_info,
7149
                                              True)
7150
    msg = result.fail_msg
7151
    if msg:
7152
      logging.error("Instance migration succeeded, but finalization failed:"
7153
                    " %s", msg)
7154
      raise errors.OpExecError("Could not finalize instance migration: %s" %
7155
                               msg)
7156

    
7157
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7158
      self._EnsureSecondary(source_node)
7159
      self._WaitUntilSync()
7160
      self._GoStandalone()
7161
      self._GoReconnect(False)
7162
      self._WaitUntilSync()
7163

    
7164
    self.feedback_fn("* done")
7165

    
7166
  def _ExecFailover(self):
7167
    """Failover an instance.
7168

7169
    The failover is done by shutting it down on its present node and
7170
    starting it on the secondary.
7171

7172
    """
7173
    instance = self.instance
7174
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7175

    
7176
    source_node = instance.primary_node
7177
    target_node = self.target_node
7178

    
7179
    if instance.admin_up:
7180
      self.feedback_fn("* checking disk consistency between source and target")
7181
      for dev in instance.disks:
7182
        # for drbd, these are drbd over lvm
7183
        if not _CheckDiskConsistency(self, dev, target_node, False):
7184
          if not self.ignore_consistency:
7185
            raise errors.OpExecError("Disk %s is degraded on target node,"
7186
                                     " aborting failover" % dev.iv_name)
7187
    else:
7188
      self.feedback_fn("* not checking disk consistency as instance is not"
7189
                       " running")
7190

    
7191
    self.feedback_fn("* shutting down instance on source node")
7192
    logging.info("Shutting down instance %s on node %s",
7193
                 instance.name, source_node)
7194

    
7195
    result = self.rpc.call_instance_shutdown(source_node, instance,
7196
                                             self.shutdown_timeout)
7197
    msg = result.fail_msg
7198
    if msg:
7199
      if self.ignore_consistency or primary_node.offline:
7200
        self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7201
                           " proceeding anyway; please make sure node"
7202
                           " %s is down; error details: %s",
7203
                           instance.name, source_node, source_node, msg)
7204
      else:
7205
        raise errors.OpExecError("Could not shutdown instance %s on"
7206
                                 " node %s: %s" %
7207
                                 (instance.name, source_node, msg))
7208

    
7209
    self.feedback_fn("* deactivating the instance's disks on source node")
7210
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
7211
      raise errors.OpExecError("Can't shut down the instance's disks.")
7212

    
7213
    instance.primary_node = target_node
7214
    # distribute new instance config to the other nodes
7215
    self.cfg.Update(instance, self.feedback_fn)
7216

    
7217
    # Only start the instance if it's marked as up
7218
    if instance.admin_up:
7219
      self.feedback_fn("* activating the instance's disks on target node")
7220
      logging.info("Starting instance %s on node %s",
7221
                   instance.name, target_node)
7222

    
7223
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
7224
                                           ignore_secondaries=True)
7225
      if not disks_ok:
7226
        _ShutdownInstanceDisks(self, instance)
7227
        raise errors.OpExecError("Can't activate the instance's disks")
7228

    
7229
      self.feedback_fn("* starting the instance on the target node")
7230
      result = self.rpc.call_instance_start(target_node, instance, None, None)
7231
      msg = result.fail_msg
7232
      if msg:
7233
        _ShutdownInstanceDisks(self, instance)
7234
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7235
                                 (instance.name, target_node, msg))

  def Exec(self, feedback_fn):
7238
    """Perform the migration.
7239

7240
    """
7241
    self.feedback_fn = feedback_fn
7242
    self.source_node = self.instance.primary_node
7243

    
7244
    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7245
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
7246
      self.target_node = self.instance.secondary_nodes[0]
7247
      # Otherwise self.target_node has been populated either
7248
      # directly, or through an iallocator.
7249

    
7250
    self.all_nodes = [self.source_node, self.target_node]
7251
    self.nodes_ip = {
7252
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
7253
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
7254
      }
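    # The secondary IPs collected above are what the DRBD reconfiguration and
    # migration RPCs below use to reach the source and target nodes.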
7255

    
7256
    if self.failover:
7257
      feedback_fn("Failover instance %s" % self.instance.name)
7258
      self._ExecFailover()
7259
    else:
7260
      feedback_fn("Migrating instance %s" % self.instance.name)
7261

    
7262
      if self.cleanup:
7263
        return self._ExecCleanup()
7264
      else:
7265
        return self._ExecMigration()


def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
7270
  """Create a tree of block devices on a given node.
7271

7272
  If this device type has to be created on secondaries, create it and
7273
  all its children.
7274

7275
  If not, just recurse to children keeping the same 'force' value.
7276

7277
  @param lu: the lu on whose behalf we execute
7278
  @param node: the node on which to create the device
7279
  @type instance: L{objects.Instance}
7280
  @param instance: the instance which owns the device
7281
  @type device: L{objects.Disk}
7282
  @param device: the device to create
7283
  @type force_create: boolean
7284
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device for which
      CreateOnSecondary() is true
7287
  @param info: the extra 'metadata' we should attach to the device
7288
      (this will be represented as a LVM tag)
7289
  @type force_open: boolean
7290
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution
7294

7295
  """
7296
  if device.CreateOnSecondary():
7297
    force_create = True
7298

    
7299
  if device.children:
7300
    for child in device.children:
7301
      _CreateBlockDev(lu, node, instance, child, force_create,
7302
                      info, force_open)
7303

    
7304
  if not force_create:
7305
    return
7306

    
7307
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)


def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7311
  """Create a single block device on a given node.
7312

7313
  This will not recurse over children of the device, so they must be
7314
  created in advance.
7315

7316
  @param lu: the lu on whose behalf we execute
7317
  @param node: the node on which to create the device
7318
  @type instance: L{objects.Instance}
7319
  @param instance: the instance which owns the device
7320
  @type device: L{objects.Disk}
7321
  @param device: the device to create
7322
  @param info: the extra 'metadata' we should attach to the device
7323
      (this will be represented as a LVM tag)
7324
  @type force_open: boolean
7325
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution
7329

7330
  """
7331
  lu.cfg.SetDiskID(device, node)
7332
  result = lu.rpc.call_blockdev_create(node, device, device.size,
7333
                                       instance.name, force_open, info)
7334
  result.Raise("Can't create block device %s on"
7335
               " node %s for instance %s" % (device, node, instance.name))
7336
  if device.physical_id is None:
7337
    device.physical_id = result.payload


def _GenerateUniqueNames(lu, exts):
7341
  """Generate a suitable LV name.
7342

7343
  This will generate a logical volume name for the given instance.
7344

7345
  """
7346
  results = []
7347
  for val in exts:
7348
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
7349
    results.append("%s%s" % (new_id, val))
7350
  return results


def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
                         iv_name, p_minor, s_minor):
7355
  """Generate a drbd8 device complete with its children.
7356

7357
  """
7358
  assert len(vgnames) == len(names) == 2
7359
  port = lu.cfg.AllocatePort()
7360
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
7361
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7362
                          logical_id=(vgnames[0], names[0]))
7363
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7364
                          logical_id=(vgnames[1], names[1]))
7365
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
7366
                          logical_id=(primary, secondary, port,
7367
                                      p_minor, s_minor,
7368
                                      shared_secret),
7369
                          children=[dev_data, dev_meta],
7370
                          iv_name=iv_name)
7371
  return drbd_dev


def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index, feedback_fn):
7379
  """Generate the entire disk layout for a given template type.
7380

7381
  """
7382
  #TODO: compute space requirements
7383

    
7384
  vgname = lu.cfg.GetVGName()
7385
  disk_count = len(disk_info)
7386
  disks = []
7387
  if template_name == constants.DT_DISKLESS:
7388
    pass
7389
  elif template_name == constants.DT_PLAIN:
7390
    if len(secondary_nodes) != 0:
7391
      raise errors.ProgrammerError("Wrong template configuration")
7392

    
7393
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7394
                                      for i in range(disk_count)])
7395
    for idx, disk in enumerate(disk_info):
7396
      disk_index = idx + base_index
7397
      vg = disk.get(constants.IDISK_VG, vgname)
7398
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7399
      disk_dev = objects.Disk(dev_type=constants.LD_LV,
7400
                              size=disk[constants.IDISK_SIZE],
7401
                              logical_id=(vg, names[idx]),
7402
                              iv_name="disk/%d" % disk_index,
7403
                              mode=disk[constants.IDISK_MODE])
7404
      disks.append(disk_dev)
7405
  elif template_name == constants.DT_DRBD8:
7406
    if len(secondary_nodes) != 1:
7407
      raise errors.ProgrammerError("Wrong template configuration")
7408
    remote_node = secondary_nodes[0]
7409
    minors = lu.cfg.AllocateDRBDMinor(
7410
      [primary_node, remote_node] * len(disk_info), instance_name)
7411

    
7412
    names = []
7413
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7414
                                               for i in range(disk_count)]):
7415
      names.append(lv_prefix + "_data")
7416
      names.append(lv_prefix + "_meta")
7417
    for idx, disk in enumerate(disk_info):
7418
      disk_index = idx + base_index
7419
      data_vg = disk.get(constants.IDISK_VG, vgname)
7420
      meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7421
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7422
                                      disk[constants.IDISK_SIZE],
7423
                                      [data_vg, meta_vg],
7424
                                      names[idx * 2:idx * 2 + 2],
7425
                                      "disk/%d" % disk_index,
7426
                                      minors[idx * 2], minors[idx * 2 + 1])
7427
      disk_dev.mode = disk[constants.IDISK_MODE]
7428
      disks.append(disk_dev)
7429
  elif template_name == constants.DT_FILE:
7430
    if len(secondary_nodes) != 0:
7431
      raise errors.ProgrammerError("Wrong template configuration")
7432

    
7433
    opcodes.RequireFileStorage()
7434

    
7435
    for idx, disk in enumerate(disk_info):
7436
      disk_index = idx + base_index
7437
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7438
                              size=disk[constants.IDISK_SIZE],
7439
                              iv_name="disk/%d" % disk_index,
7440
                              logical_id=(file_driver,
7441
                                          "%s/disk%d" % (file_storage_dir,
7442
                                                         disk_index)),
7443
                              mode=disk[constants.IDISK_MODE])
7444
      disks.append(disk_dev)
7445
  elif template_name == constants.DT_SHARED_FILE:
7446
    if len(secondary_nodes) != 0:
7447
      raise errors.ProgrammerError("Wrong template configuration")
7448

    
7449
    opcodes.RequireSharedFileStorage()
7450

    
7451
    for idx, disk in enumerate(disk_info):
7452
      disk_index = idx + base_index
7453
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7454
                              size=disk[constants.IDISK_SIZE],
7455
                              iv_name="disk/%d" % disk_index,
7456
                              logical_id=(file_driver,
7457
                                          "%s/disk%d" % (file_storage_dir,
7458
                                                         disk_index)),
7459
                              mode=disk[constants.IDISK_MODE])
7460
      disks.append(disk_dev)
7461
  elif template_name == constants.DT_BLOCK:
7462
    if len(secondary_nodes) != 0:
7463
      raise errors.ProgrammerError("Wrong template configuration")
7464

    
7465
    for idx, disk in enumerate(disk_info):
7466
      disk_index = idx + base_index
7467
      disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7468
                              size=disk[constants.IDISK_SIZE],
7469
                              logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7470
                                          disk[constants.IDISK_ADOPT]),
7471
                              iv_name="disk/%d" % disk_index,
7472
                              mode=disk[constants.IDISK_MODE])
7473
      disks.append(disk_dev)
7474

    
7475
  else:
7476
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
7477
  return disks


def _GetInstanceInfoText(instance):
7481
  """Compute that text that should be added to the disk's metadata.
7482

7483
  """
  return "originstname+%s" % instance.name


def _CalcEta(time_taken, written, total_size):
7488
  """Calculates the ETA based on size written and total size.
7489

7490
  @param time_taken: The time taken so far
7491
  @param written: amount written so far
7492
  @param total_size: The total size of data to be written
7493
  @return: The remaining time in seconds
7494

7495
  """
7496
  avg_time = time_taken / float(written)
7497
  return (total_size - written) * avg_time


def _WipeDisks(lu, instance):
7501
  """Wipes instance disks.
7502

7503
  @type lu: L{LogicalUnit}
7504
  @param lu: the logical unit on whose behalf we execute
7505
  @type instance: L{objects.Instance}
7506
  @param instance: the instance whose disks we should create
7507
  @return: the success of the wipe
7508

7509
  """
7510
  node = instance.primary_node
7511

    
7512
  for device in instance.disks:
7513
    lu.cfg.SetDiskID(device, node)
7514

    
7515
  logging.info("Pause sync of instance %s disks", instance.name)
7516
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7517

    
7518
  for idx, success in enumerate(result.payload):
7519
    if not success:
7520
      logging.warn("pause-sync of instance %s for disks %d failed",
7521
                   instance.name, idx)
7522

    
7523
  try:
7524
    for idx, device in enumerate(instance.disks):
7525
      # The wipe size is MIN_WIPE_CHUNK_PERCENT percent of the instance disk
      # size, but at most MAX_WIPE_CHUNK
7527
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
7528
                            constants.MIN_WIPE_CHUNK_PERCENT)
7529
      # we _must_ make this an int, otherwise rounding errors will
7530
      # occur
7531
      wipe_chunk_size = int(wipe_chunk_size)
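      # Illustrative example (assuming MIN_WIPE_CHUNK_PERCENT is 10 and
      # MAX_WIPE_CHUNK is 1024 MiB): a 4 GiB disk is wiped in
      # min(1024, 409.6) -> 409 MiB chunks, a 100 GiB disk in 1024 MiB chunks.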
7532

    
7533
      lu.LogInfo("* Wiping disk %d", idx)
7534
      logging.info("Wiping disk %d for instance %s, node %s using"
7535
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)
7536

    
7537
      offset = 0
7538
      size = device.size
7539
      last_output = 0
7540
      start_time = time.time()
7541

    
7542
      while offset < size:
7543
        wipe_size = min(wipe_chunk_size, size - offset)
7544
        logging.debug("Wiping disk %d, offset %s, chunk %s",
7545
                      idx, offset, wipe_size)
7546
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
7547
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
7548
                     (idx, offset, wipe_size))
7549
        now = time.time()
7550
        offset += wipe_size
7551
        if now - last_output >= 60:
7552
          eta = _CalcEta(now - start_time, offset, size)
7553
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
7554
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
7555
          last_output = now
7556
  finally:
7557
    logging.info("Resume sync of instance %s disks", instance.name)
7558

    
7559
    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
7560

    
7561
    for idx, success in enumerate(result.payload):
7562
      if not success:
7563
        lu.LogWarning("Resume sync of disk %d failed, please have a"
7564
                      " look at the status and troubleshoot the issue", idx)
7565
        logging.warn("resume-sync of instance %s for disks %d failed",
7566
                     instance.name, idx)


def _CreateDisks(lu, instance, to_skip=None, target_node=None):
7570
  """Create all disks for an instance.
7571

7572
  This abstracts away some work from AddInstance.
7573

7574
  @type lu: L{LogicalUnit}
7575
  @param lu: the logical unit on whose behalf we execute
7576
  @type instance: L{objects.Instance}
7577
  @param instance: the instance whose disks we should create
7578
  @type to_skip: list
7579
  @param to_skip: list of indices to skip
7580
  @type target_node: string
7581
  @param target_node: if passed, overrides the target node for creation
7582
  @rtype: boolean
7583
  @return: the success of the creation
7584

7585
  """
7586
  info = _GetInstanceInfoText(instance)
7587
  if target_node is None:
7588
    pnode = instance.primary_node
7589
    all_nodes = instance.all_nodes
7590
  else:
7591
    pnode = target_node
7592
    all_nodes = [pnode]
7593

    
7594
  if instance.disk_template in constants.DTS_FILEBASED:
7595
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7596
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
7597

    
7598
    result.Raise("Failed to create directory '%s' on"
7599
                 " node %s" % (file_storage_dir, pnode))
7600

    
7601
  # Note: this needs to be kept in sync with adding of disks in
7602
  # LUInstanceSetParams
7603
  for idx, device in enumerate(instance.disks):
7604
    if to_skip and idx in to_skip:
7605
      continue
7606
    logging.info("Creating volume %s for instance %s",
7607
                 device.iv_name, instance.name)
7608
    #HARDCODE
7609
    for node in all_nodes:
7610
      f_create = node == pnode
7611
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)


def _RemoveDisks(lu, instance, target_node=None):
7615
  """Remove all disks for an instance.
7616

7617
  This abstracts away some work from `AddInstance()` and
7618
  `RemoveInstance()`. Note that in case some of the devices couldn't
7619
  be removed, the removal will continue with the other ones (compare
7620
  with `_CreateDisks()`).
7621

7622
  @type lu: L{LogicalUnit}
7623
  @param lu: the logical unit on whose behalf we execute
7624
  @type instance: L{objects.Instance}
7625
  @param instance: the instance whose disks we should remove
7626
  @type target_node: string
7627
  @param target_node: used to override the node on which to remove the disks
7628
  @rtype: boolean
7629
  @return: the success of the removal
7630

7631
  """
7632
  logging.info("Removing block devices for instance %s", instance.name)
7633

    
7634
  all_result = True
7635
  for device in instance.disks:
7636
    if target_node:
7637
      edata = [(target_node, device)]
7638
    else:
7639
      edata = device.ComputeNodeTree(instance.primary_node)
7640
    for node, disk in edata:
7641
      lu.cfg.SetDiskID(disk, node)
7642
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
7643
      if msg:
7644
        lu.LogWarning("Could not remove block device %s on node %s,"
7645
                      " continuing anyway: %s", device.iv_name, node, msg)
7646
        all_result = False
7647

    
7648
  if instance.disk_template == constants.DT_FILE:
7649
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7650
    if target_node:
7651
      tgt = target_node
7652
    else:
7653
      tgt = instance.primary_node
7654
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
7655
    if result.fail_msg:
7656
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
7657
                    file_storage_dir, instance.primary_node, result.fail_msg)
7658
      all_result = False
7659

    
7660
  return all_result


def _ComputeDiskSizePerVG(disk_template, disks):
7664
  """Compute disk size requirements in the volume group
7665

7666
  """
7667
  def _compute(disks, payload):
7668
    """Universal algorithm.
7669

7670
    """
7671
    vgs = {}
7672
    for disk in disks:
7673
      vgs[disk[constants.IDISK_VG]] = \
        vgs.get(disk[constants.IDISK_VG], 0) + \
        disk[constants.IDISK_SIZE] + payload
7675

    
7676
    return vgs
7677

    
7678
  # Required free disk space as a function of disk and swap space
7679
  req_size_dict = {
7680
    constants.DT_DISKLESS: {},
7681
    constants.DT_PLAIN: _compute(disks, 0),
7682
    # 128 MB are added for drbd metadata for each disk
7683
    constants.DT_DRBD8: _compute(disks, 128),
7684
    constants.DT_FILE: {},
7685
    constants.DT_SHARED_FILE: {},
7686
  }
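  # Illustrative example (assumed input): two 1024 MiB disks on volume group
  # "xenvg" under DT_DRBD8 yield {"xenvg": 2 * (1024 + 128)} = {"xenvg": 2304}.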
7687

    
7688
  if disk_template not in req_size_dict:
7689
    raise errors.ProgrammerError("Disk template '%s' size requirement"
7690
                                 " is unknown" %  disk_template)
7691

    
7692
  return req_size_dict[disk_template]


def _ComputeDiskSize(disk_template, disks):
7696
  """Compute disk size requirements in the volume group
7697

7698
  """
7699
  # Required free disk space as a function of disk and swap space
7700
  req_size_dict = {
7701
    constants.DT_DISKLESS: None,
7702
    constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
7703
    # 128 MB are added for drbd metadata for each disk
7704
    constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
7705
    constants.DT_FILE: None,
7706
    constants.DT_SHARED_FILE: 0,
7707
    constants.DT_BLOCK: 0,
7708
  }
7709

    
7710
  if disk_template not in req_size_dict:
7711
    raise errors.ProgrammerError("Disk template '%s' size requirement"
7712
                                 " is unknown" %  disk_template)
7713

    
7714
  return req_size_dict[disk_template]


def _FilterVmNodes(lu, nodenames):
7718
  """Filters out non-vm_capable nodes from a list.
7719

7720
  @type lu: L{LogicalUnit}
7721
  @param lu: the logical unit for which we check
7722
  @type nodenames: list
7723
  @param nodenames: the list of nodes on which we should check
7724
  @rtype: list
7725
  @return: the list of vm-capable nodes
7726

7727
  """
7728
  non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
  return [name for name in nodenames if name not in non_vm_nodes]


def _CheckHVParams(lu, nodenames, hvname, hvparams):
7733
  """Hypervisor parameter validation.
7734

7735
  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.
7737

7738
  @type lu: L{LogicalUnit}
7739
  @param lu: the logical unit for which we check
7740
  @type nodenames: list
7741
  @param nodenames: the list of nodes on which we should check
7742
  @type hvname: string
7743
  @param hvname: the name of the hypervisor we should use
7744
  @type hvparams: dict
7745
  @param hvparams: the parameters which we need to check
7746
  @raise errors.OpPrereqError: if the parameters are not valid
7747

7748
  """
7749
  nodenames = _FilterVmNodes(lu, nodenames)
7750
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
7751
                                                  hvname,
7752
                                                  hvparams)
7753
  for node in nodenames:
7754
    info = hvinfo[node]
7755
    if info.offline:
7756
      continue
7757
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
7758

    
7759

    
7760
def _CheckOSParams(lu, required, nodenames, osname, osparams):
7761
  """OS parameters validation.
7762

7763
  @type lu: L{LogicalUnit}
7764
  @param lu: the logical unit for which we check
7765
  @type required: boolean
7766
  @param required: whether the validation should fail if the OS is not
7767
      found
7768
  @type nodenames: list
7769
  @param nodenames: the list of nodes on which we should check
7770
  @type osname: string
7771
  @param osname: the name of the OS we should use
7772
  @type osparams: dict
7773
  @param osparams: the parameters which we need to check
7774
  @raise errors.OpPrereqError: if the parameters are not valid
7775

7776
  """
7777
  nodenames = _FilterVmNodes(lu, nodenames)
7778
  result = lu.rpc.call_os_validate(required, nodenames, osname,
7779
                                   [constants.OS_VALIDATE_PARAMETERS],
7780
                                   osparams)
7781
  for node, nres in result.items():
7782
    # we don't check for offline cases since this should be run only
7783
    # against the master node and/or an instance's nodes
7784
    nres.Raise("OS Parameters validation failed on node %s" % node)
7785
    if not nres.payload:
7786
      lu.LogInfo("OS %s not found on node %s, validation skipped",
7787
                 osname, node)


class LUInstanceCreate(LogicalUnit):
7791
  """Create an instance.
7792

7793
  """
7794
  HPATH = "instance-add"
7795
  HTYPE = constants.HTYPE_INSTANCE
7796
  REQ_BGL = False

  def CheckArguments(self):
7799
    """Check arguments.
7800

7801
    """
7802
    # do not require name_check to ease forward/backward compatibility
7803
    # for tools
7804
    if self.op.no_install and self.op.start:
7805
      self.LogInfo("No-installation mode selected, disabling startup")
7806
      self.op.start = False
7807
    # validate/normalize the instance name
7808
    self.op.instance_name = \
7809
      netutils.Hostname.GetNormalizedName(self.op.instance_name)
7810

    
7811
    if self.op.ip_check and not self.op.name_check:
7812
      # TODO: make the ip check more flexible and not depend on the name check
7813
      raise errors.OpPrereqError("Cannot do IP address check without a name"
7814
                                 " check", errors.ECODE_INVAL)
7815

    
7816
    # check nics' parameter names
7817
    for nic in self.op.nics:
7818
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
7819

    
7820
    # check disks. parameter names and consistent adopt/no-adopt strategy
7821
    has_adopt = has_no_adopt = False
7822
    for disk in self.op.disks:
7823
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
7824
      if constants.IDISK_ADOPT in disk:
7825
        has_adopt = True
7826
      else:
7827
        has_no_adopt = True
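    # A mixed specification (illustrative) of one disk given with
    # constants.IDISK_ADOPT and another without it is rejected below.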
7828
    if has_adopt and has_no_adopt:
7829
      raise errors.OpPrereqError("Either all disks are adopted or none is",
7830
                                 errors.ECODE_INVAL)
7831
    if has_adopt:
7832
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
7833
        raise errors.OpPrereqError("Disk adoption is not supported for the"
7834
                                   " '%s' disk template" %
7835
                                   self.op.disk_template,
7836
                                   errors.ECODE_INVAL)
7837
      if self.op.iallocator is not None:
7838
        raise errors.OpPrereqError("Disk adoption not allowed with an"
7839
                                   " iallocator script", errors.ECODE_INVAL)
7840
      if self.op.mode == constants.INSTANCE_IMPORT:
7841
        raise errors.OpPrereqError("Disk adoption not allowed for"
7842
                                   " instance import", errors.ECODE_INVAL)
7843
    else:
7844
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
7845
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
7846
                                   " but no 'adopt' parameter given" %
7847
                                   self.op.disk_template,
7848
                                   errors.ECODE_INVAL)
7849

    
7850
    self.adopt_disks = has_adopt
7851

    
7852
    # instance name verification
7853
    if self.op.name_check:
7854
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
7855
      self.op.instance_name = self.hostname1.name
7856
      # used in CheckPrereq for ip ping check
7857
      self.check_ip = self.hostname1.ip
7858
    else:
7859
      self.check_ip = None
7860

    
7861
    # file storage checks
7862
    if (self.op.file_driver and
7863
        not self.op.file_driver in constants.FILE_DRIVER):
7864
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
7865
                                 self.op.file_driver, errors.ECODE_INVAL)
7866

    
7867
    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
7868
      raise errors.OpPrereqError("File storage directory path not absolute",
7869
                                 errors.ECODE_INVAL)
7870

    
7871
    ### Node/iallocator related checks
7872
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")
7873

    
7874
    if self.op.pnode is not None:
7875
      if self.op.disk_template in constants.DTS_INT_MIRROR:
7876
        if self.op.snode is None:
7877
          raise errors.OpPrereqError("The networked disk templates need"
7878
                                     " a mirror node", errors.ECODE_INVAL)
7879
      elif self.op.snode:
7880
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
7881
                        " template")
7882
        self.op.snode = None
7883

    
7884
    self._cds = _GetClusterDomainSecret()
7885

    
7886
    if self.op.mode == constants.INSTANCE_IMPORT:
7887
      # On import force_variant must be True, because if we forced it at
7888
      # initial install, our only chance when importing it back is that it
7889
      # works again!
7890
      self.op.force_variant = True
7891

    
7892
      if self.op.no_install:
7893
        self.LogInfo("No-installation mode has no effect during import")
7894

    
7895
    elif self.op.mode == constants.INSTANCE_CREATE:
7896
      if self.op.os_type is None:
7897
        raise errors.OpPrereqError("No guest OS specified",
7898
                                   errors.ECODE_INVAL)
7899
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
7900
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
7901
                                   " installation" % self.op.os_type,
7902
                                   errors.ECODE_STATE)
7903
      if self.op.disk_template is None:
7904
        raise errors.OpPrereqError("No disk template specified",
7905
                                   errors.ECODE_INVAL)
7906

    
7907
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7908
      # Check handshake to ensure both clusters have the same domain secret
7909
      src_handshake = self.op.source_handshake
7910
      if not src_handshake:
7911
        raise errors.OpPrereqError("Missing source handshake",
7912
                                   errors.ECODE_INVAL)
7913

    
7914
      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
7915
                                                           src_handshake)
7916
      if errmsg:
7917
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
7918
                                   errors.ECODE_INVAL)
7919

    
7920
      # Load and check source CA
7921
      self.source_x509_ca_pem = self.op.source_x509_ca
7922
      if not self.source_x509_ca_pem:
7923
        raise errors.OpPrereqError("Missing source X509 CA",
7924
                                   errors.ECODE_INVAL)
7925

    
7926
      try:
7927
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
7928
                                                    self._cds)
7929
      except OpenSSL.crypto.Error, err:
7930
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
7931
                                   (err, ), errors.ECODE_INVAL)
7932

    
7933
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
7934
      if errcode is not None:
7935
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
7936
                                   errors.ECODE_INVAL)
7937

    
7938
      self.source_x509_ca = cert
7939

    
7940
      src_instance_name = self.op.source_instance_name
7941
      if not src_instance_name:
7942
        raise errors.OpPrereqError("Missing source instance name",
7943
                                   errors.ECODE_INVAL)
7944

    
7945
      self.source_instance_name = \
7946
          netutils.GetHostname(name=src_instance_name).name
7947

    
7948
    else:
7949
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
7950
                                 self.op.mode, errors.ECODE_INVAL)

  def ExpandNames(self):
7953
    """ExpandNames for CreateInstance.
7954

7955
    Figure out the right locks for instance creation.
7956

7957
    """
7958
    self.needed_locks = {}
7959

    
7960
    instance_name = self.op.instance_name
7961
    # this is just a preventive check, but someone might still add this
7962
    # instance in the meantime, and creation will fail at lock-add time
7963
    if instance_name in self.cfg.GetInstanceList():
7964
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7965
                                 instance_name, errors.ECODE_EXISTS)
7966

    
7967
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
7968

    
7969
    if self.op.iallocator:
7970
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7971
    else:
7972
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
7973
      nodelist = [self.op.pnode]
7974
      if self.op.snode is not None:
7975
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
7976
        nodelist.append(self.op.snode)
7977
      self.needed_locks[locking.LEVEL_NODE] = nodelist
7978

    
7979
    # in case of import lock the source node too
7980
    if self.op.mode == constants.INSTANCE_IMPORT:
7981
      src_node = self.op.src_node
7982
      src_path = self.op.src_path
7983

    
7984
      if src_path is None:
7985
        self.op.src_path = src_path = self.op.instance_name
7986

    
7987
      if src_node is None:
7988
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7989
        self.op.src_node = None
7990
        if os.path.isabs(src_path):
7991
          raise errors.OpPrereqError("Importing an instance from an absolute"
7992
                                     " path requires a source node option",
7993
                                     errors.ECODE_INVAL)
7994
      else:
7995
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
7996
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
7997
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
7998
        if not os.path.isabs(src_path):
7999
          self.op.src_path = src_path = \
8000
            utils.PathJoin(constants.EXPORT_DIR, src_path)

  def _RunAllocator(self):
8003
    """Run the allocator based on input opcode.
8004

8005
    """
8006
    nics = [n.ToDict() for n in self.nics]
8007
    ial = IAllocator(self.cfg, self.rpc,
8008
                     mode=constants.IALLOCATOR_MODE_ALLOC,
8009
                     name=self.op.instance_name,
8010
                     disk_template=self.op.disk_template,
8011
                     tags=self.op.tags,
8012
                     os=self.op.os_type,
8013
                     vcpus=self.be_full[constants.BE_VCPUS],
8014
                     memory=self.be_full[constants.BE_MEMORY],
8015
                     disks=self.disks,
8016
                     nics=nics,
8017
                     hypervisor=self.op.hypervisor,
8018
                     )
8019

    
8020
    ial.Run(self.op.iallocator)
8021

    
8022
    if not ial.success:
8023
      raise errors.OpPrereqError("Can't compute nodes using"
8024
                                 " iallocator '%s': %s" %
8025
                                 (self.op.iallocator, ial.info),
8026
                                 errors.ECODE_NORES)
8027
    if len(ial.result) != ial.required_nodes:
8028
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8029
                                 " of nodes (%s), required %s" %
8030
                                 (self.op.iallocator, len(ial.result),
8031
                                  ial.required_nodes), errors.ECODE_FAULT)
8032
    self.op.pnode = ial.result[0]
8033
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8034
                 self.op.instance_name, self.op.iallocator,
8035
                 utils.CommaJoin(ial.result))
8036
    if ial.required_nodes == 2:
8037
      self.op.snode = ial.result[1]

  def BuildHooksEnv(self):
8040
    """Build hooks env.
8041

8042
    This runs on master, primary and secondary nodes of the instance.
8043

8044
    """
8045
    env = {
8046
      "ADD_MODE": self.op.mode,
8047
      }
8048
    if self.op.mode == constants.INSTANCE_IMPORT:
8049
      env["SRC_NODE"] = self.op.src_node
8050
      env["SRC_PATH"] = self.op.src_path
8051
      env["SRC_IMAGES"] = self.src_images
8052

    
8053
    env.update(_BuildInstanceHookEnv(
8054
      name=self.op.instance_name,
8055
      primary_node=self.op.pnode,
8056
      secondary_nodes=self.secondaries,
8057
      status=self.op.start,
8058
      os_type=self.op.os_type,
8059
      memory=self.be_full[constants.BE_MEMORY],
8060
      vcpus=self.be_full[constants.BE_VCPUS],
8061
      nics=_NICListToTuple(self, self.nics),
8062
      disk_template=self.op.disk_template,
8063
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8064
             for d in self.disks],
8065
      bep=self.be_full,
8066
      hvp=self.hv_full,
8067
      hypervisor_name=self.op.hypervisor,
8068
      tags=self.op.tags,
8069
    ))
8070

    
8071
    return env

  def BuildHooksNodes(self):
8074
    """Build hooks nodes.
8075

8076
    """
8077
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8078
    return nl, nl

  def _ReadExportInfo(self):
8081
    """Reads the export information from disk.
8082

8083
    It will override the opcode source node and path with the actual
8084
    information, if these two were not specified before.
8085

8086
    @return: the export information
8087

8088
    """
8089
    assert self.op.mode == constants.INSTANCE_IMPORT
8090

    
8091
    src_node = self.op.src_node
8092
    src_path = self.op.src_path
8093

    
8094
    if src_node is None:
8095
      locked_nodes = self.glm.list_owned(locking.LEVEL_NODE)
8096
      exp_list = self.rpc.call_export_list(locked_nodes)
8097
      found = False
8098
      for node in exp_list:
8099
        if exp_list[node].fail_msg:
8100
          continue
8101
        if src_path in exp_list[node].payload:
8102
          found = True
8103
          self.op.src_node = src_node = node
8104
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8105
                                                       src_path)
8106
          break
8107
      if not found:
8108
        raise errors.OpPrereqError("No export found for relative path %s" %
8109
                                    src_path, errors.ECODE_INVAL)
8110

    
8111
    _CheckNodeOnline(self, src_node)
8112
    result = self.rpc.call_export_info(src_node, src_path)
8113
    result.Raise("No export or invalid export found in dir %s" % src_path)
8114

    
8115
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8116
    if not export_info.has_section(constants.INISECT_EXP):
8117
      raise errors.ProgrammerError("Corrupted export config",
8118
                                   errors.ECODE_ENVIRON)
8119

    
8120
    ei_version = export_info.get(constants.INISECT_EXP, "version")
8121
    if (int(ei_version) != constants.EXPORT_VERSION):
8122
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8123
                                 (ei_version, constants.EXPORT_VERSION),
8124
                                 errors.ECODE_ENVIRON)
8125
    return export_info

  def _ReadExportParams(self, einfo):
8128
    """Use export parameters as defaults.
8129

8130
    In case the opcode doesn't specify (i.e. override) some instance
    parameters, try to use them from the export information, if it
    declares them.
8133

8134
    """
8135
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8136

    
8137
    if self.op.disk_template is None:
8138
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
8139
        self.op.disk_template = einfo.get(constants.INISECT_INS,
8140
                                          "disk_template")
8141
      else:
8142
        raise errors.OpPrereqError("No disk template specified and the export"
8143
                                   " is missing the disk_template information",
8144
                                   errors.ECODE_INVAL)
8145

    
8146
    if not self.op.disks:
8147
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
8148
        disks = []
8149
        # TODO: import the disk iv_name too
8150
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
8151
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8152
          disks.append({constants.IDISK_SIZE: disk_sz})
8153
        self.op.disks = disks
8154
      else:
8155
        raise errors.OpPrereqError("No disk info specified and the export"
8156
                                   " is missing the disk information",
8157
                                   errors.ECODE_INVAL)
8158

    
8159
    if (not self.op.nics and
8160
        einfo.has_option(constants.INISECT_INS, "nic_count")):
8161
      nics = []
8162
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
8163
        ndict = {}
8164
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8165
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
8166
          ndict[name] = v
8167
        nics.append(ndict)
8168
      self.op.nics = nics
8169

    
8170
    if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
8171
      self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
8172

    
8173
    if (self.op.hypervisor is None and
8174
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
8175
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8176

    
8177
    if einfo.has_section(constants.INISECT_HYP):
8178
      # use the export parameters but do not override the ones
8179
      # specified by the user
8180
      for name, value in einfo.items(constants.INISECT_HYP):
8181
        if name not in self.op.hvparams:
8182
          self.op.hvparams[name] = value
8183

    
8184
    if einfo.has_section(constants.INISECT_BEP):
8185
      # use the parameters, without overriding
8186
      for name, value in einfo.items(constants.INISECT_BEP):
8187
        if name not in self.op.beparams:
8188
          self.op.beparams[name] = value
8189
    else:
8190
      # try to read the parameters old style, from the main section
8191
      for name in constants.BES_PARAMETERS:
8192
        if (name not in self.op.beparams and
8193
            einfo.has_option(constants.INISECT_INS, name)):
8194
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8195

    
8196
    if einfo.has_section(constants.INISECT_OSP):
8197
      # use the parameters, without overriding
8198
      for name, value in einfo.items(constants.INISECT_OSP):
8199
        if name not in self.op.osparams:
8200
          self.op.osparams[name] = value

  def _RevertToDefaults(self, cluster):
8203
    """Revert the instance parameters to the default values.
8204

8205
    """
8206
    # hvparams
8207
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
8208
    for name in self.op.hvparams.keys():
8209
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
8210
        del self.op.hvparams[name]
8211
    # beparams
8212
    be_defs = cluster.SimpleFillBE({})
8213
    for name in self.op.beparams.keys():
8214
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
8215
        del self.op.beparams[name]
8216
    # nic params
8217
    nic_defs = cluster.SimpleFillNIC({})
8218
    for nic in self.op.nics:
8219
      for name in constants.NICS_PARAMETERS:
8220
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
8221
          del nic[name]
8222
    # osparams
8223
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
8224
    for name in self.op.osparams.keys():
8225
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
8226
        del self.op.osparams[name]

  def CheckPrereq(self):
8229
    """Check prerequisites.
8230

8231
    """
8232
    if self.op.mode == constants.INSTANCE_IMPORT:
8233
      export_info = self._ReadExportInfo()
8234
      self._ReadExportParams(export_info)
8235

    
8236
    if (not self.cfg.GetVGName() and
8237
        self.op.disk_template not in constants.DTS_NOT_LVM):
8238
      raise errors.OpPrereqError("Cluster does not support lvm-based"
8239
                                 " instances", errors.ECODE_STATE)
8240

    
8241
    if self.op.hypervisor is None:
8242
      self.op.hypervisor = self.cfg.GetHypervisorType()
8243

    
8244
    cluster = self.cfg.GetClusterInfo()
8245
    enabled_hvs = cluster.enabled_hypervisors
8246
    if self.op.hypervisor not in enabled_hvs:
8247
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8248
                                 " cluster (%s)" % (self.op.hypervisor,
8249
                                  ",".join(enabled_hvs)),
8250
                                 errors.ECODE_STATE)
8251

    
8252
    # Check tag validity
8253
    for tag in self.op.tags:
8254
      objects.TaggableObject.ValidateTag(tag)
8255

    
8256
    # check hypervisor parameter syntax (locally)
8257
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8258
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8259
                                      self.op.hvparams)
8260
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8261
    hv_type.CheckParameterSyntax(filled_hvp)
8262
    self.hv_full = filled_hvp
8263
    # check that we don't specify global parameters on an instance
8264
    _CheckGlobalHvParams(self.op.hvparams)
8265

    
8266
    # fill and remember the beparams dict
8267
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
8268
    self.be_full = cluster.SimpleFillBE(self.op.beparams)
8269

    
8270
    # build os parameters
8271
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
8272

    
8273
    # now that hvp/bep are in final format, let's reset to defaults,
8274
    # if told to do so
8275
    if self.op.identify_defaults:
8276
      self._RevertToDefaults(cluster)
8277

    
8278
    # NIC buildup
8279
    self.nics = []
8280
    for idx, nic in enumerate(self.op.nics):
8281
      nic_mode_req = nic.get(constants.INIC_MODE, None)
8282
      nic_mode = nic_mode_req
8283
      if nic_mode is None:
8284
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
8285

    
8286
      # in routed mode, for the first nic, the default ip is 'auto'
8287
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
8288
        default_ip_mode = constants.VALUE_AUTO
8289
      else:
8290
        default_ip_mode = constants.VALUE_NONE
8291

    
8292
      # ip validity checks
8293
      ip = nic.get(constants.INIC_IP, default_ip_mode)
8294
      if ip is None or ip.lower() == constants.VALUE_NONE:
8295
        nic_ip = None
8296
      elif ip.lower() == constants.VALUE_AUTO:
8297
        if not self.op.name_check:
8298
          raise errors.OpPrereqError("IP address set to auto but name checks"
8299
                                     " have been skipped",
8300
                                     errors.ECODE_INVAL)
8301
        nic_ip = self.hostname1.ip
8302
      else:
8303
        if not netutils.IPAddress.IsValid(ip):
8304
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
8305
                                     errors.ECODE_INVAL)
8306
        nic_ip = ip
8307

    
8308
      # TODO: check the ip address for uniqueness
8309
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
8310
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
8311
                                   errors.ECODE_INVAL)
8312

    
8313
      # MAC address verification
8314
      mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
8315
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8316
        mac = utils.NormalizeAndValidateMac(mac)
8317

    
8318
        try:
8319
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
8320
        except errors.ReservationError:
8321
          raise errors.OpPrereqError("MAC address %s already in use"
8322
                                     " in cluster" % mac,
8323
                                     errors.ECODE_NOTUNIQUE)
8324

    
8325
      #  Build nic parameters
8326
      link = nic.get(constants.INIC_LINK, None)
8327
      nicparams = {}
8328
      if nic_mode_req:
8329
        nicparams[constants.NIC_MODE] = nic_mode_req
8330
      if link:
8331
        nicparams[constants.NIC_LINK] = link
8332

    
8333
      check_params = cluster.SimpleFillNIC(nicparams)
8334
      objects.NIC.CheckParameterSyntax(check_params)
8335
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
8336

    
8337
    # disk checks/pre-build
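    # (self.op.disks entries are partial dicts as well, e.g. roughly
    #   {constants.IDISK_SIZE: 10240, constants.IDISK_MODE: "rw"}
    # with the volume groups defaulting to the cluster VG picked below)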
8338
    default_vg = self.cfg.GetVGName()
8339
    self.disks = []
8340
    for disk in self.op.disks:
8341
      mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
8342
      if mode not in constants.DISK_ACCESS_SET:
8343
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
8344
                                   mode, errors.ECODE_INVAL)
8345
      size = disk.get(constants.IDISK_SIZE, None)
8346
      if size is None:
8347
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
8348
      try:
8349
        size = int(size)
8350
      except (TypeError, ValueError):
8351
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
8352
                                   errors.ECODE_INVAL)
8353

    
8354
      data_vg = disk.get(constants.IDISK_VG, default_vg)
8355
      new_disk = {
8356
        constants.IDISK_SIZE: size,
8357
        constants.IDISK_MODE: mode,
8358
        constants.IDISK_VG: data_vg,
8359
        constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
8360
        }
8361
      if constants.IDISK_ADOPT in disk:
8362
        new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
8363
      self.disks.append(new_disk)
8364

    
8365
    if self.op.mode == constants.INSTANCE_IMPORT:
8366

    
8367
      # Check that the new instance doesn't have less disks than the export
8368
      instance_disks = len(self.disks)
8369
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
8370
      if instance_disks < export_disks:
8371
        raise errors.OpPrereqError("Not enough disks to import."
8372
                                   " (instance: %d, export: %d)" %
8373
                                   (instance_disks, export_disks),
8374
                                   errors.ECODE_INVAL)
8375

    
8376
      disk_images = []
8377
      for idx in range(export_disks):
8378
        option = 'disk%d_dump' % idx
8379
        if export_info.has_option(constants.INISECT_INS, option):
8380
          # FIXME: are the old os-es, disk sizes, etc. useful?
8381
          export_name = export_info.get(constants.INISECT_INS, option)
8382
          image = utils.PathJoin(self.op.src_path, export_name)
8383
          disk_images.append(image)
8384
        else:
8385
          disk_images.append(False)
8386

    
8387
      self.src_images = disk_images
8388

    
8389
      old_name = export_info.get(constants.INISECT_INS, 'name')
8390
      try:
8391
        exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
8392
      except (TypeError, ValueError), err:
8393
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
8394
                                   " an integer: %s" % str(err),
8395
                                   errors.ECODE_STATE)
8396
      if self.op.instance_name == old_name:
8397
        for idx, nic in enumerate(self.nics):
8398
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
8399
            nic_mac_ini = 'nic%d_mac' % idx
8400
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8401

    
8402
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8403

    
8404
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
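    # (this is only a heuristic: if anything answers on the noded port at that
    # address, the IP is assumed to be taken already)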
8405
    if self.op.ip_check:
8406
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8407
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
8408
                                   (self.check_ip, self.op.instance_name),
8409
                                   errors.ECODE_NOTUNIQUE)
8410

    
8411
    #### mac address generation
8412
    # By generating the MAC address here, both the allocator and the hooks get
8413
    # the real final MAC address rather than the 'auto' or 'generate' value.
8414
    # There is a race condition between the generation and the instance object
8415
    # creation, which means that we know the mac is valid now, but we're not
8416
    # sure it will be when we actually add the instance. If things go bad
8417
    # adding the instance will abort because of a duplicate mac, and the
8418
    # creation job will fail.
8419
    for nic in self.nics:
8420
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8421
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8422

    
8423
    #### allocator run
8424

    
8425
    if self.op.iallocator is not None:
8426
      self._RunAllocator()
8427

    
8428
    #### node related checks
8429

    
8430
    # check primary node
8431
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8432
    assert self.pnode is not None, \
8433
      "Cannot retrieve locked node %s" % self.op.pnode
8434
    if pnode.offline:
8435
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8436
                                 pnode.name, errors.ECODE_STATE)
8437
    if pnode.drained:
8438
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8439
                                 pnode.name, errors.ECODE_STATE)
8440
    if not pnode.vm_capable:
8441
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8442
                                 " '%s'" % pnode.name, errors.ECODE_STATE)
8443

    
8444
    self.secondaries = []
8445

    
8446
    # mirror node verification
8447
    if self.op.disk_template in constants.DTS_INT_MIRROR:
8448
      if self.op.snode == pnode.name:
8449
        raise errors.OpPrereqError("The secondary node cannot be the"
8450
                                   " primary node", errors.ECODE_INVAL)
8451
      _CheckNodeOnline(self, self.op.snode)
8452
      _CheckNodeNotDrained(self, self.op.snode)
8453
      _CheckNodeVmCapable(self, self.op.snode)
8454
      self.secondaries.append(self.op.snode)
8455

    
8456
    nodenames = [pnode.name] + self.secondaries
8457

    
8458
    if not self.adopt_disks:
8459
      # Check lv size requirements, if not adopting
8460
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8461
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8462

    
8463
    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8464
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8465
                                disk[constants.IDISK_ADOPT])
8466
                     for disk in self.disks])
8467
      if len(all_lvs) != len(self.disks):
8468
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
8469
                                   errors.ECODE_INVAL)
8470
      for lv_name in all_lvs:
8471
        try:
8472
          # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
8473
          # to ReserveLV use the same syntax
8474
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8475
        except errors.ReservationError:
8476
          raise errors.OpPrereqError("LV named %s used by another instance" %
8477
                                     lv_name, errors.ECODE_NOTUNIQUE)
8478

    
8479
      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8480
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8481

    
8482
      node_lvs = self.rpc.call_lv_list([pnode.name],
8483
                                       vg_names.payload.keys())[pnode.name]
8484
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8485
      node_lvs = node_lvs.payload
8486

    
8487
      delta = all_lvs.difference(node_lvs.keys())
8488
      if delta:
8489
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
8490
                                   utils.CommaJoin(delta),
8491
                                   errors.ECODE_INVAL)
8492
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
8493
      if online_lvs:
8494
        raise errors.OpPrereqError("Online logical volumes found, cannot"
8495
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
8496
                                   errors.ECODE_STATE)
8497
      # update the size of disk based on what is found
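      # (the first element of the lv_list payload is the size, the third the
      # "online" flag checked above; sizes may be reported fractional, hence
      # the float() -> int() conversion)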
8498
      for dsk in self.disks:
8499
        dsk[constants.IDISK_SIZE] = \
8500
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
8501
                                        dsk[constants.IDISK_ADOPT])][0]))
8502

    
8503
    elif self.op.disk_template == constants.DT_BLOCK:
8504
      # Normalize and de-duplicate device paths
8505
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
8506
                       for disk in self.disks])
8507
      if len(all_disks) != len(self.disks):
8508
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
8509
                                   errors.ECODE_INVAL)
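      # only device paths under ADOPTABLE_BLOCKDEV_ROOT (normally /dev/disk/)
      # are accepted, so that adopted names stay stable across reboots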
8510
      baddisks = [d for d in all_disks
8511
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
8512
      if baddisks:
8513
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
8514
                                   " cannot be adopted" %
8515
                                   (", ".join(baddisks),
8516
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
8517
                                   errors.ECODE_INVAL)
8518

    
8519
      node_disks = self.rpc.call_bdev_sizes([pnode.name],
8520
                                            list(all_disks))[pnode.name]
8521
      node_disks.Raise("Cannot get block device information from node %s" %
8522
                       pnode.name)
8523
      node_disks = node_disks.payload
8524
      delta = all_disks.difference(node_disks.keys())
8525
      if delta:
8526
        raise errors.OpPrereqError("Missing block device(s): %s" %
8527
                                   utils.CommaJoin(delta),
8528
                                   errors.ECODE_INVAL)
8529
      for dsk in self.disks:
8530
        dsk[constants.IDISK_SIZE] = \
8531
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
8532

    
8533
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
8534

    
8535
    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
8536
    # check OS parameters (remotely)
8537
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
8538

    
8539
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
8540

    
8541
    # memory check on primary node
8542
    if self.op.start:
8543
      _CheckNodeFreeMemory(self, self.pnode.name,
8544
                           "creating instance %s" % self.op.instance_name,
8545
                           self.be_full[constants.BE_MEMORY],
8546
                           self.op.hypervisor)
8547

    
8548
    self.dry_run_result = list(nodenames)
8549

    
8550
  def Exec(self, feedback_fn):
8551
    """Create and add the instance to the cluster.
8552

8553
    """
8554
    instance = self.op.instance_name
8555
    pnode_name = self.pnode.name
8556

    
8557
    ht_kind = self.op.hypervisor
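    # hypervisors listed in HTS_REQ_PORT need a dedicated TCP port (typically
    # for the remote console/display), so reserve one from the cluster pool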
8558
    if ht_kind in constants.HTS_REQ_PORT:
8559
      network_port = self.cfg.AllocatePort()
8560
    else:
8561
      network_port = None
8562

    
8563
    if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
8564
      # this is needed because os.path.join does not accept None arguments
8565
      if self.op.file_storage_dir is None:
8566
        string_file_storage_dir = ""
8567
      else:
8568
        string_file_storage_dir = self.op.file_storage_dir
8569

    
8570
      # build the full file storage dir path
8571
      if self.op.disk_template == constants.DT_SHARED_FILE:
8572
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
8573
      else:
8574
        get_fsd_fn = self.cfg.GetFileStorageDir
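      # the resulting path below is
      # <cluster storage dir>/<op.file_storage_dir>/<instance name>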
8575

    
8576
      file_storage_dir = utils.PathJoin(get_fsd_fn(),
8577
                                        string_file_storage_dir, instance)
8578
    else:
8579
      file_storage_dir = ""
8580

    
8581
    disks = _GenerateDiskTemplate(self,
8582
                                  self.op.disk_template,
8583
                                  instance, pnode_name,
8584
                                  self.secondaries,
8585
                                  self.disks,
8586
                                  file_storage_dir,
8587
                                  self.op.file_driver,
8588
                                  0,
8589
                                  feedback_fn)
8590

    
8591
    iobj = objects.Instance(name=instance, os=self.op.os_type,
8592
                            primary_node=pnode_name,
8593
                            nics=self.nics, disks=disks,
8594
                            disk_template=self.op.disk_template,
8595
                            admin_up=False,
8596
                            network_port=network_port,
8597
                            beparams=self.op.beparams,
8598
                            hvparams=self.op.hvparams,
8599
                            hypervisor=self.op.hypervisor,
8600
                            osparams=self.op.osparams,
8601
                            )
8602

    
8603
    if self.op.tags:
8604
      for tag in self.op.tags:
8605
        iobj.AddTag(tag)
8606

    
8607
    if self.adopt_disks:
8608
      if self.op.disk_template == constants.DT_PLAIN:
8609
        # rename LVs to the newly-generated names; we need to construct
8610
        # 'fake' LV disks with the old data, plus the new unique_id
8611
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
8612
        rename_to = []
8613
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
8614
          rename_to.append(t_dsk.logical_id)
8615
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
8616
          self.cfg.SetDiskID(t_dsk, pnode_name)
8617
        result = self.rpc.call_blockdev_rename(pnode_name,
8618
                                               zip(tmp_disks, rename_to))
8619
        result.Raise("Failed to rename adoped LVs")
8620
    else:
8621
      feedback_fn("* creating instance disks...")
8622
      try:
8623
        _CreateDisks(self, iobj)
8624
      except errors.OpExecError:
8625
        self.LogWarning("Device creation failed, reverting...")
8626
        try:
8627
          _RemoveDisks(self, iobj)
8628
        finally:
8629
          self.cfg.ReleaseDRBDMinors(instance)
8630
          raise
8631

    
8632
    feedback_fn("adding instance %s to cluster config" % instance)
8633

    
8634
    self.cfg.AddInstance(iobj, self.proc.GetECId())
8635

    
8636
    # Declare that we don't want to remove the instance lock anymore, as we've
8637
    # added the instance to the config
8638
    del self.remove_locks[locking.LEVEL_INSTANCE]
8639

    
8640
    if self.op.mode == constants.INSTANCE_IMPORT:
8641
      # Release unused nodes
8642
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
8643
    else:
8644
      # Release all nodes
8645
      _ReleaseLocks(self, locking.LEVEL_NODE)
8646

    
8647
    disk_abort = False
8648
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
8649
      feedback_fn("* wiping instance disks...")
8650
      try:
8651
        _WipeDisks(self, iobj)
8652
      except errors.OpExecError, err:
8653
        logging.exception("Wiping disks failed")
8654
        self.LogWarning("Wiping instance disks failed (%s)", err)
8655
        disk_abort = True
8656

    
8657
    if disk_abort:
8658
      # Something is already wrong with the disks, don't do anything else
8659
      pass
8660
    elif self.op.wait_for_sync:
8661
      disk_abort = not _WaitForSync(self, iobj)
8662
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
8663
      # make sure the disks are not degraded (still sync-ing is ok)
8664
      time.sleep(15)
8665
      feedback_fn("* checking mirrors status")
8666
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
8667
    else:
8668
      disk_abort = False
8669

    
8670
    if disk_abort:
8671
      _RemoveDisks(self, iobj)
8672
      self.cfg.RemoveInstance(iobj.name)
8673
      # Make sure the instance lock gets removed
8674
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
8675
      raise errors.OpExecError("There are some degraded disks for"
8676
                               " this instance")
8677

    
8678
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
8679
      if self.op.mode == constants.INSTANCE_CREATE:
8680
        if not self.op.no_install:
8681
          feedback_fn("* running the instance OS create scripts...")
8682
          # FIXME: pass debug option from opcode to backend
8683
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
8684
                                                 self.op.debug_level)
8685
          result.Raise("Could not add os for instance %s"
8686
                       " on node %s" % (instance, pnode_name))
8687

    
8688
      elif self.op.mode == constants.INSTANCE_IMPORT:
8689
        feedback_fn("* running the instance OS import scripts...")
8690

    
8691
        transfers = []
8692

    
8693
        for idx, image in enumerate(self.src_images):
8694
          if not image:
8695
            continue
8696

    
8697
          # FIXME: pass debug option from opcode to backend
8698
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
8699
                                             constants.IEIO_FILE, (image, ),
8700
                                             constants.IEIO_SCRIPT,
8701
                                             (iobj.disks[idx], idx),
8702
                                             None)
8703
          transfers.append(dt)
8704

    
8705
        import_result = \
8706
          masterd.instance.TransferInstanceData(self, feedback_fn,
8707
                                                self.op.src_node, pnode_name,
8708
                                                self.pnode.secondary_ip,
8709
                                                iobj, transfers)
8710
        if not compat.all(import_result):
8711
          self.LogWarning("Some disks for instance %s on node %s were not"
8712
                          " imported successfully" % (instance, pnode_name))
8713

    
8714
      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8715
        feedback_fn("* preparing remote import...")
8716
        # The source cluster will stop the instance before attempting to make a
8717
        # connection. In some cases stopping an instance can take a long time,
8718
        # hence the shutdown timeout is added to the connection timeout.
8719
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
8720
                           self.op.source_shutdown_timeout)
8721
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
8722

    
8723
        assert iobj.primary_node == self.pnode.name
8724
        disk_results = \
8725
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
8726
                                        self.source_x509_ca,
8727
                                        self._cds, timeouts)
8728
        if not compat.all(disk_results):
8729
          # TODO: Should the instance still be started, even if some disks
8730
          # failed to import (valid for local imports, too)?
8731
          self.LogWarning("Some disks for instance %s on node %s were not"
8732
                          " imported successfully" % (instance, pnode_name))
8733

    
8734
        # Run rename script on newly imported instance
8735
        assert iobj.name == instance
8736
        feedback_fn("Running rename script for %s" % instance)
8737
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
8738
                                                   self.source_instance_name,
8739
                                                   self.op.debug_level)
8740
        if result.fail_msg:
8741
          self.LogWarning("Failed to run rename script for %s on node"
8742
                          " %s: %s" % (instance, pnode_name, result.fail_msg))
8743

    
8744
      else:
8745
        # also checked in the prereq part
8746
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
8747
                                     % self.op.mode)
8748

    
8749
    if self.op.start:
8750
      iobj.admin_up = True
8751
      self.cfg.Update(iobj, feedback_fn)
8752
      logging.info("Starting instance %s on node %s", instance, pnode_name)
8753
      feedback_fn("* starting instance...")
8754
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
8755
      result.Raise("Could not start instance")
8756

    
8757
    return list(iobj.all_nodes)
8758

    
8759

    
8760
class LUInstanceConsole(NoHooksLU):
8761
  """Connect to an instance's console.
8762

8763
  This is somewhat special in that it returns the command line that
8764
  you need to run on the master node in order to connect to the
8765
  console.
8766

8767
  """
8768
  REQ_BGL = False
8769

    
8770
  def ExpandNames(self):
8771
    self._ExpandAndLockInstance()
8772

    
8773
  def CheckPrereq(self):
8774
    """Check prerequisites.
8775

8776
    This checks that the instance is in the cluster.
8777

8778
    """
8779
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8780
    assert self.instance is not None, \
8781
      "Cannot retrieve locked instance %s" % self.op.instance_name
8782
    _CheckNodeOnline(self, self.instance.primary_node)
8783

    
8784
  def Exec(self, feedback_fn):
8785
    """Connect to the console of an instance
8786

8787
    """
8788
    instance = self.instance
8789
    node = instance.primary_node
8790

    
8791
    node_insts = self.rpc.call_instance_list([node],
8792
                                             [instance.hypervisor])[node]
8793
    node_insts.Raise("Can't get node information from %s" % node)
8794

    
8795
    if instance.name not in node_insts.payload:
8796
      if instance.admin_up:
8797
        state = constants.INSTST_ERRORDOWN
8798
      else:
8799
        state = constants.INSTST_ADMINDOWN
8800
      raise errors.OpExecError("Instance %s is not running (state %s)" %
8801
                               (instance.name, state))
8802

    
8803
    logging.debug("Connecting to console of %s on %s", instance.name, node)
8804

    
8805
    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
8806

    
8807

    
8808
def _GetInstanceConsole(cluster, instance):
8809
  """Returns console information for an instance.
8810

8811
  @type cluster: L{objects.Cluster}
8812
  @type instance: L{objects.Instance}
8813
  @rtype: dict
8814

8815
  """
8816
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
8817
  # beparams and hvparams are passed separately, to avoid editing the
8818
  # instance and then saving the defaults in the instance itself.
8819
  hvparams = cluster.FillHV(instance)
8820
  beparams = cluster.FillBE(instance)
8821
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)
8822

    
8823
  assert console.instance == instance.name
8824
  assert console.Validate()
8825

    
8826
  return console.ToDict()
8827

    
8828

    
8829
class LUInstanceReplaceDisks(LogicalUnit):
8830
  """Replace the disks of an instance.
8831

8832
  """
8833
  HPATH = "mirrors-replace"
8834
  HTYPE = constants.HTYPE_INSTANCE
8835
  REQ_BGL = False
8836

    
8837
  def CheckArguments(self):
8838
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
8839
                                  self.op.iallocator)
8840

    
8841
  def ExpandNames(self):
8842
    self._ExpandAndLockInstance()
8843

    
8844
    assert locking.LEVEL_NODE not in self.needed_locks
8845
    assert locking.LEVEL_NODEGROUP not in self.needed_locks
8846

    
8847
    assert self.op.iallocator is None or self.op.remote_node is None, \
8848
      "Conflicting options"
8849

    
8850
    if self.op.remote_node is not None:
8851
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8852

    
8853
      # Warning: do not remove the locking of the new secondary here
8854
      # unless DRBD8.AddChildren is changed to work in parallel;
8855
      # currently it doesn't since parallel invocations of
8856
      # FindUnusedMinor will conflict
8857
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
8858
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
8859
    else:
8860
      self.needed_locks[locking.LEVEL_NODE] = []
8861
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8862

    
8863
      if self.op.iallocator is not None:
8864
        # iallocator will select a new node in the same group
8865
        self.needed_locks[locking.LEVEL_NODEGROUP] = []
8866

    
8867
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
8868
                                   self.op.iallocator, self.op.remote_node,
8869
                                   self.op.disks, False, self.op.early_release)
8870

    
8871
    self.tasklets = [self.replacer]
8872

    
8873
  def DeclareLocks(self, level):
8874
    if level == locking.LEVEL_NODEGROUP:
8875
      assert self.op.remote_node is None
8876
      assert self.op.iallocator is not None
8877
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
8878

    
8879
      self.share_locks[locking.LEVEL_NODEGROUP] = 1
8880
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
8881
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)
8882

    
8883
    elif level == locking.LEVEL_NODE:
8884
      if self.op.iallocator is not None:
8885
        assert self.op.remote_node is None
8886
        assert not self.needed_locks[locking.LEVEL_NODE]
8887

    
8888
        # Lock member nodes of all locked groups
8889
        self.needed_locks[locking.LEVEL_NODE] = [node_name
8890
          for group_uuid in self.glm.list_owned(locking.LEVEL_NODEGROUP)
8891
          for node_name in self.cfg.GetNodeGroup(group_uuid).members]
8892
      else:
8893
        self._LockInstancesNodes()
8894

    
8895
  def BuildHooksEnv(self):
8896
    """Build hooks env.
8897

8898
    This runs on the master, the primary and all the secondaries.
8899

8900
    """
8901
    instance = self.replacer.instance
8902
    env = {
8903
      "MODE": self.op.mode,
8904
      "NEW_SECONDARY": self.op.remote_node,
8905
      "OLD_SECONDARY": instance.secondary_nodes[0],
8906
      }
8907
    env.update(_BuildInstanceHookEnvByObject(self, instance))
8908
    return env
8909

    
8910
  def BuildHooksNodes(self):
8911
    """Build hooks nodes.
8912

8913
    """
8914
    instance = self.replacer.instance
8915
    nl = [
8916
      self.cfg.GetMasterNode(),
8917
      instance.primary_node,
8918
      ]
8919
    if self.op.remote_node is not None:
8920
      nl.append(self.op.remote_node)
8921
    return nl, nl
8922

    
8923
  def CheckPrereq(self):
8924
    """Check prerequisites.
8925

8926
    """
8927
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
8928
            self.op.iallocator is None)
8929

    
8930
    owned_groups = self.glm.list_owned(locking.LEVEL_NODEGROUP)
8931
    if owned_groups:
8932
      groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
8933
      if owned_groups != groups:
8934
        raise errors.OpExecError("Node groups used by instance '%s' changed"
8935
                                 " since lock was acquired, current list is %r,"
8936
                                 " used to be '%s'" %
8937
                                 (self.op.instance_name,
8938
                                  utils.CommaJoin(groups),
8939
                                  utils.CommaJoin(owned_groups)))
8940

    
8941
    return LogicalUnit.CheckPrereq(self)
8942

    
8943

    
8944
class TLReplaceDisks(Tasklet):
8945
  """Replaces disks for an instance.
8946

8947
  Note: Locking is not within the scope of this class.
8948

8949
  """
8950
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
8951
               disks, delay_iallocator, early_release):
8952
    """Initializes this class.
8953

8954
    """
8955
    Tasklet.__init__(self, lu)
8956

    
8957
    # Parameters
8958
    self.instance_name = instance_name
8959
    self.mode = mode
8960
    self.iallocator_name = iallocator_name
8961
    self.remote_node = remote_node
8962
    self.disks = disks
8963
    self.delay_iallocator = delay_iallocator
8964
    self.early_release = early_release
8965

    
8966
    # Runtime data
8967
    self.instance = None
8968
    self.new_node = None
8969
    self.target_node = None
8970
    self.other_node = None
8971
    self.remote_node_info = None
8972
    self.node_secondary_ip = None
8973

    
8974
  @staticmethod
8975
  def CheckArguments(mode, remote_node, iallocator):
8976
    """Helper function for users of this class.
8977

8978
    """
8979
    # check for valid parameter combination
8980
    if mode == constants.REPLACE_DISK_CHG:
8981
      if remote_node is None and iallocator is None:
8982
        raise errors.OpPrereqError("When changing the secondary either an"
8983
                                   " iallocator script must be used or the"
8984
                                   " new node given", errors.ECODE_INVAL)
8985

    
8986
      if remote_node is not None and iallocator is not None:
8987
        raise errors.OpPrereqError("Give either the iallocator or the new"
8988
                                   " secondary, not both", errors.ECODE_INVAL)
8989

    
8990
    elif remote_node is not None or iallocator is not None:
8991
      # Not replacing the secondary
8992
      raise errors.OpPrereqError("The iallocator and new node options can"
8993
                                 " only be used when changing the"
8994
                                 " secondary node", errors.ECODE_INVAL)
8995

    
8996
  @staticmethod
8997
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
8998
    """Compute a new secondary node using an IAllocator.
8999

9000
    """
9001
    ial = IAllocator(lu.cfg, lu.rpc,
9002
                     mode=constants.IALLOCATOR_MODE_RELOC,
9003
                     name=instance_name,
9004
                     relocate_from=relocate_from)
9005

    
9006
    ial.Run(iallocator_name)
9007

    
9008
    if not ial.success:
9009
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9010
                                 " %s" % (iallocator_name, ial.info),
9011
                                 errors.ECODE_NORES)
9012

    
9013
    if len(ial.result) != ial.required_nodes:
9014
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9015
                                 " of nodes (%s), required %s" %
9016
                                 (iallocator_name,
9017
                                  len(ial.result), ial.required_nodes),
9018
                                 errors.ECODE_FAULT)
9019

    
9020
    remote_node_name = ial.result[0]
9021

    
9022
    lu.LogInfo("Selected new secondary for instance '%s': %s",
9023
               instance_name, remote_node_name)
9024

    
9025
    return remote_node_name
9026

    
9027
  def _FindFaultyDisks(self, node_name):
9028
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9029
                                    node_name, True)
9030

    
9031
  def _CheckDisksActivated(self, instance):
9032
    """Checks if the instance disks are activated.
9033

9034
    @param instance: The instance to check disks
9035
    @return: True if they are activated, False otherwise
9036

9037
    """
9038
    nodes = instance.all_nodes
9039

    
9040
    for idx, dev in enumerate(instance.disks):
9041
      for node in nodes:
9042
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9043
        self.cfg.SetDiskID(dev, node)
9044

    
9045
        result = self.rpc.call_blockdev_find(node, dev)
9046

    
9047
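        # an offline node cannot report its block devices; skip it instead of
        # treating the disk as inactive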
        if result.offline:
9048
          continue
9049
        elif result.fail_msg or not result.payload:
9050
          return False
9051

    
9052
    return True
9053

    
9054
  def CheckPrereq(self):
9055
    """Check prerequisites.
9056

9057
    This checks that the instance is in the cluster.
9058

9059
    """
9060
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9061
    assert instance is not None, \
9062
      "Cannot retrieve locked instance %s" % self.instance_name
9063

    
9064
    if instance.disk_template != constants.DT_DRBD8:
9065
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9066
                                 " instances", errors.ECODE_INVAL)
9067

    
9068
    if len(instance.secondary_nodes) != 1:
9069
      raise errors.OpPrereqError("The instance has a strange layout,"
9070
                                 " expected one secondary but found %d" %
9071
                                 len(instance.secondary_nodes),
9072
                                 errors.ECODE_FAULT)
9073

    
9074
    if not self.delay_iallocator:
9075
      self._CheckPrereq2()
9076

    
9077
  def _CheckPrereq2(self):
9078
    """Check prerequisites, second part.
9079

9080
    This function should always be part of CheckPrereq. It was separated and is
9081
    now called from Exec because during node evacuation iallocator was only
9082
    called with an unmodified cluster model, not taking planned changes into
9083
    account.
9084

9085
    """
9086
    instance = self.instance
9087
    secondary_node = instance.secondary_nodes[0]
9088

    
9089
    if self.iallocator_name is None:
9090
      remote_node = self.remote_node
9091
    else:
9092
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9093
                                       instance.name, instance.secondary_nodes)
9094

    
9095
    if remote_node is None:
9096
      self.remote_node_info = None
9097
    else:
9098
      assert remote_node in self.lu.glm.list_owned(locking.LEVEL_NODE), \
9099
             "Remote node '%s' is not locked" % remote_node
9100

    
9101
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
9102
      assert self.remote_node_info is not None, \
9103
        "Cannot retrieve locked node %s" % remote_node
9104

    
9105
    if remote_node == self.instance.primary_node:
9106
      raise errors.OpPrereqError("The specified node is the primary node of"
9107
                                 " the instance", errors.ECODE_INVAL)
9108

    
9109
    if remote_node == secondary_node:
9110
      raise errors.OpPrereqError("The specified node is already the"
9111
                                 " secondary node of the instance",
9112
                                 errors.ECODE_INVAL)
9113

    
9114
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
9115
                                    constants.REPLACE_DISK_CHG):
9116
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
9117
                                 errors.ECODE_INVAL)
9118

    
9119
    if self.mode == constants.REPLACE_DISK_AUTO:
9120
      if not self._CheckDisksActivated(instance):
9121
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
9122
                                   " first" % self.instance_name,
9123
                                   errors.ECODE_STATE)
9124
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
9125
      faulty_secondary = self._FindFaultyDisks(secondary_node)
9126

    
9127
      if faulty_primary and faulty_secondary:
9128
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
9129
                                   " one node and can not be repaired"
9130
                                   " automatically" % self.instance_name,
9131
                                   errors.ECODE_STATE)
9132

    
9133
      if faulty_primary:
9134
        self.disks = faulty_primary
9135
        self.target_node = instance.primary_node
9136
        self.other_node = secondary_node
9137
        check_nodes = [self.target_node, self.other_node]
9138
      elif faulty_secondary:
9139
        self.disks = faulty_secondary
9140
        self.target_node = secondary_node
9141
        self.other_node = instance.primary_node
9142
        check_nodes = [self.target_node, self.other_node]
9143
      else:
9144
        self.disks = []
9145
        check_nodes = []
9146

    
9147
    else:
9148
      # Non-automatic modes
9149
      if self.mode == constants.REPLACE_DISK_PRI:
9150
        self.target_node = instance.primary_node
9151
        self.other_node = secondary_node
9152
        check_nodes = [self.target_node, self.other_node]
9153

    
9154
      elif self.mode == constants.REPLACE_DISK_SEC:
9155
        self.target_node = secondary_node
9156
        self.other_node = instance.primary_node
9157
        check_nodes = [self.target_node, self.other_node]
9158

    
9159
      elif self.mode == constants.REPLACE_DISK_CHG:
9160
        self.new_node = remote_node
9161
        self.other_node = instance.primary_node
9162
        self.target_node = secondary_node
9163
        check_nodes = [self.new_node, self.other_node]
9164

    
9165
        _CheckNodeNotDrained(self.lu, remote_node)
9166
        _CheckNodeVmCapable(self.lu, remote_node)
9167

    
9168
        old_node_info = self.cfg.GetNodeInfo(secondary_node)
9169
        assert old_node_info is not None
9170
        if old_node_info.offline and not self.early_release:
9171
          # doesn't make sense to delay the release
9172
          self.early_release = True
9173
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
9174
                          " early-release mode", secondary_node)
9175

    
9176
      else:
9177
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
9178
                                     self.mode)
9179

    
9180
      # If not specified all disks should be replaced
9181
      if not self.disks:
9182
        self.disks = range(len(self.instance.disks))
9183

    
9184
    for node in check_nodes:
9185
      _CheckNodeOnline(self.lu, node)
9186

    
9187
    touched_nodes = frozenset(node_name for node_name in [self.new_node,
9188
                                                          self.other_node,
9189
                                                          self.target_node]
9190
                              if node_name is not None)
9191

    
9192
    # Release unneeded node locks
9193
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
9194

    
9195
    # Release any owned node group
9196
    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
9197
      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
9198

    
9199
    # Check whether disks are valid
9200
    for disk_idx in self.disks:
9201
      instance.FindDisk(disk_idx)
9202

    
9203
    # Get secondary node IP addresses
9204
    self.node_secondary_ip = \
9205
      dict((node_name, self.cfg.GetNodeInfo(node_name).secondary_ip)
9206
           for node_name in touched_nodes)
9207

    
9208
  def Exec(self, feedback_fn):
9209
    """Execute disk replacement.
9210

9211
    This dispatches the disk replacement to the appropriate handler.
9212

9213
    """
9214
    if self.delay_iallocator:
9215
      self._CheckPrereq2()
9216

    
9217
    if __debug__:
9218
      # Verify owned locks before starting operation
9219
      owned_locks = self.lu.glm.list_owned(locking.LEVEL_NODE)
9220
      assert set(owned_locks) == set(self.node_secondary_ip), \
9221
          ("Incorrect node locks, owning %s, expected %s" %
9222
           (owned_locks, self.node_secondary_ip.keys()))
9223

    
9224
      owned_locks = self.lu.glm.list_owned(locking.LEVEL_INSTANCE)
9225
      assert list(owned_locks) == [self.instance_name], \
9226
          "Instance '%s' not locked" % self.instance_name
9227

    
9228
      assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
9229
          "Should not own any node group lock at this point"
9230

    
9231
    if not self.disks:
9232
      feedback_fn("No disks need replacement")
9233
      return
9234

    
9235
    feedback_fn("Replacing disk(s) %s for %s" %
9236
                (utils.CommaJoin(self.disks), self.instance.name))
9237

    
9238
    activate_disks = (not self.instance.admin_up)
9239

    
9240
    # Activate the instance disks if we're replacing them on a down instance
9241
    if activate_disks:
9242
      _StartInstanceDisks(self.lu, self.instance, True)
9243

    
9244
    try:
9245
      # Should we replace the secondary node?
9246
      if self.new_node is not None:
9247
        fn = self._ExecDrbd8Secondary
9248
      else:
9249
        fn = self._ExecDrbd8DiskOnly
9250

    
9251
      result = fn(feedback_fn)
9252
    finally:
9253
      # Deactivate the instance disks if we're replacing them on a
9254
      # down instance
9255
      if activate_disks:
9256
        _SafeShutdownInstanceDisks(self.lu, self.instance)
9257

    
9258
    if __debug__:
9259
      # Verify owned locks
9260
      owned_locks = self.lu.glm.list_owned(locking.LEVEL_NODE)
9261
      nodes = frozenset(self.node_secondary_ip)
9262
      assert ((self.early_release and not owned_locks) or
9263
              (not self.early_release and not (set(owned_locks) - nodes))), \
9264
        ("Not owning the correct locks, early_release=%s, owned=%r,"
9265
         " nodes=%r" % (self.early_release, owned_locks, nodes))
9266

    
9267
    return result
9268

    
9269
  def _CheckVolumeGroup(self, nodes):
9270
    self.lu.LogInfo("Checking volume groups")
9271

    
9272
    vgname = self.cfg.GetVGName()
9273

    
9274
    # Make sure volume group exists on all involved nodes
9275
    results = self.rpc.call_vg_list(nodes)
9276
    if not results:
9277
      raise errors.OpExecError("Can't list volume groups on the nodes")
9278

    
9279
    for node in nodes:
9280
      res = results[node]
9281
      res.Raise("Error checking node %s" % node)
9282
      if vgname not in res.payload:
9283
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
9284
                                 (vgname, node))
9285

    
9286
  def _CheckDisksExistence(self, nodes):
9287
    # Check disk existence
9288
    for idx, dev in enumerate(self.instance.disks):
9289
      if idx not in self.disks:
9290
        continue
9291

    
9292
      for node in nodes:
9293
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
9294
        self.cfg.SetDiskID(dev, node)
9295

    
9296
        result = self.rpc.call_blockdev_find(node, dev)
9297

    
9298
        msg = result.fail_msg
9299
        if msg or not result.payload:
9300
          if not msg:
9301
            msg = "disk not found"
9302
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
9303
                                   (idx, node, msg))
9304

    
9305
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
9306
    for idx, dev in enumerate(self.instance.disks):
9307
      if idx not in self.disks:
9308
        continue
9309

    
9310
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
9311
                      (idx, node_name))
9312

    
9313
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
9314
                                   ldisk=ldisk):
9315
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
9316
                                 " replace disks for instance %s" %
9317
                                 (node_name, self.instance.name))
9318

    
9319
  def _CreateNewStorage(self, node_name):
9320
    iv_names = {}
9321

    
9322
    for idx, dev in enumerate(self.instance.disks):
9323
      if idx not in self.disks:
9324
        continue
9325

    
9326
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
9327

    
9328
      self.cfg.SetDiskID(dev, node_name)
9329

    
9330
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
9331
      names = _GenerateUniqueNames(self.lu, lv_names)
9332

    
9333
      vg_data = dev.children[0].logical_id[0]
9334
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
9335
                             logical_id=(vg_data, names[0]))
9336
      vg_meta = dev.children[1].logical_id[0]
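      # (128 MiB is the conventional size of a DRBD metadata LV in Ganeti)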
9337
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
9338
                             logical_id=(vg_meta, names[1]))
9339

    
9340
      new_lvs = [lv_data, lv_meta]
9341
      old_lvs = dev.children
9342
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
9343

    
9344
      # we pass force_create=True to force the LVM creation
9345
      for new_lv in new_lvs:
9346
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
9347
                        _GetInstanceInfoText(self.instance), False)
9348

    
9349
    return iv_names
9350

    
9351
  def _CheckDevices(self, node_name, iv_names):
9352
    for name, (dev, _, _) in iv_names.iteritems():
9353
      self.cfg.SetDiskID(dev, node_name)
9354

    
9355
      result = self.rpc.call_blockdev_find(node_name, dev)
9356

    
9357
      msg = result.fail_msg
9358
      if msg or not result.payload:
9359
        if not msg:
9360
          msg = "disk not found"
9361
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
9362
                                 (name, msg))
9363

    
9364
      if result.payload.is_degraded:
9365
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
9366

    
9367
  def _RemoveOldStorage(self, node_name, iv_names):
9368
    for name, (_, old_lvs, _) in iv_names.iteritems():
9369
      self.lu.LogInfo("Remove logical volumes for %s" % name)
9370

    
9371
      for lv in old_lvs:
9372
        self.cfg.SetDiskID(lv, node_name)
9373

    
9374
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
9375
        if msg:
9376
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
9377
                             hint="remove unused LVs manually")
9378

    
9379
  def _ExecDrbd8DiskOnly(self, feedback_fn):
9380
    """Replace a disk on the primary or secondary for DRBD 8.
9381

9382
    The algorithm for replace is quite complicated:
9383

9384
      1. for each disk to be replaced:
9385

9386
        1. create new LVs on the target node with unique names
9387
        1. detach old LVs from the drbd device
9388
        1. rename old LVs to name_replaced.<time_t>
9389
        1. rename new LVs to old LVs
9390
        1. attach the new LVs (with the old names now) to the drbd device
9391

9392
      1. wait for sync across all devices
9393

9394
      1. for each modified disk:
9395

9396
        1. remove old LVs (which have the name name_replaced.<time_t>)
9397

9398
    Failures are not very well handled.
9399

9400
    """
9401
    steps_total = 6
9402

    
9403
    # Step: check device activation
9404
    self.lu.LogStep(1, steps_total, "Check device existence")
9405
    self._CheckDisksExistence([self.other_node, self.target_node])
9406
    self._CheckVolumeGroup([self.target_node, self.other_node])
9407

    
9408
    # Step: check other node consistency
9409
    self.lu.LogStep(2, steps_total, "Check peer consistency")
9410
    self._CheckDisksConsistency(self.other_node,
9411
                                self.other_node == self.instance.primary_node,
9412
                                False)
9413

    
9414
    # Step: create new storage
9415
    self.lu.LogStep(3, steps_total, "Allocate new storage")
9416
    iv_names = self._CreateNewStorage(self.target_node)
9417

    
9418
    # Step: for each lv, detach+rename*2+attach
9419
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9420
    for dev, old_lvs, new_lvs in iv_names.itervalues():
9421
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
9422

    
9423
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
9424
                                                     old_lvs)
9425
      result.Raise("Can't detach drbd from local storage on node"
9426
                   " %s for device %s" % (self.target_node, dev.iv_name))
9427
      #dev.children = []
9428
      #cfg.Update(instance)
9429

    
9430
      # ok, we created the new LVs, so now we know we have the needed
9431
      # storage; as such, we proceed on the target node to rename
9432
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
9433
      # using the assumption that logical_id == physical_id (which in
9434
      # turn is the unique_id on that node)
9435

    
9436
      # FIXME(iustin): use a better name for the replaced LVs
9437
      temp_suffix = int(time.time())
9438
      ren_fn = lambda d, suff: (d.physical_id[0],
9439
                                d.physical_id[1] + "_replaced-%s" % suff)
9440

    
9441
      # Build the rename list based on what LVs exist on the node
9442
      rename_old_to_new = []
9443
      for to_ren in old_lvs:
9444
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
9445
        if not result.fail_msg and result.payload:
9446
          # device exists
9447
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
9448

    
9449
      self.lu.LogInfo("Renaming the old LVs on the target node")
9450
      result = self.rpc.call_blockdev_rename(self.target_node,
9451
                                             rename_old_to_new)
9452
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
9453

    
9454
      # Now we rename the new LVs to the old LVs
9455
      self.lu.LogInfo("Renaming the new LVs on the target node")
9456
      rename_new_to_old = [(new, old.physical_id)
9457
                           for old, new in zip(old_lvs, new_lvs)]
9458
      result = self.rpc.call_blockdev_rename(self.target_node,
9459
                                             rename_new_to_old)
9460
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
9461

    
9462
      for old, new in zip(old_lvs, new_lvs):
9463
        new.logical_id = old.logical_id
9464
        self.cfg.SetDiskID(new, self.target_node)
9465

    
9466
      for disk in old_lvs:
9467
        disk.logical_id = ren_fn(disk, temp_suffix)
9468
        self.cfg.SetDiskID(disk, self.target_node)
9469

    
9470
      # Now that the new lvs have the old name, we can add them to the device
9471
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9472
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9473
                                                  new_lvs)
9474
      msg = result.fail_msg
9475
      if msg:
9476
        for new_lv in new_lvs:
9477
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
9478
                                               new_lv).fail_msg
9479
          if msg2:
9480
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
9481
                               hint=("cleanup manually the unused logical"
9482
                                     "volumes"))
9483
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9484

    
9485
      dev.children = new_lvs
9486

    
9487
      self.cfg.Update(self.instance, feedback_fn)
9488

    
9489
    cstep = 5
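    # with early_release the old storage is removed and the node locks dropped
    # before waiting for resync: other jobs get the locks sooner, at the price
    # of a longer window in which the instance runs on degraded storage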
9490
    if self.early_release:
9491
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9492
      cstep += 1
9493
      self._RemoveOldStorage(self.target_node, iv_names)
9494
      # WARNING: we release both node locks here, do not do other RPCs
9495
      # than WaitForSync to the primary node
9496
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9497
                    names=[self.target_node, self.other_node])
9498

    
9499
    # Wait for sync
9500
    # This can fail as the old devices are degraded and _WaitForSync
9501
    # does a combined result over all disks, so we don't check its return value
9502
    self.lu.LogStep(cstep, steps_total, "Sync devices")
9503
    cstep += 1
9504
    _WaitForSync(self.lu, self.instance)
9505

    
9506
    # Check all devices manually
9507
    self._CheckDevices(self.instance.primary_node, iv_names)
9508

    
9509
    # Step: remove old storage
9510
    if not self.early_release:
9511
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9512
      cstep += 1
9513
      self._RemoveOldStorage(self.target_node, iv_names)
9514

    
9515
  def _ExecDrbd8Secondary(self, feedback_fn):
9516
    """Replace the secondary node for DRBD 8.
9517

9518
    The algorithm for replace is quite complicated:
9519
      - for all disks of the instance:
9520
        - create new LVs on the new node with same names
9521
        - shutdown the drbd device on the old secondary
9522
        - disconnect the drbd network on the primary
9523
        - create the drbd device on the new secondary
9524
        - network attach the drbd on the primary, using an artifice:
9525
          the drbd code for Attach() will connect to the network if it
9526
          finds a device which is connected to the good local disks but
9527
          not network enabled
9528
      - wait for sync across all devices
9529
      - remove all disks from the old secondary
9530

9531
    Failures are not very well handled.
9532

9533
    """
9534
    steps_total = 6
9535

    
9536
    # Step: check device activation
9537
    self.lu.LogStep(1, steps_total, "Check device existence")
9538
    self._CheckDisksExistence([self.instance.primary_node])
9539
    self._CheckVolumeGroup([self.instance.primary_node])
9540

    
9541
    # Step: check other node consistency
9542
    self.lu.LogStep(2, steps_total, "Check peer consistency")
9543
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
9544

    
9545
    # Step: create new storage
9546
    self.lu.LogStep(3, steps_total, "Allocate new storage")
9547
    for idx, dev in enumerate(self.instance.disks):
9548
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
9549
                      (self.new_node, idx))
9550
      # we pass force_create=True to force LVM creation
9551
      for new_lv in dev.children:
9552
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9553
                        _GetInstanceInfoText(self.instance), False)
9554

    
9555
    # Step 4: drbd minors and drbd setup changes
9556
    # after this, we must manually remove the drbd minors on both the
9557
    # error and the success paths
9558
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
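    # one DRBD minor has to be reserved on the new node for every disk of the
    # instance, hence the list comprehension below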
9559
    minors = self.cfg.AllocateDRBDMinor([self.new_node
9560
                                         for dev in self.instance.disks],
9561
                                        self.instance.name)
9562
    logging.debug("Allocated minors %r", minors)
9563

    
9564
    iv_names = {}
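    # Descriptive note: iv_names maps each disk index to a tuple of
    # (disk object, its LV children, the new networked logical_id); it is
    # used further down for the device checks and the old-storage removal.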
9565
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
9566
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
9567
                      (self.new_node, idx))
9568
      # create new devices on new_node; note that we create two IDs:
9569
      # one without port, so the drbd will be activated without
9570
      # networking information on the new node at this stage, and one
9571
      # with network, for the latter activation in step 4
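      # Illustrative example (assumed values only): a DRBD8 logical_id is a
      # 6-tuple such as ("node1.example.com", "node2.example.com", 11000,
      # 0, 1, "secret"), i.e. (node_a, node_b, port, minor_a, minor_b,
      # shared_secret), which is what gets unpacked below.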
9572
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
9573
      if self.instance.primary_node == o_node1:
9574
        p_minor = o_minor1
9575
      else:
9576
        assert self.instance.primary_node == o_node2, "Three-node instance?"
9577
        p_minor = o_minor2
9578

    
9579
      new_alone_id = (self.instance.primary_node, self.new_node, None,
9580
                      p_minor, new_minor, o_secret)
9581
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
9582
                    p_minor, new_minor, o_secret)
9583

    
9584
      iv_names[idx] = (dev, dev.children, new_net_id)
9585
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
9586
                    new_net_id)
9587
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
9588
                              logical_id=new_alone_id,
9589
                              children=dev.children,
9590
                              size=dev.size)
9591
      try:
9592
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
9593
                              _GetInstanceInfoText(self.instance), False)
9594
      except errors.GenericError:
9595
        self.cfg.ReleaseDRBDMinors(self.instance.name)
9596
        raise
9597

    
9598
    # We have new devices, shutdown the drbd on the old secondary
9599
    for idx, dev in enumerate(self.instance.disks):
9600
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
9601
      self.cfg.SetDiskID(dev, self.target_node)
9602
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
9603
      if msg:
9604
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
9605
                           "node: %s" % (idx, msg),
9606
                           hint=("Please cleanup this device manually as"
9607
                                 " soon as possible"))
9608

    
9609
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
9610
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
9611
                                               self.node_secondary_ip,
9612
                                               self.instance.disks)\
9613
                                              [self.instance.primary_node]
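    # Descriptive note: the disconnect RPC returns one result per contacted
    # node; only the primary node was queried above, so its entry is indexed
    # out directly.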
9614

    
9615
    msg = result.fail_msg
9616
    if msg:
9617
      # detaches didn't succeed (unlikely)
9618
      self.cfg.ReleaseDRBDMinors(self.instance.name)
9619
      raise errors.OpExecError("Can't detach the disks from the network on"
9620
                               " old node: %s" % (msg,))
9621

    
9622
    # if we managed to detach at least one, we update all the disks of
9623
    # the instance to point to the new secondary
9624
    self.lu.LogInfo("Updating instance configuration")
9625
    for dev, _, new_logical_id in iv_names.itervalues():
9626
      dev.logical_id = new_logical_id
9627
      self.cfg.SetDiskID(dev, self.instance.primary_node)
9628

    
9629
    self.cfg.Update(self.instance, feedback_fn)
9630

    
9631
    # and now perform the drbd attach
9632
    self.lu.LogInfo("Attaching primary drbds to new secondary"
9633
                    " (standalone => connected)")
9634
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
9635
                                            self.new_node],
9636
                                           self.node_secondary_ip,
9637
                                           self.instance.disks,
9638
                                           self.instance.name,
9639
                                           False)
9640
    for to_node, to_result in result.items():
9641
      msg = to_result.fail_msg
9642
      if msg:
9643
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
9644
                           to_node, msg,
9645
                           hint=("please do a gnt-instance info to see the"
9646
                                 " status of disks"))
9647
    cstep = 5
9648
    if self.early_release:
9649
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9650
      cstep += 1
9651
      self._RemoveOldStorage(self.target_node, iv_names)
9652
      # WARNING: we release all node locks here, do not do other RPCs
9653
      # than WaitForSync to the primary node
9654
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9655
                    names=[self.instance.primary_node,
9656
                           self.target_node,
9657
                           self.new_node])
9658

    
9659
    # Wait for sync
9660
    # This can fail as the old devices are degraded and _WaitForSync
9661
    # does a combined result over all disks, so we don't check its return value
9662
    self.lu.LogStep(cstep, steps_total, "Sync devices")
9663
    cstep += 1
9664
    _WaitForSync(self.lu, self.instance)
9665

    
9666
    # Check all devices manually
9667
    self._CheckDevices(self.instance.primary_node, iv_names)
9668

    
9669
    # Step: remove old storage
9670
    if not self.early_release:
9671
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9672
      self._RemoveOldStorage(self.target_node, iv_names)
9673

    
9674

    
9675
class LURepairNodeStorage(NoHooksLU):
9676
  """Repairs the volume group on a node.
9677

9678
  """
9679
  REQ_BGL = False
9680

    
9681
  def CheckArguments(self):
9682
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
9683

    
9684
    storage_type = self.op.storage_type
9685

    
9686
    if (constants.SO_FIX_CONSISTENCY not in
9687
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
9688
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
9689
                                 " repaired" % storage_type,
9690
                                 errors.ECODE_INVAL)
9691

    
9692
  def ExpandNames(self):
9693
    self.needed_locks = {
9694
      locking.LEVEL_NODE: [self.op.node_name],
9695
      }
9696

    
9697
  def _CheckFaultyDisks(self, instance, node_name):
9698
    """Ensure faulty disks abort the opcode or at least warn."""
9699
    try:
9700
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
9701
                                  node_name, True):
9702
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
9703
                                   " node '%s'" % (instance.name, node_name),
9704
                                   errors.ECODE_STATE)
9705
    except errors.OpPrereqError, err:
9706
      if self.op.ignore_consistency:
9707
        self.proc.LogWarning(str(err.args[0]))
9708
      else:
9709
        raise
9710

    
9711
  def CheckPrereq(self):
9712
    """Check prerequisites.
9713

9714
    """
9715
    # Check whether any instance on this node has faulty disks
9716
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
9717
      if not inst.admin_up:
9718
        continue
9719
      check_nodes = set(inst.all_nodes)
9720
      check_nodes.discard(self.op.node_name)
9721
      for inst_node_name in check_nodes:
9722
        self._CheckFaultyDisks(inst, inst_node_name)
9723

    
9724
  def Exec(self, feedback_fn):
9725
    feedback_fn("Repairing storage unit '%s' on %s ..." %
9726
                (self.op.name, self.op.node_name))
9727

    
9728
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
9729
    result = self.rpc.call_storage_execute(self.op.node_name,
9730
                                           self.op.storage_type, st_args,
9731
                                           self.op.name,
9732
                                           constants.SO_FIX_CONSISTENCY)
9733
    result.Raise("Failed to repair storage unit '%s' on %s" %
9734
                 (self.op.name, self.op.node_name))
9735

    
9736

    
9737
class LUNodeEvacStrategy(NoHooksLU):
9738
  """Computes the node evacuation strategy.
9739

9740
  """
9741
  REQ_BGL = False
9742

    
9743
  def CheckArguments(self):
9744
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
9745

    
9746
  def ExpandNames(self):
9747
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
9748
    self.needed_locks = locks = {}
9749
    if self.op.remote_node is None:
9750
      locks[locking.LEVEL_NODE] = locking.ALL_SET
9751
    else:
9752
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9753
      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
9754

    
9755
  def Exec(self, feedback_fn):
9756
    instances = []
9757
    for node in self.op.nodes:
9758
      instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
9759
    if not instances:
9760
      return []
9761

    
9762
    if self.op.remote_node is not None:
9763
      result = []
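      # Descriptive note: when a remote node is given explicitly, each entry
      # is an [instance_name, new_secondary_node] pair.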
9764
      for i in instances:
9765
        if i.primary_node == self.op.remote_node:
9766
          raise errors.OpPrereqError("Node %s is the primary node of"
9767
                                     " instance %s, cannot use it as"
9768
                                     " secondary" %
9769
                                     (self.op.remote_node, i.name),
9770
                                     errors.ECODE_INVAL)
9771
        result.append([i.name, self.op.remote_node])
9772
    else:
9773
      ial = IAllocator(self.cfg, self.rpc,
9774
                       mode=constants.IALLOCATOR_MODE_MEVAC,
9775
                       evac_nodes=self.op.nodes)
9776
      ial.Run(self.op.iallocator, validate=True)
9777
      if not ial.success:
9778
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
9779
                                 errors.ECODE_NORES)
9780
      result = ial.result
9781
    return result
9782

    
9783

    
9784
class LUInstanceGrowDisk(LogicalUnit):
9785
  """Grow a disk of an instance.
9786

9787
  """
9788
  HPATH = "disk-grow"
9789
  HTYPE = constants.HTYPE_INSTANCE
9790
  REQ_BGL = False
9791

    
9792
  def ExpandNames(self):
9793
    self._ExpandAndLockInstance()
9794
    self.needed_locks[locking.LEVEL_NODE] = []
9795
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9796

    
9797
  def DeclareLocks(self, level):
9798
    if level == locking.LEVEL_NODE:
9799
      self._LockInstancesNodes()
9800

    
9801
  def BuildHooksEnv(self):
9802
    """Build hooks env.
9803

9804
    This runs on the master, the primary and all the secondaries.
9805

9806
    """
9807
    env = {
9808
      "DISK": self.op.disk,
9809
      "AMOUNT": self.op.amount,
9810
      }
9811
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9812
    return env
9813

    
9814
  def BuildHooksNodes(self):
9815
    """Build hooks nodes.
9816

9817
    """
9818
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9819
    return (nl, nl)
9820

    
9821
  def CheckPrereq(self):
9822
    """Check prerequisites.
9823

9824
    This checks that the instance is in the cluster.
9825

9826
    """
9827
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9828
    assert instance is not None, \
9829
      "Cannot retrieve locked instance %s" % self.op.instance_name
9830
    nodenames = list(instance.all_nodes)
9831
    for node in nodenames:
9832
      _CheckNodeOnline(self, node)
9833

    
9834
    self.instance = instance
9835

    
9836
    if instance.disk_template not in constants.DTS_GROWABLE:
9837
      raise errors.OpPrereqError("Instance's disk layout does not support"
9838
                                 " growing", errors.ECODE_INVAL)
9839

    
9840
    self.disk = instance.FindDisk(self.op.disk)
9841

    
9842
    if instance.disk_template not in (constants.DT_FILE,
9843
                                      constants.DT_SHARED_FILE):
9844
      # TODO: check the free disk space for file, when that feature will be
9845
      # supported
9846
      _CheckNodesFreeDiskPerVG(self, nodenames,
9847
                               self.disk.ComputeGrowth(self.op.amount))
9848

    
9849
  def Exec(self, feedback_fn):
9850
    """Execute disk grow.
9851

9852
    """
9853
    instance = self.instance
9854
    disk = self.disk
9855

    
9856
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
9857
    if not disks_ok:
9858
      raise errors.OpExecError("Cannot activate block device to grow")
9859

    
9860
    # First run all grow ops in dry-run mode
9861
    for node in instance.all_nodes:
9862
      self.cfg.SetDiskID(disk, node)
9863
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
9864
      result.Raise("Grow request failed to node %s" % node)
9865

    
9866
    # We know that (as far as we can test) operations across different
9867
    # nodes will succeed, time to run it for real
9868
    for node in instance.all_nodes:
9869
      self.cfg.SetDiskID(disk, node)
9870
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
9871
      result.Raise("Grow request failed to node %s" % node)
9872

    
9873
      # TODO: Rewrite code to work properly
9874
      # DRBD goes into sync mode for a short amount of time after executing the
9875
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
9876
      # calling "resize" in sync mode fails. Sleeping for a short amount of
9877
      # time is a work-around.
9878
      time.sleep(5)
9879

    
9880
    disk.RecordGrow(self.op.amount)
9881
    self.cfg.Update(instance, feedback_fn)
9882
    if self.op.wait_for_sync:
9883
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
9884
      if disk_abort:
9885
        self.proc.LogWarning("Disk sync-ing has not returned a good"
9886
                             " status; please check the instance")
9887
      if not instance.admin_up:
9888
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
9889
    elif not instance.admin_up:
9890
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
9891
                           " not supposed to be running because no wait for"
9892
                           " sync mode was requested")
9893

    
9894

    
9895
class LUInstanceQueryData(NoHooksLU):
9896
  """Query runtime instance data.
9897

9898
  """
9899
  REQ_BGL = False
9900

    
9901
  def ExpandNames(self):
9902
    self.needed_locks = {}
9903

    
9904
    # Use locking if requested or when non-static information is wanted
9905
    if not (self.op.static or self.op.use_locking):
9906
      self.LogWarning("Non-static data requested, locks need to be acquired")
9907
      self.op.use_locking = True
9908

    
9909
    if self.op.instances or not self.op.use_locking:
9910
      # Expand instance names right here
9911
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
9912
    else:
9913
      # Will use acquired locks
9914
      self.wanted_names = None
9915

    
9916
    if self.op.use_locking:
9917
      self.share_locks = dict.fromkeys(locking.LEVELS, 1)
9918

    
9919
      if self.wanted_names is None:
9920
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
9921
      else:
9922
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
9923

    
9924
      self.needed_locks[locking.LEVEL_NODE] = []
9925
      self.share_locks = dict.fromkeys(locking.LEVELS, 1)
9926
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9927

    
9928
  def DeclareLocks(self, level):
9929
    if self.op.use_locking and level == locking.LEVEL_NODE:
9930
      self._LockInstancesNodes()
9931

    
9932
  def CheckPrereq(self):
9933
    """Check prerequisites.
9934

9935
    This only checks the optional instance list against the existing names.
9936

9937
    """
9938
    if self.wanted_names is None:
9939
      assert self.op.use_locking, "Locking was not used"
9940
      self.wanted_names = self.glm.list_owned(locking.LEVEL_INSTANCE)
9941

    
9942
    self.wanted_instances = [self.cfg.GetInstanceInfo(name)
9943
                             for name in self.wanted_names]
9944

    
9945
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
9946
    """Returns the status of a block device
9947

9948
    """
9949
    if self.op.static or not node:
9950
      return None
9951

    
9952
    self.cfg.SetDiskID(dev, node)
9953

    
9954
    result = self.rpc.call_blockdev_find(node, dev)
9955
    if result.offline:
9956
      return None
9957

    
9958
    result.Raise("Can't compute disk status for %s" % instance_name)
9959

    
9960
    status = result.payload
9961
    if status is None:
9962
      return None
9963

    
9964
    return (status.dev_path, status.major, status.minor,
9965
            status.sync_percent, status.estimated_time,
9966
            status.is_degraded, status.ldisk_status)
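    # Descriptive note: the tuple above packs (device path, major, minor,
    # sync percentage, estimated sync time, degraded flag, local-disk
    # status), mirroring the fields of the blockdev_find payload.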
9967

    
9968
  def _ComputeDiskStatus(self, instance, snode, dev):
9969
    """Compute block device status.
9970

9971
    """
9972
    if dev.dev_type in constants.LDS_DRBD:
9973
      # we change the snode then (otherwise we use the one passed in)
9974
      if dev.logical_id[0] == instance.primary_node:
9975
        snode = dev.logical_id[1]
9976
      else:
9977
        snode = dev.logical_id[0]
9978

    
9979
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
9980
                                              instance.name, dev)
9981
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
9982

    
9983
    if dev.children:
9984
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
9985
                      for child in dev.children]
9986
    else:
9987
      dev_children = []
9988

    
9989
    return {
9990
      "iv_name": dev.iv_name,
9991
      "dev_type": dev.dev_type,
9992
      "logical_id": dev.logical_id,
9993
      "physical_id": dev.physical_id,
9994
      "pstatus": dev_pstatus,
9995
      "sstatus": dev_sstatus,
9996
      "children": dev_children,
9997
      "mode": dev.mode,
9998
      "size": dev.size,
9999
      }
10000

    
10001
  def Exec(self, feedback_fn):
10002
    """Gather and return data"""
10003
    result = {}
10004

    
10005
    cluster = self.cfg.GetClusterInfo()
10006

    
10007
    for instance in self.wanted_instances:
10008
      if not self.op.static:
10009
        remote_info = self.rpc.call_instance_info(instance.primary_node,
10010
                                                  instance.name,
10011
                                                  instance.hypervisor)
10012
        remote_info.Raise("Error checking node %s" % instance.primary_node)
10013
        remote_info = remote_info.payload
10014
        if remote_info and "state" in remote_info:
10015
          remote_state = "up"
10016
        else:
10017
          remote_state = "down"
10018
      else:
10019
        remote_state = None
10020
      if instance.admin_up:
10021
        config_state = "up"
10022
      else:
10023
        config_state = "down"
10024

    
10025
      disks = [self._ComputeDiskStatus(instance, None, device)
10026
               for device in instance.disks]
10027

    
10028
      result[instance.name] = {
10029
        "name": instance.name,
10030
        "config_state": config_state,
10031
        "run_state": remote_state,
10032
        "pnode": instance.primary_node,
10033
        "snodes": instance.secondary_nodes,
10034
        "os": instance.os,
10035
        # this happens to be the same format used for hooks
10036
        "nics": _NICListToTuple(self, instance.nics),
10037
        "disk_template": instance.disk_template,
10038
        "disks": disks,
10039
        "hypervisor": instance.hypervisor,
10040
        "network_port": instance.network_port,
10041
        "hv_instance": instance.hvparams,
10042
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
10043
        "be_instance": instance.beparams,
10044
        "be_actual": cluster.FillBE(instance),
10045
        "os_instance": instance.osparams,
10046
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
10047
        "serial_no": instance.serial_no,
10048
        "mtime": instance.mtime,
10049
        "ctime": instance.ctime,
10050
        "uuid": instance.uuid,
10051
        }
10052

    
10053
    return result
10054

    
10055

    
10056
class LUInstanceSetParams(LogicalUnit):
10057
  """Modifies an instances's parameters.
10058

10059
  """
10060
  HPATH = "instance-modify"
10061
  HTYPE = constants.HTYPE_INSTANCE
10062
  REQ_BGL = False
10063

    
10064
  def CheckArguments(self):
10065
    if not (self.op.nics or self.op.disks or self.op.disk_template or
10066
            self.op.hvparams or self.op.beparams or self.op.os_name):
10067
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
10068

    
10069
    if self.op.hvparams:
10070
      _CheckGlobalHvParams(self.op.hvparams)
10071

    
10072
    # Disk validation
10073
    disk_addremove = 0
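    # Illustrative sketch (assumed example): self.op.disks is a list of
    # (operation, parameters) pairs, e.g.
    #   [(constants.DDM_ADD, {constants.IDISK_SIZE: 1024})]
    # to add a disk, or
    #   [(0, {constants.IDISK_MODE: constants.DISK_RDWR})]
    # to change the access mode of disk 0.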
10074
    for disk_op, disk_dict in self.op.disks:
10075
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
10076
      if disk_op == constants.DDM_REMOVE:
10077
        disk_addremove += 1
10078
        continue
10079
      elif disk_op == constants.DDM_ADD:
10080
        disk_addremove += 1
10081
      else:
10082
        if not isinstance(disk_op, int):
10083
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
10084
        if not isinstance(disk_dict, dict):
10085
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
10086
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10087

    
10088
      if disk_op == constants.DDM_ADD:
10089
        mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
10090
        if mode not in constants.DISK_ACCESS_SET:
10091
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
10092
                                     errors.ECODE_INVAL)
10093
        size = disk_dict.get(constants.IDISK_SIZE, None)
10094
        if size is None:
10095
          raise errors.OpPrereqError("Required disk parameter size missing",
10096
                                     errors.ECODE_INVAL)
10097
        try:
10098
          size = int(size)
10099
        except (TypeError, ValueError), err:
10100
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
10101
                                     str(err), errors.ECODE_INVAL)
10102
        disk_dict[constants.IDISK_SIZE] = size
10103
      else:
10104
        # modification of disk
10105
        if constants.IDISK_SIZE in disk_dict:
10106
          raise errors.OpPrereqError("Disk size change not possible, use"
10107
                                     " grow-disk", errors.ECODE_INVAL)
10108

    
10109
    if disk_addremove > 1:
10110
      raise errors.OpPrereqError("Only one disk add or remove operation"
10111
                                 " supported at a time", errors.ECODE_INVAL)
10112

    
10113
    if self.op.disks and self.op.disk_template is not None:
10114
      raise errors.OpPrereqError("Disk template conversion and other disk"
10115
                                 " changes not supported at the same time",
10116
                                 errors.ECODE_INVAL)
10117

    
10118
    if (self.op.disk_template and
10119
        self.op.disk_template in constants.DTS_INT_MIRROR and
10120
        self.op.remote_node is None):
10121
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
10122
                                 " one requires specifying a secondary node",
10123
                                 errors.ECODE_INVAL)
10124

    
10125
    # NIC validation
10126
    nic_addremove = 0
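    # Illustrative sketch (assumed example): self.op.nics follows the same
    # (operation, parameters) structure, e.g.
    #   [(0, {constants.INIC_IP: "198.51.100.10"})]
    # to change the IP of NIC 0.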
10127
    for nic_op, nic_dict in self.op.nics:
10128
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
10129
      if nic_op == constants.DDM_REMOVE:
10130
        nic_addremove += 1
10131
        continue
10132
      elif nic_op == constants.DDM_ADD:
10133
        nic_addremove += 1
10134
      else:
10135
        if not isinstance(nic_op, int):
10136
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
10137
        if not isinstance(nic_dict, dict):
10138
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
10139
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10140

    
10141
      # nic_dict should be a dict
10142
      nic_ip = nic_dict.get(constants.INIC_IP, None)
10143
      if nic_ip is not None:
10144
        if nic_ip.lower() == constants.VALUE_NONE:
10145
          nic_dict[constants.INIC_IP] = None
10146
        else:
10147
          if not netutils.IPAddress.IsValid(nic_ip):
10148
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
10149
                                       errors.ECODE_INVAL)
10150

    
10151
      nic_bridge = nic_dict.get('bridge', None)
10152
      nic_link = nic_dict.get(constants.INIC_LINK, None)
10153
      if nic_bridge and nic_link:
10154
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
10155
                                   " at the same time", errors.ECODE_INVAL)
10156
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
10157
        nic_dict['bridge'] = None
10158
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
10159
        nic_dict[constants.INIC_LINK] = None
10160

    
10161
      if nic_op == constants.DDM_ADD:
10162
        nic_mac = nic_dict.get(constants.INIC_MAC, None)
10163
        if nic_mac is None:
10164
          nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
10165

    
10166
      if constants.INIC_MAC in nic_dict:
10167
        nic_mac = nic_dict[constants.INIC_MAC]
10168
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10169
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
10170

    
10171
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
10172
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
10173
                                     " modifying an existing nic",
10174
                                     errors.ECODE_INVAL)
10175

    
10176
    if nic_addremove > 1:
10177
      raise errors.OpPrereqError("Only one NIC add or remove operation"
10178
                                 " supported at a time", errors.ECODE_INVAL)
10179

    
10180
  def ExpandNames(self):
10181
    self._ExpandAndLockInstance()
10182
    self.needed_locks[locking.LEVEL_NODE] = []
10183
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10184

    
10185
  def DeclareLocks(self, level):
10186
    if level == locking.LEVEL_NODE:
10187
      self._LockInstancesNodes()
10188
      if self.op.disk_template and self.op.remote_node:
10189
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10190
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
10191

    
10192
  def BuildHooksEnv(self):
10193
    """Build hooks env.
10194

10195
    This runs on the master, primary and secondaries.
10196

10197
    """
10198
    args = dict()
10199
    if constants.BE_MEMORY in self.be_new:
10200
      args['memory'] = self.be_new[constants.BE_MEMORY]
10201
    if constants.BE_VCPUS in self.be_new:
10202
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
10203
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
10204
    # information at all.
10205
    if self.op.nics:
10206
      args['nics'] = []
10207
      nic_override = dict(self.op.nics)
10208
      for idx, nic in enumerate(self.instance.nics):
10209
        if idx in nic_override:
10210
          this_nic_override = nic_override[idx]
10211
        else:
10212
          this_nic_override = {}
10213
        if constants.INIC_IP in this_nic_override:
10214
          ip = this_nic_override[constants.INIC_IP]
10215
        else:
10216
          ip = nic.ip
10217
        if constants.INIC_MAC in this_nic_override:
10218
          mac = this_nic_override[constants.INIC_MAC]
10219
        else:
10220
          mac = nic.mac
10221
        if idx in self.nic_pnew:
10222
          nicparams = self.nic_pnew[idx]
10223
        else:
10224
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
10225
        mode = nicparams[constants.NIC_MODE]
10226
        link = nicparams[constants.NIC_LINK]
10227
        args['nics'].append((ip, mac, mode, link))
10228
      if constants.DDM_ADD in nic_override:
10229
        ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
10230
        mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
10231
        nicparams = self.nic_pnew[constants.DDM_ADD]
10232
        mode = nicparams[constants.NIC_MODE]
10233
        link = nicparams[constants.NIC_LINK]
10234
        args['nics'].append((ip, mac, mode, link))
10235
      elif constants.DDM_REMOVE in nic_override:
10236
        del args['nics'][-1]
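        # Descriptive note: only removing the last NIC is supported, so
        # dropping the last tuple here mirrors what Exec() later does to
        # instance.nics.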
10237

    
10238
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
10239
    if self.op.disk_template:
10240
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
10241

    
10242
    return env
10243

    
10244
  def BuildHooksNodes(self):
10245
    """Build hooks nodes.
10246

10247
    """
10248
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10249
    return (nl, nl)
10250

    
10251
  def CheckPrereq(self):
10252
    """Check prerequisites.
10253

10254
    This only checks the instance list against the existing names.
10255

10256
    """
10257
    # checking the new params on the primary/secondary nodes
10258

    
10259
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10260
    cluster = self.cluster = self.cfg.GetClusterInfo()
10261
    assert self.instance is not None, \
10262
      "Cannot retrieve locked instance %s" % self.op.instance_name
10263
    pnode = instance.primary_node
10264
    nodelist = list(instance.all_nodes)
10265

    
10266
    # OS change
10267
    if self.op.os_name and not self.op.force:
10268
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
10269
                      self.op.force_variant)
10270
      instance_os = self.op.os_name
10271
    else:
10272
      instance_os = instance.os
10273

    
10274
    if self.op.disk_template:
10275
      if instance.disk_template == self.op.disk_template:
10276
        raise errors.OpPrereqError("Instance already has disk template %s" %
10277
                                   instance.disk_template, errors.ECODE_INVAL)
10278

    
10279
      if (instance.disk_template,
10280
          self.op.disk_template) not in self._DISK_CONVERSIONS:
10281
        raise errors.OpPrereqError("Unsupported disk template conversion from"
10282
                                   " %s to %s" % (instance.disk_template,
10283
                                                  self.op.disk_template),
10284
                                   errors.ECODE_INVAL)
10285
      _CheckInstanceDown(self, instance, "cannot change disk template")
10286
      if self.op.disk_template in constants.DTS_INT_MIRROR:
10287
        if self.op.remote_node == pnode:
10288
          raise errors.OpPrereqError("Given new secondary node %s is the same"
10289
                                     " as the primary node of the instance" %
10290
                                     self.op.remote_node, errors.ECODE_STATE)
10291
        _CheckNodeOnline(self, self.op.remote_node)
10292
        _CheckNodeNotDrained(self, self.op.remote_node)
10293
        # FIXME: here we assume that the old instance type is DT_PLAIN
10294
        assert instance.disk_template == constants.DT_PLAIN
10295
        disks = [{constants.IDISK_SIZE: d.size,
10296
                  constants.IDISK_VG: d.logical_id[0]}
10297
                 for d in instance.disks]
10298
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
10299
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
10300

    
10301
    # hvparams processing
10302
    if self.op.hvparams:
10303
      hv_type = instance.hypervisor
10304
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
10305
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
10306
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
10307

    
10308
      # local check
10309
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
10310
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
10311
      self.hv_new = hv_new # the new actual values
10312
      self.hv_inst = i_hvdict # the new dict (without defaults)
10313
    else:
10314
      self.hv_new = self.hv_inst = {}
10315

    
10316
    # beparams processing
10317
    if self.op.beparams:
10318
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
10319
                                   use_none=True)
10320
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
10321
      be_new = cluster.SimpleFillBE(i_bedict)
10322
      self.be_new = be_new # the new actual values
10323
      self.be_inst = i_bedict # the new dict (without defaults)
10324
    else:
10325
      self.be_new = self.be_inst = {}
10326
    be_old = cluster.FillBE(instance)
10327

    
10328
    # osparams processing
10329
    if self.op.osparams:
10330
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
10331
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
10332
      self.os_inst = i_osdict # the new dict (without defaults)
10333
    else:
10334
      self.os_inst = {}
10335

    
10336
    self.warn = []
10337

    
10338
    if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
10339
        be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
10340
      mem_check_list = [pnode]
10341
      if be_new[constants.BE_AUTO_BALANCE]:
10342
        # either we changed auto_balance to yes or it was from before
10343
        mem_check_list.extend(instance.secondary_nodes)
10344
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
10345
                                                  instance.hypervisor)
10346
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
10347
                                         instance.hypervisor)
10348
      pninfo = nodeinfo[pnode]
10349
      msg = pninfo.fail_msg
10350
      if msg:
10351
        # Assume the primary node is unreachable and go ahead
10352
        self.warn.append("Can't get info from primary node %s: %s" %
10353
                         (pnode,  msg))
10354
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
10355
        self.warn.append("Node data from primary node %s doesn't contain"
10356
                         " free memory information" % pnode)
10357
      elif instance_info.fail_msg:
10358
        self.warn.append("Can't get instance runtime information: %s" %
10359
                        instance_info.fail_msg)
10360
      else:
10361
        if instance_info.payload:
10362
          current_mem = int(instance_info.payload['memory'])
10363
        else:
10364
          # Assume instance not running
10365
          # (there is a slight race condition here, but it's not very probable,
10366
          # and we have no other way to check)
10367
          current_mem = 0
10368
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
10369
                    pninfo.payload['memory_free'])
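        # Worked example (assumed numbers): raising BE_MEMORY to 2048 MB while
        # the instance currently uses 512 MB and the primary node reports
        # 1024 MB free gives miss_mem = 2048 - 512 - 1024 = 512 > 0, so the
        # change is refused below.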
10370
        if miss_mem > 0:
10371
          raise errors.OpPrereqError("This change will prevent the instance"
10372
                                     " from starting, due to %d MB of memory"
10373
                                     " missing on its primary node" % miss_mem,
10374
                                     errors.ECODE_NORES)
10375

    
10376
      if be_new[constants.BE_AUTO_BALANCE]:
10377
        for node, nres in nodeinfo.items():
10378
          if node not in instance.secondary_nodes:
10379
            continue
10380
          nres.Raise("Can't get info from secondary node %s" % node,
10381
                     prereq=True, ecode=errors.ECODE_STATE)
10382
          if not isinstance(nres.payload.get('memory_free', None), int):
10383
            raise errors.OpPrereqError("Secondary node %s didn't return free"
10384
                                       " memory information" % node,
10385
                                       errors.ECODE_STATE)
10386
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
10387
            raise errors.OpPrereqError("This change will prevent the instance"
10388
                                       " from failover to its secondary node"
10389
                                       " %s, due to not enough memory" % node,
10390
                                       errors.ECODE_STATE)
10391

    
10392
    # NIC processing
10393
    self.nic_pnew = {}
10394
    self.nic_pinst = {}
10395
    for nic_op, nic_dict in self.op.nics:
10396
      if nic_op == constants.DDM_REMOVE:
10397
        if not instance.nics:
10398
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
10399
                                     errors.ECODE_INVAL)
10400
        continue
10401
      if nic_op != constants.DDM_ADD:
10402
        # an existing nic
10403
        if not instance.nics:
10404
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
10405
                                     " no NICs" % nic_op,
10406
                                     errors.ECODE_INVAL)
10407
        if nic_op < 0 or nic_op >= len(instance.nics):
10408
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
10409
                                     " are 0 to %d" %
10410
                                     (nic_op, len(instance.nics) - 1),
10411
                                     errors.ECODE_INVAL)
10412
        old_nic_params = instance.nics[nic_op].nicparams
10413
        old_nic_ip = instance.nics[nic_op].ip
10414
      else:
10415
        old_nic_params = {}
10416
        old_nic_ip = None
10417

    
10418
      update_params_dict = dict([(key, nic_dict[key])
10419
                                 for key in constants.NICS_PARAMETERS
10420
                                 if key in nic_dict])
10421

    
10422
      if 'bridge' in nic_dict:
10423
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
10424

    
10425
      new_nic_params = _GetUpdatedParams(old_nic_params,
10426
                                         update_params_dict)
10427
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
10428
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
10429
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
10430
      self.nic_pinst[nic_op] = new_nic_params
10431
      self.nic_pnew[nic_op] = new_filled_nic_params
10432
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
10433

    
10434
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
10435
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
10436
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
10437
        if msg:
10438
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
10439
          if self.op.force:
10440
            self.warn.append(msg)
10441
          else:
10442
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
10443
      if new_nic_mode == constants.NIC_MODE_ROUTED:
10444
        if constants.INIC_IP in nic_dict:
10445
          nic_ip = nic_dict[constants.INIC_IP]
10446
        else:
10447
          nic_ip = old_nic_ip
10448
        if nic_ip is None:
10449
          raise errors.OpPrereqError('Cannot set the nic ip to None'
10450
                                     ' on a routed nic', errors.ECODE_INVAL)
10451
      if constants.INIC_MAC in nic_dict:
10452
        nic_mac = nic_dict[constants.INIC_MAC]
10453
        if nic_mac is None:
10454
          raise errors.OpPrereqError('Cannot set the nic mac to None',
10455
                                     errors.ECODE_INVAL)
10456
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10457
          # otherwise generate the mac
10458
          nic_dict[constants.INIC_MAC] = \
10459
            self.cfg.GenerateMAC(self.proc.GetECId())
10460
        else:
10461
          # or validate/reserve the current one
10462
          try:
10463
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
10464
          except errors.ReservationError:
10465
            raise errors.OpPrereqError("MAC address %s already in use"
10466
                                       " in cluster" % nic_mac,
10467
                                       errors.ECODE_NOTUNIQUE)
10468

    
10469
    # DISK processing
10470
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
10471
      raise errors.OpPrereqError("Disk operations not supported for"
10472
                                 " diskless instances",
10473
                                 errors.ECODE_INVAL)
10474
    for disk_op, _ in self.op.disks:
10475
      if disk_op == constants.DDM_REMOVE:
10476
        if len(instance.disks) == 1:
10477
          raise errors.OpPrereqError("Cannot remove the last disk of"
10478
                                     " an instance", errors.ECODE_INVAL)
10479
        _CheckInstanceDown(self, instance, "cannot remove disks")
10480

    
10481
      if (disk_op == constants.DDM_ADD and
10482
          len(instance.disks) >= constants.MAX_DISKS):
10483
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
10484
                                   " add more" % constants.MAX_DISKS,
10485
                                   errors.ECODE_STATE)
10486
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
10487
        # an existing disk
10488
        if disk_op < 0 or disk_op >= len(instance.disks):
10489
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
10490
                                     " are 0 to %d" %
10491
                                     (disk_op, len(instance.disks) - 1),
10492
                                     errors.ECODE_INVAL)
10493

    
10494
    return
10495

    
10496
  def _ConvertPlainToDrbd(self, feedback_fn):
10497
    """Converts an instance from plain to drbd.
10498

10499
    """
10500
    feedback_fn("Converting template to drbd")
10501
    instance = self.instance
10502
    pnode = instance.primary_node
10503
    snode = self.op.remote_node
10504

    
10505
    # create a fake disk info for _GenerateDiskTemplate
10506
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
10507
                  constants.IDISK_VG: d.logical_id[0]}
10508
                 for d in instance.disks]
10509
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
10510
                                      instance.name, pnode, [snode],
10511
                                      disk_info, None, None, 0, feedback_fn)
10512
    info = _GetInstanceInfoText(instance)
10513
    feedback_fn("Creating aditional volumes...")
10514
    # first, create the missing data and meta devices
10515
    for disk in new_disks:
10516
      # unfortunately this is... not too nice
10517
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
10518
                            info, True)
10519
      for child in disk.children:
10520
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
10521
    # at this stage, all new LVs have been created, we can rename the
10522
    # old ones
10523
    feedback_fn("Renaming original volumes...")
10524
    rename_list = [(o, n.children[0].logical_id)
10525
                   for (o, n) in zip(instance.disks, new_disks)]
10526
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
10527
    result.Raise("Failed to rename original LVs")
10528

    
10529
    feedback_fn("Initializing DRBD devices...")
10530
    # all child devices are in place, we can now create the DRBD devices
10531
    for disk in new_disks:
10532
      for node in [pnode, snode]:
10533
        f_create = node == pnode
10534
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
10535

    
10536
    # at this point, the instance has been modified
10537
    instance.disk_template = constants.DT_DRBD8
10538
    instance.disks = new_disks
10539
    self.cfg.Update(instance, feedback_fn)
10540

    
10541
    # disks are created, waiting for sync
10542
    disk_abort = not _WaitForSync(self, instance,
10543
                                  oneshot=not self.op.wait_for_sync)
10544
    if disk_abort:
10545
      raise errors.OpExecError("There are some degraded disks for"
10546
                               " this instance, please cleanup manually")
10547

    
10548
  def _ConvertDrbdToPlain(self, feedback_fn):
10549
    """Converts an instance from drbd to plain.
10550

10551
    """
10552
    instance = self.instance
10553
    assert len(instance.secondary_nodes) == 1
10554
    pnode = instance.primary_node
10555
    snode = instance.secondary_nodes[0]
10556
    feedback_fn("Converting template to plain")
10557

    
10558
    old_disks = instance.disks
10559
    new_disks = [d.children[0] for d in old_disks]
10560

    
10561
    # copy over size and mode
10562
    for parent, child in zip(old_disks, new_disks):
10563
      child.size = parent.size
10564
      child.mode = parent.mode
10565

    
10566
    # update instance structure
10567
    instance.disks = new_disks
10568
    instance.disk_template = constants.DT_PLAIN
10569
    self.cfg.Update(instance, feedback_fn)
10570

    
10571
    feedback_fn("Removing volumes on the secondary node...")
10572
    for disk in old_disks:
10573
      self.cfg.SetDiskID(disk, snode)
10574
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
10575
      if msg:
10576
        self.LogWarning("Could not remove block device %s on node %s,"
10577
                        " continuing anyway: %s", disk.iv_name, snode, msg)
10578

    
10579
    feedback_fn("Removing unneeded volumes on the primary node...")
10580
    for idx, disk in enumerate(old_disks):
10581
      meta = disk.children[1]
10582
      self.cfg.SetDiskID(meta, pnode)
10583
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
10584
      if msg:
10585
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
10586
                        " continuing anyway: %s", idx, pnode, msg)
10587

    
10588
  def Exec(self, feedback_fn):
10589
    """Modifies an instance.
10590

10591
    All parameters take effect only at the next restart of the instance.
10592

10593
    """
10594
    # Process here the warnings from CheckPrereq, as we don't have a
10595
    # feedback_fn there.
10596
    for warn in self.warn:
10597
      feedback_fn("WARNING: %s" % warn)
10598

    
10599
    result = []
10600
    instance = self.instance
10601
    # disk changes
10602
    for disk_op, disk_dict in self.op.disks:
10603
      if disk_op == constants.DDM_REMOVE:
10604
        # remove the last disk
10605
        device = instance.disks.pop()
10606
        device_idx = len(instance.disks)
10607
        for node, disk in device.ComputeNodeTree(instance.primary_node):
10608
          self.cfg.SetDiskID(disk, node)
10609
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
10610
          if msg:
10611
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
10612
                            " continuing anyway", device_idx, node, msg)
10613
        result.append(("disk/%d" % device_idx, "remove"))
10614
      elif disk_op == constants.DDM_ADD:
10615
        # add a new disk
10616
        if instance.disk_template in (constants.DT_FILE,
10617
                                        constants.DT_SHARED_FILE):
10618
          file_driver, file_path = instance.disks[0].logical_id
10619
          file_path = os.path.dirname(file_path)
10620
        else:
10621
          file_driver = file_path = None
10622
        disk_idx_base = len(instance.disks)
10623
        new_disk = _GenerateDiskTemplate(self,
10624
                                         instance.disk_template,
10625
                                         instance.name, instance.primary_node,
10626
                                         instance.secondary_nodes,
10627
                                         [disk_dict],
10628
                                         file_path,
10629
                                         file_driver,
10630
                                         disk_idx_base, feedback_fn)[0]
10631
        instance.disks.append(new_disk)
10632
        info = _GetInstanceInfoText(instance)
10633

    
10634
        logging.info("Creating volume %s for instance %s",
10635
                     new_disk.iv_name, instance.name)
10636
        # Note: this needs to be kept in sync with _CreateDisks
10637
        #HARDCODE
10638
        for node in instance.all_nodes:
10639
          f_create = node == instance.primary_node
10640
          try:
10641
            _CreateBlockDev(self, node, instance, new_disk,
10642
                            f_create, info, f_create)
10643
          except errors.OpExecError, err:
10644
            self.LogWarning("Failed to create volume %s (%s) on"
10645
                            " node %s: %s",
10646
                            new_disk.iv_name, new_disk, node, err)
10647
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
10648
                       (new_disk.size, new_disk.mode)))
10649
      else:
10650
        # change a given disk
10651
        instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
10652
        result.append(("disk.mode/%d" % disk_op,
10653
                       disk_dict[constants.IDISK_MODE]))
10654

    
10655
    if self.op.disk_template:
10656
      r_shut = _ShutdownInstanceDisks(self, instance)
10657
      if not r_shut:
10658
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
10659
                                 " proceed with disk template conversion")
10660
      mode = (instance.disk_template, self.op.disk_template)
10661
      try:
10662
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
10663
      except:
10664
        self.cfg.ReleaseDRBDMinors(instance.name)
10665
        raise
10666
      result.append(("disk_template", self.op.disk_template))
10667

    
10668
    # NIC changes
10669
    for nic_op, nic_dict in self.op.nics:
10670
      if nic_op == constants.DDM_REMOVE:
10671
        # remove the last nic
10672
        del instance.nics[-1]
10673
        result.append(("nic.%d" % len(instance.nics), "remove"))
10674
      elif nic_op == constants.DDM_ADD:
10675
        # mac and bridge should be set by now
10676
        mac = nic_dict[constants.INIC_MAC]
10677
        ip = nic_dict.get(constants.INIC_IP, None)
10678
        nicparams = self.nic_pinst[constants.DDM_ADD]
10679
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
10680
        instance.nics.append(new_nic)
10681
        result.append(("nic.%d" % (len(instance.nics) - 1),
10682
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
10683
                       (new_nic.mac, new_nic.ip,
10684
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
10685
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
10686
                       )))
10687
      else:
10688
        for key in (constants.INIC_MAC, constants.INIC_IP):
10689
          if key in nic_dict:
10690
            setattr(instance.nics[nic_op], key, nic_dict[key])
10691
        if nic_op in self.nic_pinst:
10692
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
10693
        for key, val in nic_dict.iteritems():
10694
          result.append(("nic.%s/%d" % (key, nic_op), val))
10695

    
10696
    # hvparams changes
10697
    if self.op.hvparams:
10698
      instance.hvparams = self.hv_inst
10699
      for key, val in self.op.hvparams.iteritems():
10700
        result.append(("hv/%s" % key, val))
10701

    
10702
    # beparams changes
10703
    if self.op.beparams:
10704
      instance.beparams = self.be_inst
10705
      for key, val in self.op.beparams.iteritems():
10706
        result.append(("be/%s" % key, val))
10707

    
10708
    # OS change
10709
    if self.op.os_name:
10710
      instance.os = self.op.os_name
10711

    
10712
    # osparams changes
10713
    if self.op.osparams:
10714
      instance.osparams = self.os_inst
10715
      for key, val in self.op.osparams.iteritems():
10716
        result.append(("os/%s" % key, val))
10717

    
10718
    self.cfg.Update(instance, feedback_fn)
10719

    
10720
    return result
10721

    
10722
  _DISK_CONVERSIONS = {
10723
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
10724
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
10725
    }
10726

    
10727

    
10728
class LUBackupQuery(NoHooksLU):
10729
  """Query the exports list
10730

10731
  """
10732
  REQ_BGL = False
10733

    
10734
  def ExpandNames(self):
10735
    self.needed_locks = {}
10736
    self.share_locks[locking.LEVEL_NODE] = 1
10737
    if not self.op.nodes:
10738
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10739
    else:
10740
      self.needed_locks[locking.LEVEL_NODE] = \
10741
        _GetWantedNodes(self, self.op.nodes)
10742

    
10743
  def Exec(self, feedback_fn):
10744
    """Compute the list of all the exported system images.
10745

10746
    @rtype: dict
10747
    @return: a dictionary with the structure node->(export-list)
10748
        where export-list is a list of the instances exported on
10749
        that node.
10750

10751
    """
10752
    self.nodes = self.glm.list_owned(locking.LEVEL_NODE)
10753
    rpcresult = self.rpc.call_export_list(self.nodes)
10754
    result = {}
10755
    for node in rpcresult:
10756
      if rpcresult[node].fail_msg:
10757
        result[node] = False
10758
      else:
10759
        result[node] = rpcresult[node].payload
10760

    
10761
    return result
10762

    
10763

    
10764
class LUBackupPrepare(NoHooksLU):
10765
  """Prepares an instance for an export and returns useful information.
10766

10767
  """
10768
  REQ_BGL = False
10769

    
10770
  def ExpandNames(self):
10771
    self._ExpandAndLockInstance()
10772

    
10773
  def CheckPrereq(self):
10774
    """Check prerequisites.
10775

10776
    """
10777
    instance_name = self.op.instance_name
10778

    
10779
    self.instance = self.cfg.GetInstanceInfo(instance_name)
10780
    assert self.instance is not None, \
10781
          "Cannot retrieve locked instance %s" % self.op.instance_name
10782
    _CheckNodeOnline(self, self.instance.primary_node)
10783

    
10784
    self._cds = _GetClusterDomainSecret()
10785

    
10786
  def Exec(self, feedback_fn):
10787
    """Prepares an instance for an export.
10788

10789
    """
10790
    instance = self.instance
10791

    
10792
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
10793
      salt = utils.GenerateSecret(8)
10794

    
10795
      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
10796
      result = self.rpc.call_x509_cert_create(instance.primary_node,
10797
                                              constants.RIE_CERT_VALIDITY)
10798
      result.Raise("Can't create X509 key and certificate on %s" % result.node)
10799

    
10800
      (name, cert_pem) = result.payload
10801

    
10802
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
10803
                                             cert_pem)
10804

    
10805
      return {
10806
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
10807
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
10808
                          salt),
10809
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
10810
        }
10811

    
10812
    return None
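
# Illustrative sketch, not part of Ganeti: for remote-mode exports the Exec
# method above returns a dictionary with exactly the three keys shown below.
# The values here are placeholders, not real handshake or certificate data; a
# consumer could sanity-check the presence of these keys before starting the
# export.
_EXAMPLE_PREPARE_RESULT = {
  "handshake": "opaque handshake data",
  "x509_key_name": ("key-name", "hmac-digest", "salt"),
  "x509_ca": "signed CA certificate in PEM format",
  }

def _ExampleHasPrepareFields(prepare_result):
  """Check that a prepare result carries the fields LUBackupExport expects."""
  return compat.all(key in prepare_result
                    for key in ("handshake", "x509_key_name", "x509_ca"))

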
class LUBackupExport(LogicalUnit):
10816
  """Export an instance to an image in the cluster.
10817

10818
  """
10819
  HPATH = "instance-export"
10820
  HTYPE = constants.HTYPE_INSTANCE
10821
  REQ_BGL = False
10822

    
10823
  def CheckArguments(self):
10824
    """Check the arguments.
10825

10826
    """
10827
    self.x509_key_name = self.op.x509_key_name
10828
    self.dest_x509_ca_pem = self.op.destination_x509_ca
10829

    
10830
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
10831
      if not self.x509_key_name:
10832
        raise errors.OpPrereqError("Missing X509 key name for encryption",
10833
                                   errors.ECODE_INVAL)
10834

    
10835
      if not self.dest_x509_ca_pem:
10836
        raise errors.OpPrereqError("Missing destination X509 CA",
10837
                                   errors.ECODE_INVAL)
10838

    
10839
  def ExpandNames(self):
10840
    self._ExpandAndLockInstance()
10841

    
10842
    # Lock all nodes for local exports
10843
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
10844
      # FIXME: lock only instance primary and destination node
10845
      #
10846
      # Sad but true, for now we have to lock all nodes, as we don't know where
10847
      # the previous export might be, and in this LU we search for it and
10848
      # remove it from its current node. In the future we could fix this by:
10849
      #  - making a tasklet to search (share-lock all), then create the
10850
      #    new one, then one to remove, after
10851
      #  - removing the removal operation altogether
10852
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10853

    
10854
  def DeclareLocks(self, level):
10855
    """Last minute lock declaration."""
10856
    # All nodes are locked anyway, so nothing to do here.
10857

    
10858
  def BuildHooksEnv(self):
10859
    """Build hooks env.
10860

10861
    This will run on the master, primary node and target node.
10862

10863
    """
10864
    env = {
10865
      "EXPORT_MODE": self.op.mode,
10866
      "EXPORT_NODE": self.op.target_node,
10867
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
10868
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
10869
      # TODO: Generic function for boolean env variables
10870
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
10871
      }
10872

    
10873
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10874

    
10875
    return env
10876

    
10877
  def BuildHooksNodes(self):
10878
    """Build hooks nodes.
10879

10880
    """
10881
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
10882

    
10883
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
10884
      nl.append(self.op.target_node)
10885

    
10886
    return (nl, nl)
10887

    
10888
  def CheckPrereq(self):
10889
    """Check prerequisites.
10890

10891
    This checks that the instance and node names are valid.
10892

10893
    """
10894
    instance_name = self.op.instance_name
10895

    
10896
    self.instance = self.cfg.GetInstanceInfo(instance_name)
10897
    assert self.instance is not None, \
10898
          "Cannot retrieve locked instance %s" % self.op.instance_name
10899
    _CheckNodeOnline(self, self.instance.primary_node)
10900

    
10901
    if (self.op.remove_instance and self.instance.admin_up and
10902
        not self.op.shutdown):
10903
      raise errors.OpPrereqError("Can not remove instance without shutting it"
10904
                                 " down before")
10905

    
10906
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
10907
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
10908
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
10909
      assert self.dst_node is not None
10910

    
10911
      _CheckNodeOnline(self, self.dst_node.name)
10912
      _CheckNodeNotDrained(self, self.dst_node.name)
10913

    
10914
      self._cds = None
10915
      self.dest_disk_info = None
10916
      self.dest_x509_ca = None
10917

    
10918
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10919
      self.dst_node = None
10920

    
10921
      if len(self.op.target_node) != len(self.instance.disks):
10922
        raise errors.OpPrereqError(("Received destination information for %s"
10923
                                    " disks, but instance %s has %s disks") %
10924
                                   (len(self.op.target_node), instance_name,
10925
                                    len(self.instance.disks)),
10926
                                   errors.ECODE_INVAL)
10927

    
10928
      cds = _GetClusterDomainSecret()
10929

    
10930
      # Check X509 key name
10931
      try:
10932
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
10933
      except (TypeError, ValueError), err:
10934
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
10935

    
10936
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
10937
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
10938
                                   errors.ECODE_INVAL)
10939

    
10940
      # Load and verify CA
10941
      try:
10942
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
10943
      except OpenSSL.crypto.Error, err:
10944
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
10945
                                   (err, ), errors.ECODE_INVAL)
10946

    
10947
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
10948
      if errcode is not None:
10949
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
10950
                                   (msg, ), errors.ECODE_INVAL)
10951

    
10952
      self.dest_x509_ca = cert
10953

    
10954
      # Verify target information
10955
      disk_info = []
10956
      for idx, disk_data in enumerate(self.op.target_node):
10957
        try:
10958
          (host, port, magic) = \
10959
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
10960
        except errors.GenericError, err:
10961
          raise errors.OpPrereqError("Target info for disk %s: %s" %
10962
                                     (idx, err), errors.ECODE_INVAL)
10963

    
10964
        disk_info.append((host, port, magic))
10965

    
10966
      assert len(disk_info) == len(self.op.target_node)
10967
      self.dest_disk_info = disk_info
10968

    
10969
    else:
10970
      raise errors.ProgrammerError("Unhandled export mode %r" %
10971
                                   self.op.mode)
10972

    
10973
    # instance disk type verification
10974
    # TODO: Implement export support for file-based disks
10975
    for disk in self.instance.disks:
10976
      if disk.dev_type == constants.LD_FILE:
10977
        raise errors.OpPrereqError("Export not supported for instances with"
10978
                                   " file-based disks", errors.ECODE_INVAL)
10979

    
10980
  def _CleanupExports(self, feedback_fn):
10981
    """Removes exports of current instance from all other nodes.
10982

10983
    If an instance in a cluster with nodes A..D was exported to node C, its
10984
    exports will be removed from the nodes A, B and D.
10985

10986
    """
10987
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
10988

    
10989
    nodelist = self.cfg.GetNodeList()
10990
    nodelist.remove(self.dst_node.name)
10991

    
10992
    # On one-node clusters nodelist will be empty after the removal; if we
    # proceeded, the backup would be removed because OpBackupQuery substitutes
    # an empty list with the full cluster node list.
10995
    iname = self.instance.name
10996
    if nodelist:
10997
      feedback_fn("Removing old exports for instance %s" % iname)
10998
      exportlist = self.rpc.call_export_list(nodelist)
10999
      for node in exportlist:
11000
        if exportlist[node].fail_msg:
11001
          continue
11002
        if iname in exportlist[node].payload:
11003
          msg = self.rpc.call_export_remove(node, iname).fail_msg
11004
          if msg:
11005
            self.LogWarning("Could not remove older export for instance %s"
11006
                            " on node %s: %s", iname, node, msg)
11007

    
11008
  def Exec(self, feedback_fn):
11009
    """Export an instance to an image in the cluster.
11010

11011
    """
11012
    assert self.op.mode in constants.EXPORT_MODES
11013

    
11014
    instance = self.instance
11015
    src_node = instance.primary_node
11016

    
11017
    if self.op.shutdown:
11018
      # shutdown the instance, but not the disks
11019
      feedback_fn("Shutting down instance %s" % instance.name)
11020
      result = self.rpc.call_instance_shutdown(src_node, instance,
11021
                                               self.op.shutdown_timeout)
11022
      # TODO: Maybe ignore failures if ignore_remove_failures is set
11023
      result.Raise("Could not shutdown instance %s on"
11024
                   " node %s" % (instance.name, src_node))
11025

    
11026
    # set the disks ID correctly since call_instance_start needs the
11027
    # correct drbd minor to create the symlinks
11028
    for disk in instance.disks:
11029
      self.cfg.SetDiskID(disk, src_node)
11030

    
11031
    activate_disks = (not instance.admin_up)
11032

    
11033
    if activate_disks:
11034
      # Activate the instance disks if we're exporting a stopped instance
11035
      feedback_fn("Activating disks for %s" % instance.name)
11036
      _StartInstanceDisks(self, instance, None)
11037

    
11038
    try:
11039
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
11040
                                                     instance)
11041

    
11042
      helper.CreateSnapshots()
11043
      try:
11044
        if (self.op.shutdown and instance.admin_up and
11045
            not self.op.remove_instance):
11046
          assert not activate_disks
11047
          feedback_fn("Starting instance %s" % instance.name)
11048
          result = self.rpc.call_instance_start(src_node, instance, None, None)
11049
          msg = result.fail_msg
11050
          if msg:
11051
            feedback_fn("Failed to start instance: %s" % msg)
11052
            _ShutdownInstanceDisks(self, instance)
11053
            raise errors.OpExecError("Could not start instance: %s" % msg)
11054

    
11055
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
11056
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
11057
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11058
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
11059
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11060

    
11061
          (key_name, _, _) = self.x509_key_name
11062

    
11063
          dest_ca_pem = \
11064
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
11065
                                            self.dest_x509_ca)
11066

    
11067
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
11068
                                                     key_name, dest_ca_pem,
11069
                                                     timeouts)
11070
      finally:
11071
        helper.Cleanup()
11072

    
11073
      # Check for backwards compatibility
11074
      assert len(dresults) == len(instance.disks)
11075
      assert compat.all(isinstance(i, bool) for i in dresults), \
11076
             "Not all results are boolean: %r" % dresults
11077

    
11078
    finally:
11079
      if activate_disks:
11080
        feedback_fn("Deactivating disks for %s" % instance.name)
11081
        _ShutdownInstanceDisks(self, instance)
11082

    
11083
    if not (compat.all(dresults) and fin_resu):
11084
      failures = []
11085
      if not fin_resu:
11086
        failures.append("export finalization")
11087
      if not compat.all(dresults):
11088
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
11089
                               if not dsk)
11090
        failures.append("disk export: disk(s) %s" % fdsk)
11091

    
11092
      raise errors.OpExecError("Export failed, errors in %s" %
11093
                               utils.CommaJoin(failures))
11094

    
11095
    # At this point, the export was successful, we can cleanup/finish
11096

    
11097
    # Remove instance if requested
11098
    if self.op.remove_instance:
11099
      feedback_fn("Removing instance %s" % instance.name)
11100
      _RemoveInstance(self, feedback_fn, instance,
11101
                      self.op.ignore_remove_failures)
11102

    
11103
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11104
      self._CleanupExports(feedback_fn)
11105

    
11106
    return fin_resu, dresults
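
# Illustrative sketch, not part of Ganeti: LUBackupExport.Exec above returns
# (fin_resu, dresults), the finalization status plus one boolean per disk.
# This standalone helper derives the same failure summary the LU raises on;
# the inputs in the usage comment are made up.
def _ExampleExportFailures(fin_resu, dresults):
  """Return a list of failure descriptions; an empty list means success."""
  failures = []
  if not fin_resu:
    failures.append("export finalization")
  bad_disks = [str(idx) for (idx, ok) in enumerate(dresults) if not ok]
  if bad_disks:
    failures.append("disk export: disk(s) %s" % utils.CommaJoin(bad_disks))
  return failures

# _ExampleExportFailures(True, [True, False]) -> ["disk export: disk(s) 1"]

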
class LUBackupRemove(NoHooksLU):
11110
  """Remove exports related to the named instance.
11111

11112
  """
11113
  REQ_BGL = False
11114

    
11115
  def ExpandNames(self):
11116
    self.needed_locks = {}
11117
    # We need all nodes to be locked in order for RemoveExport to work, but we
11118
    # don't need to lock the instance itself, as nothing will happen to it (and
11119
    # we can remove exports also for a removed instance)
11120
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11121

    
11122
  def Exec(self, feedback_fn):
11123
    """Remove any export.
11124

11125
    """
11126
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
11127
    # If the instance was not found we'll try with the name that was passed in.
11128
    # This will only work if it was an FQDN, though.
11129
    fqdn_warn = False
11130
    if not instance_name:
11131
      fqdn_warn = True
11132
      instance_name = self.op.instance_name
11133

    
11134
    locked_nodes = self.glm.list_owned(locking.LEVEL_NODE)
11135
    exportlist = self.rpc.call_export_list(locked_nodes)
11136
    found = False
11137
    for node in exportlist:
11138
      msg = exportlist[node].fail_msg
11139
      if msg:
11140
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
11141
        continue
11142
      if instance_name in exportlist[node].payload:
11143
        found = True
11144
        result = self.rpc.call_export_remove(node, instance_name)
11145
        msg = result.fail_msg
11146
        if msg:
11147
          logging.error("Could not remove export for instance %s"
11148
                        " on node %s: %s", instance_name, node, msg)
11149

    
11150
    if fqdn_warn and not found:
11151
      feedback_fn("Export not found. If trying to remove an export belonging"
11152
                  " to a deleted instance please use its Fully Qualified"
11153
                  " Domain Name.")
11154

    
11155

    
11156
class LUGroupAdd(LogicalUnit):
11157
  """Logical unit for creating node groups.
11158

11159
  """
11160
  HPATH = "group-add"
11161
  HTYPE = constants.HTYPE_GROUP
11162
  REQ_BGL = False
11163

    
11164
  def ExpandNames(self):
11165
    # We need the new group's UUID here so that we can create and acquire the
11166
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
11167
    # that it should not check whether the UUID exists in the configuration.
11168
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
11169
    self.needed_locks = {}
11170
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11171

    
11172
  def CheckPrereq(self):
11173
    """Check prerequisites.
11174

11175
    This checks that the given group name is not an existing node group
11176
    already.
11177

11178
    """
11179
    try:
11180
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11181
    except errors.OpPrereqError:
11182
      pass
11183
    else:
11184
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
11185
                                 " node group (UUID: %s)" %
11186
                                 (self.op.group_name, existing_uuid),
11187
                                 errors.ECODE_EXISTS)
11188

    
11189
    if self.op.ndparams:
11190
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
11191

    
11192
  def BuildHooksEnv(self):
11193
    """Build hooks env.
11194

11195
    """
11196
    return {
11197
      "GROUP_NAME": self.op.group_name,
11198
      }
11199

    
11200
  def BuildHooksNodes(self):
11201
    """Build hooks nodes.
11202

11203
    """
11204
    mn = self.cfg.GetMasterNode()
11205
    return ([mn], [mn])
11206

    
11207
  def Exec(self, feedback_fn):
11208
    """Add the node group to the cluster.
11209

11210
    """
11211
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
11212
                                  uuid=self.group_uuid,
11213
                                  alloc_policy=self.op.alloc_policy,
11214
                                  ndparams=self.op.ndparams)
11215

    
11216
    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
11217
    del self.remove_locks[locking.LEVEL_NODEGROUP]
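
# Illustrative sketch, not part of Ganeti: CheckPrereq above relies on the
# "lookup must fail" idiom: adding the group is only allowed if resolving the
# requested name raises.  A standalone restatement with a generic lookup
# callable; the exception type is a stand-in for errors.OpPrereqError and the
# group names in the usage comment are made up.
def _ExampleNameIsFree(lookup_fn, name, not_found_exc=KeyError):
  """Return True if lookup_fn(name) raises, i.e. the name is still unused."""
  try:
    lookup_fn(name)
  except not_found_exc:
    return True
  return False

# _ExampleNameIsFree({"group1": "uuid-1"}.__getitem__, "group2") returns True

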
class LUGroupAssignNodes(NoHooksLU):
11221
  """Logical unit for assigning nodes to groups.
11222

11223
  """
11224
  REQ_BGL = False
11225

    
11226
  def ExpandNames(self):
11227
    # These raise errors.OpPrereqError on their own:
11228
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11229
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
11230

    
11231
    # We want to lock all the affected nodes and groups. We have readily
11232
    # available the list of nodes, and the *destination* group. To gather the
11233
    # list of "source" groups, we need to fetch node information later on.
11234
    self.needed_locks = {
11235
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
11236
      locking.LEVEL_NODE: self.op.nodes,
11237
      }
11238

    
11239
  def DeclareLocks(self, level):
11240
    if level == locking.LEVEL_NODEGROUP:
11241
      assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
11242

    
11243
      # Try to get all affected nodes' groups without having the group or node
11244
      # lock yet. Needs verification later in the code flow.
11245
      groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
11246

    
11247
      self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
11248

    
11249
  def CheckPrereq(self):
11250
    """Check prerequisites.
11251

11252
    """
11253
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
11254
    assert (frozenset(self.glm.list_owned(locking.LEVEL_NODE)) ==
11255
            frozenset(self.op.nodes))
11256

    
11257
    expected_locks = (set([self.group_uuid]) |
11258
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
11259
    actual_locks = self.glm.list_owned(locking.LEVEL_NODEGROUP)
11260
    if actual_locks != expected_locks:
11261
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
11262
                               " current groups are '%s', used to be '%s'" %
11263
                               (utils.CommaJoin(expected_locks),
11264
                                utils.CommaJoin(actual_locks)))
11265

    
11266
    self.node_data = self.cfg.GetAllNodesInfo()
11267
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
11268
    instance_data = self.cfg.GetAllInstancesInfo()
11269

    
11270
    if self.group is None:
11271
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11272
                               (self.op.group_name, self.group_uuid))
11273

    
11274
    (new_splits, previous_splits) = \
11275
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
11276
                                             for node in self.op.nodes],
11277
                                            self.node_data, instance_data)
11278

    
11279
    if new_splits:
11280
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
11281

    
11282
      if not self.op.force:
11283
        raise errors.OpExecError("The following instances get split by this"
11284
                                 " change and --force was not given: %s" %
11285
                                 fmt_new_splits)
11286
      else:
11287
        self.LogWarning("This operation will split the following instances: %s",
11288
                        fmt_new_splits)
11289

    
11290
        if previous_splits:
11291
          self.LogWarning("In addition, these already-split instances continue"
11292
                          " to be split across groups: %s",
11293
                          utils.CommaJoin(utils.NiceSort(previous_splits)))
11294

    
11295
  def Exec(self, feedback_fn):
11296
    """Assign nodes to a new group.
11297

11298
    """
11299
    for node in self.op.nodes:
11300
      self.node_data[node].group = self.group_uuid
11301

    
11302
    # FIXME: Depends on side-effects of modifying the result of
11303
    # C{cfg.GetAllNodesInfo}
11304

    
11305
    self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
11306

    
11307
  @staticmethod
11308
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
11309
    """Check for split instances after a node assignment.
11310

11311
    This method considers a series of node assignments as an atomic operation,
11312
    and returns information about split instances after applying the set of
11313
    changes.
11314

11315
    In particular, it returns information about newly split instances, and
11316
    instances that were already split, and remain so after the change.
11317

11318
    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
11319
    considered.
11320

11321
    @type changes: list of (node_name, new_group_uuid) pairs.
11322
    @param changes: list of node assignments to consider.
11323
    @param node_data: a dict with data for all nodes
11324
    @param instance_data: a dict with all instances to consider
11325
    @rtype: a two-tuple
11326
    @return: a list of instances that were previously okay and end up split as
      a consequence of this change, and a list of instances that were already
      split and that this change does not fix.
11329

11330
    """
11331
    changed_nodes = dict((node, group) for node, group in changes
11332
                         if node_data[node].group != group)
11333

    
11334
    all_split_instances = set()
11335
    previously_split_instances = set()
11336

    
11337
    def InstanceNodes(instance):
11338
      return [instance.primary_node] + list(instance.secondary_nodes)
11339

    
11340
    for inst in instance_data.values():
11341
      if inst.disk_template not in constants.DTS_INT_MIRROR:
11342
        continue
11343

    
11344
      instance_nodes = InstanceNodes(inst)
11345

    
11346
      if len(set(node_data[node].group for node in instance_nodes)) > 1:
11347
        previously_split_instances.add(inst.name)
11348

    
11349
      if len(set(changed_nodes.get(node, node_data[node].group)
11350
                 for node in instance_nodes)) > 1:
11351
        all_split_instances.add(inst.name)
11352

    
11353
    return (list(all_split_instances - previously_split_instances),
11354
            list(previously_split_instances & all_split_instances))
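
# Illustrative sketch, not part of Ganeti: the check above reduces to "how
# many node groups do an instance's nodes span, before and after applying the
# pending assignments".  A minimal standalone restatement; the node and group
# names in the usage comment are made up.
def _ExampleIsSplit(instance_nodes, node_to_group, changes=None):
  """Return True if the instance's nodes would span more than one group."""
  changes = changes or {}
  groups = set(changes.get(node, node_to_group[node])
               for node in instance_nodes)
  return len(groups) > 1

# _ExampleIsSplit(["node1", "node2"], {"node1": "g1", "node2": "g1"},
#                 changes={"node2": "g2"}) returns True: the change splits it

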
class _GroupQuery(_QueryBase):
11358
  FIELDS = query.GROUP_FIELDS
11359

    
11360
  def ExpandNames(self, lu):
11361
    lu.needed_locks = {}
11362

    
11363
    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
11364
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
11365

    
11366
    if not self.names:
11367
      self.wanted = [name_to_uuid[name]
11368
                     for name in utils.NiceSort(name_to_uuid.keys())]
11369
    else:
11370
      # Accept names to be either names or UUIDs.
11371
      missing = []
11372
      self.wanted = []
11373
      all_uuid = frozenset(self._all_groups.keys())
11374

    
11375
      for name in self.names:
11376
        if name in all_uuid:
11377
          self.wanted.append(name)
11378
        elif name in name_to_uuid:
11379
          self.wanted.append(name_to_uuid[name])
11380
        else:
11381
          missing.append(name)
11382

    
11383
      if missing:
11384
        raise errors.OpPrereqError("Some groups do not exist: %s" %
11385
                                   utils.CommaJoin(missing),
11386
                                   errors.ECODE_NOENT)
11387

    
11388
  def DeclareLocks(self, lu, level):
11389
    pass
11390

    
11391
  def _GetQueryData(self, lu):
11392
    """Computes the list of node groups and their attributes.
11393

11394
    """
11395
    do_nodes = query.GQ_NODE in self.requested_data
11396
    do_instances = query.GQ_INST in self.requested_data
11397

    
11398
    group_to_nodes = None
11399
    group_to_instances = None
11400

    
11401
    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
11402
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
11403
    # latter GetAllInstancesInfo() is not enough, for we have to go through
11404
    # instance->node. Hence, we will need to process nodes even if we only need
11405
    # instance information.
11406
    if do_nodes or do_instances:
11407
      all_nodes = lu.cfg.GetAllNodesInfo()
11408
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
11409
      node_to_group = {}
11410

    
11411
      for node in all_nodes.values():
11412
        if node.group in group_to_nodes:
11413
          group_to_nodes[node.group].append(node.name)
11414
          node_to_group[node.name] = node.group
11415

    
11416
      if do_instances:
11417
        all_instances = lu.cfg.GetAllInstancesInfo()
11418
        group_to_instances = dict((uuid, []) for uuid in self.wanted)
11419

    
11420
        for instance in all_instances.values():
11421
          node = instance.primary_node
11422
          if node in node_to_group:
11423
            group_to_instances[node_to_group[node]].append(instance.name)
11424

    
11425
        if not do_nodes:
11426
          # Do not pass on node information if it was not requested.
11427
          group_to_nodes = None
11428

    
11429
    return query.GroupQueryData([self._all_groups[uuid]
11430
                                 for uuid in self.wanted],
11431
                                group_to_nodes, group_to_instances)
11432

    
11433

    
11434
class LUGroupQuery(NoHooksLU):
  """Logical unit for querying node groups.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
                          self.op.output_fields, False)

  def ExpandNames(self):
    self.gq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.gq.OldStyleQuery(self)
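
# Illustrative sketch, not part of Ganeti: _GroupQuery.ExpandNames above
# accepts both group names and UUIDs.  A standalone restatement of that
# resolution step; the group names and UUIDs in the usage comment are made
# up.
def _ExampleResolveGroups(names, name_to_uuid):
  """Map names-or-UUIDs to UUIDs, returning (wanted, missing)."""
  all_uuids = frozenset(name_to_uuid.values())
  wanted = []
  missing = []
  for name in names:
    if name in all_uuids:
      wanted.append(name)
    elif name in name_to_uuid:
      wanted.append(name_to_uuid[name])
    else:
      missing.append(name)
  return (wanted, missing)

# _ExampleResolveGroups(["default", "deadbeef"], {"default": "cafebabe"})
# returns (["cafebabe"], ["deadbeef"])

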
class LUGroupSetParams(LogicalUnit):
11452
  """Modifies the parameters of a node group.
11453

11454
  """
11455
  HPATH = "group-modify"
11456
  HTYPE = constants.HTYPE_GROUP
11457
  REQ_BGL = False
11458

    
11459
  def CheckArguments(self):
11460
    all_changes = [
11461
      self.op.ndparams,
11462
      self.op.alloc_policy,
11463
      ]
11464

    
11465
    if all_changes.count(None) == len(all_changes):
11466
      raise errors.OpPrereqError("Please pass at least one modification",
11467
                                 errors.ECODE_INVAL)
11468

    
11469
  def ExpandNames(self):
11470
    # This raises errors.OpPrereqError on its own:
11471
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11472

    
11473
    self.needed_locks = {
11474
      locking.LEVEL_NODEGROUP: [self.group_uuid],
11475
      }
11476

    
11477
  def CheckPrereq(self):
11478
    """Check prerequisites.
11479

11480
    """
11481
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
11482

    
11483
    if self.group is None:
11484
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11485
                               (self.op.group_name, self.group_uuid))
11486

    
11487
    if self.op.ndparams:
11488
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
11489
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
11490
      self.new_ndparams = new_ndparams
11491

    
11492
  def BuildHooksEnv(self):
11493
    """Build hooks env.
11494

11495
    """
11496
    return {
11497
      "GROUP_NAME": self.op.group_name,
11498
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
11499
      }
11500

    
11501
  def BuildHooksNodes(self):
11502
    """Build hooks nodes.
11503

11504
    """
11505
    mn = self.cfg.GetMasterNode()
11506
    return ([mn], [mn])
11507

    
11508
  def Exec(self, feedback_fn):
11509
    """Modifies the node group.
11510

11511
    """
11512
    result = []
11513

    
11514
    if self.op.ndparams:
11515
      self.group.ndparams = self.new_ndparams
11516
      result.append(("ndparams", str(self.group.ndparams)))
11517

    
11518
    if self.op.alloc_policy:
11519
      self.group.alloc_policy = self.op.alloc_policy
11520

    
11521
    self.cfg.Update(self.group, feedback_fn)
11522
    return result
11523

    
11524

    
11525

    
11526
class LUGroupRemove(LogicalUnit):
11527
  HPATH = "group-remove"
11528
  HTYPE = constants.HTYPE_GROUP
11529
  REQ_BGL = False
11530

    
11531
  def ExpandNames(self):
11532
    # This raises errors.OpPrereqError on its own:
11533
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11534
    self.needed_locks = {
11535
      locking.LEVEL_NODEGROUP: [self.group_uuid],
11536
      }
11537

    
11538
  def CheckPrereq(self):
11539
    """Check prerequisites.
11540

11541
    This checks that the given group name exists as a node group, that it is
    empty (i.e., contains no nodes), and that it is not the last group of the
    cluster.
11544

11545
    """
11546
    # Verify that the group is empty.
11547
    group_nodes = [node.name
11548
                   for node in self.cfg.GetAllNodesInfo().values()
11549
                   if node.group == self.group_uuid]
11550

    
11551
    if group_nodes:
11552
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
11553
                                 " nodes: %s" %
11554
                                 (self.op.group_name,
11555
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
11556
                                 errors.ECODE_STATE)
11557

    
11558
    # Verify the cluster would not be left group-less.
11559
    if len(self.cfg.GetNodeGroupList()) == 1:
11560
      raise errors.OpPrereqError("Group '%s' is the only group,"
11561
                                 " cannot be removed" %
11562
                                 self.op.group_name,
11563
                                 errors.ECODE_STATE)
11564

    
11565
  def BuildHooksEnv(self):
11566
    """Build hooks env.
11567

11568
    """
11569
    return {
11570
      "GROUP_NAME": self.op.group_name,
11571
      }
11572

    
11573
  def BuildHooksNodes(self):
11574
    """Build hooks nodes.
11575

11576
    """
11577
    mn = self.cfg.GetMasterNode()
11578
    return ([mn], [mn])
11579

    
11580
  def Exec(self, feedback_fn):
11581
    """Remove the node group.
11582

11583
    """
11584
    try:
11585
      self.cfg.RemoveNodeGroup(self.group_uuid)
11586
    except errors.ConfigurationError:
11587
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
11588
                               (self.op.group_name, self.group_uuid))
11589

    
11590
    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11591

    
11592

    
11593
class LUGroupRename(LogicalUnit):
11594
  HPATH = "group-rename"
11595
  HTYPE = constants.HTYPE_GROUP
11596
  REQ_BGL = False
11597

    
11598
  def ExpandNames(self):
11599
    # This raises errors.OpPrereqError on its own:
11600
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11601

    
11602
    self.needed_locks = {
11603
      locking.LEVEL_NODEGROUP: [self.group_uuid],
11604
      }
11605

    
11606
  def CheckPrereq(self):
11607
    """Check prerequisites.
11608

11609
    Ensures requested new name is not yet used.
11610

11611
    """
11612
    try:
11613
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
11614
    except errors.OpPrereqError:
11615
      pass
11616
    else:
11617
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
11618
                                 " node group (UUID: %s)" %
11619
                                 (self.op.new_name, new_name_uuid),
11620
                                 errors.ECODE_EXISTS)
11621

    
11622
  def BuildHooksEnv(self):
11623
    """Build hooks env.
11624

11625
    """
11626
    return {
11627
      "OLD_NAME": self.op.group_name,
11628
      "NEW_NAME": self.op.new_name,
11629
      }
11630

    
11631
  def BuildHooksNodes(self):
11632
    """Build hooks nodes.
11633

11634
    """
11635
    mn = self.cfg.GetMasterNode()
11636

    
11637
    all_nodes = self.cfg.GetAllNodesInfo()
11638
    all_nodes.pop(mn, None)
11639

    
11640
    run_nodes = [mn]
11641
    run_nodes.extend(node.name for node in all_nodes.values()
11642
                     if node.group == self.group_uuid)
11643

    
11644
    return (run_nodes, run_nodes)
11645

    
11646
  def Exec(self, feedback_fn):
11647
    """Rename the node group.
11648

11649
    """
11650
    group = self.cfg.GetNodeGroup(self.group_uuid)
11651

    
11652
    if group is None:
11653
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11654
                               (self.op.group_name, self.group_uuid))
11655

    
11656
    group.name = self.op.new_name
11657
    self.cfg.Update(group, feedback_fn)
11658

    
11659
    return self.op.new_name
11660

    
11661

    
11662
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
11663
  """Generic tags LU.
11664

11665
  This is an abstract class which is the parent of all the other tags LUs.
11666

11667
  """
11668
  def ExpandNames(self):
11669
    self.group_uuid = None
11670
    self.needed_locks = {}
11671
    if self.op.kind == constants.TAG_NODE:
11672
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
11673
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
11674
    elif self.op.kind == constants.TAG_INSTANCE:
11675
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
11676
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
11677
    elif self.op.kind == constants.TAG_NODEGROUP:
11678
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
11679

    
11680
    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
11681
    # not possible to acquire the BGL based on opcode parameters)
11682

    
11683
  def CheckPrereq(self):
11684
    """Check prerequisites.
11685

11686
    """
11687
    if self.op.kind == constants.TAG_CLUSTER:
11688
      self.target = self.cfg.GetClusterInfo()
11689
    elif self.op.kind == constants.TAG_NODE:
11690
      self.target = self.cfg.GetNodeInfo(self.op.name)
11691
    elif self.op.kind == constants.TAG_INSTANCE:
11692
      self.target = self.cfg.GetInstanceInfo(self.op.name)
11693
    elif self.op.kind == constants.TAG_NODEGROUP:
11694
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
11695
    else:
11696
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
11697
                                 str(self.op.kind), errors.ECODE_INVAL)
11698

    
11699

    
11700
class LUTagsGet(TagsLU):
  """Returns the tags of a given object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    TagsLU.ExpandNames(self)

    # Share locks as this is only a read operation
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())


class LUTagsSearch(NoHooksLU):
11720
  """Searches the tags for a given pattern.
11721

11722
  """
11723
  REQ_BGL = False
11724

    
11725
  def ExpandNames(self):
11726
    self.needed_locks = {}
11727

    
11728
  def CheckPrereq(self):
11729
    """Check prerequisites.
11730

11731
    This checks the pattern passed for validity by compiling it.
11732

11733
    """
11734
    try:
11735
      self.re = re.compile(self.op.pattern)
11736
    except re.error, err:
11737
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
11738
                                 (self.op.pattern, err), errors.ECODE_INVAL)
11739

    
11740
  def Exec(self, feedback_fn):
11741
    """Returns the tag list.
11742

11743
    """
11744
    cfg = self.cfg
11745
    tgts = [("/cluster", cfg.GetClusterInfo())]
11746
    ilist = cfg.GetAllInstancesInfo().values()
11747
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
11748
    nlist = cfg.GetAllNodesInfo().values()
11749
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
11750
    tgts.extend(("/nodegroup/%s" % n.name, n)
11751
                for n in cfg.GetAllNodeGroupsInfo().values())
11752
    results = []
11753
    for path, target in tgts:
11754
      for tag in target.GetTags():
11755
        if self.re.search(tag):
11756
          results.append((path, tag))
11757
    return results
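
# Illustrative sketch, not part of Ganeti: Exec above matches the compiled
# pattern against every tag of every cluster object and collects (path, tag)
# pairs.  A standalone miniature of the same idea; the paths and tags in the
# usage comment are made up ("re" is already imported at module level).
def _ExampleSearchTags(pattern, tagged_paths):
  """Return (path, tag) pairs whose tag matches the given regex pattern."""
  regex = re.compile(pattern)
  return [(path, tag)
          for (path, tags) in tagged_paths
          for tag in tags
          if regex.search(tag)]

# _ExampleSearchTags("^env:", [("/instances/inst1", ["env:prod", "web"])])
# returns [("/instances/inst1", "env:prod")]

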
class LUTagsSet(TagsLU):
11761
  """Sets a tag on a given object.
11762

11763
  """
11764
  REQ_BGL = False
11765

    
11766
  def CheckPrereq(self):
11767
    """Check prerequisites.
11768

11769
    This checks the type and length of the tag name and value.
11770

11771
    """
11772
    TagsLU.CheckPrereq(self)
11773
    for tag in self.op.tags:
11774
      objects.TaggableObject.ValidateTag(tag)
11775

    
11776
  def Exec(self, feedback_fn):
11777
    """Sets the tag.
11778

11779
    """
11780
    try:
11781
      for tag in self.op.tags:
11782
        self.target.AddTag(tag)
11783
    except errors.TagError, err:
11784
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
11785
    self.cfg.Update(self.target, feedback_fn)
11786

    
11787

    
11788
class LUTagsDel(TagsLU):
11789
  """Delete a list of tags from a given object.
11790

11791
  """
11792
  REQ_BGL = False
11793

    
11794
  def CheckPrereq(self):
11795
    """Check prerequisites.
11796

11797
    This checks that we have the given tag.
11798

11799
    """
11800
    TagsLU.CheckPrereq(self)
11801
    for tag in self.op.tags:
11802
      objects.TaggableObject.ValidateTag(tag)
11803
    del_tags = frozenset(self.op.tags)
11804
    cur_tags = self.target.GetTags()
11805

    
11806
    diff_tags = del_tags - cur_tags
11807
    if diff_tags:
11808
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
11809
      raise errors.OpPrereqError("Tag(s) %s not found" %
11810
                                 (utils.CommaJoin(diff_names), ),
11811
                                 errors.ECODE_NOENT)
11812

    
11813
  def Exec(self, feedback_fn):
11814
    """Remove the tag from the object.
11815

11816
    """
11817
    for tag in self.op.tags:
11818
      self.target.RemoveTag(tag)
11819
    self.cfg.Update(self.target, feedback_fn)
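
# Illustrative sketch, not part of Ganeti: the prerequisite check in LUTagsDel
# above rejects the request if any tag to be removed is not present on the
# object.  A standalone restatement; the tags in the usage comment are made
# up.
def _ExampleMissingTags(requested, current):
  """Return the requested tags that are absent from the object, sorted."""
  return sorted(frozenset(requested) - frozenset(current))

# _ExampleMissingTags(["a", "b"], ["b", "c"]) returns ["a"]

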
class LUTestDelay(NoHooksLU):
11823
  """Sleep for a specified amount of time.
11824

11825
  This LU sleeps on the master and/or nodes for a specified amount of
11826
  time.
11827

11828
  """
11829
  REQ_BGL = False
11830

    
11831
  def ExpandNames(self):
11832
    """Expand names and set required locks.
11833

11834
    This expands the node list, if any.
11835

11836
    """
11837
    self.needed_locks = {}
11838
    if self.op.on_nodes:
11839
      # _GetWantedNodes can be used here, but is not always appropriate to use
11840
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
11841
      # more information.
11842
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
11843
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
11844

    
11845
  def _TestDelay(self):
11846
    """Do the actual sleep.
11847

11848
    """
11849
    if self.op.on_master:
11850
      if not utils.TestDelay(self.op.duration):
11851
        raise errors.OpExecError("Error during master delay test")
11852
    if self.op.on_nodes:
11853
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
11854
      for node, node_result in result.items():
11855
        node_result.Raise("Failure during rpc call to node %s" % node)
11856

    
11857
  def Exec(self, feedback_fn):
11858
    """Execute the test delay opcode, with the wanted repetitions.
11859

11860
    """
11861
    if self.op.repeat == 0:
11862
      self._TestDelay()
11863
    else:
11864
      top_value = self.op.repeat - 1
11865
      for i in range(self.op.repeat):
11866
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
11867
        self._TestDelay()
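
# Illustrative sketch, not part of Ganeti: the repetition rule used by Exec
# above, where a repeat count of zero still runs the delay once and any
# positive value runs it exactly that many times.
def _ExampleDelayRuns(repeat):
  """Return how many times the delay is executed for a given repeat value."""
  return max(1, repeat)

# _ExampleDelayRuns(0) returns 1, _ExampleDelayRuns(3) returns 3

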
class LUTestJqueue(NoHooksLU):
11871
  """Utility LU to test some aspects of the job queue.
11872

11873
  """
11874
  REQ_BGL = False
11875

    
11876
  # Must be lower than default timeout for WaitForJobChange to see whether it
11877
  # notices changed jobs
11878
  _CLIENT_CONNECT_TIMEOUT = 20.0
11879
  _CLIENT_CONFIRM_TIMEOUT = 60.0
11880

    
11881
  @classmethod
11882
  def _NotifyUsingSocket(cls, cb, errcls):
11883
    """Opens a Unix socket and waits for another program to connect.
11884

11885
    @type cb: callable
11886
    @param cb: Callback to send socket name to client
11887
    @type errcls: class
11888
    @param errcls: Exception class to use for errors
11889

11890
    """
11891
    # Using a temporary directory as there's no easy way to create temporary
11892
    # sockets without writing a custom loop around tempfile.mktemp and
11893
    # socket.bind
11894
    tmpdir = tempfile.mkdtemp()
11895
    try:
11896
      tmpsock = utils.PathJoin(tmpdir, "sock")
11897

    
11898
      logging.debug("Creating temporary socket at %s", tmpsock)
11899
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
11900
      try:
11901
        sock.bind(tmpsock)
11902
        sock.listen(1)
11903

    
11904
        # Send details to client
11905
        cb(tmpsock)
11906

    
11907
        # Wait for client to connect before continuing
11908
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
11909
        try:
11910
          (conn, _) = sock.accept()
11911
        except socket.error, err:
11912
          raise errcls("Client didn't connect in time (%s)" % err)
11913
      finally:
11914
        sock.close()
11915
    finally:
11916
      # Remove as soon as client is connected
11917
      shutil.rmtree(tmpdir)
11918

    
11919
    # Wait for client to close
11920
    try:
11921
      try:
11922
        # pylint: disable-msg=E1101
11923
        # Instance of '_socketobject' has no ... member
11924
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
11925
        conn.recv(1)
11926
      except socket.error, err:
11927
        raise errcls("Client failed to confirm notification (%s)" % err)
11928
    finally:
11929
      conn.close()
11930

    
11931
  def _SendNotification(self, test, arg, sockname):
11932
    """Sends a notification to the client.
11933

11934
    @type test: string
11935
    @param test: Test name
11936
    @param arg: Test argument (depends on test)
11937
    @type sockname: string
11938
    @param sockname: Socket path
11939

11940
    """
11941
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
11942

    
11943
  def _Notify(self, prereq, test, arg):
11944
    """Notifies the client of a test.
11945

11946
    @type prereq: bool
11947
    @param prereq: Whether this is a prereq-phase test
11948
    @type test: string
11949
    @param test: Test name
11950
    @param arg: Test argument (depends on test)
11951

11952
    """
11953
    if prereq:
11954
      errcls = errors.OpPrereqError
11955
    else:
11956
      errcls = errors.OpExecError
11957

    
11958
    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
11959
                                                  test, arg),
11960
                                   errcls)
11961

    
11962
  def CheckArguments(self):
11963
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
11964
    self.expandnames_calls = 0
11965

    
11966
  def ExpandNames(self):
11967
    checkargs_calls = getattr(self, "checkargs_calls", 0)
11968
    if checkargs_calls < 1:
11969
      raise errors.ProgrammerError("CheckArguments was not called")
11970

    
11971
    self.expandnames_calls += 1
11972

    
11973
    if self.op.notify_waitlock:
11974
      self._Notify(True, constants.JQT_EXPANDNAMES, None)
11975

    
11976
    self.LogInfo("Expanding names")
11977

    
11978
    # Get lock on master node (just to get a lock, not for a particular reason)
11979
    self.needed_locks = {
11980
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
11981
      }
11982

    
11983
  def Exec(self, feedback_fn):
11984
    if self.expandnames_calls < 1:
11985
      raise errors.ProgrammerError("ExpandNames was not called")
11986

    
11987
    if self.op.notify_exec:
11988
      self._Notify(False, constants.JQT_EXEC, None)
11989

    
11990
    self.LogInfo("Executing")
11991

    
11992
    if self.op.log_messages:
11993
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
11994
      for idx, msg in enumerate(self.op.log_messages):
11995
        self.LogInfo("Sending log message %s", idx + 1)
11996
        feedback_fn(constants.JQT_MSGPREFIX + msg)
11997
        # Report how many test messages have been sent
11998
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)
11999

    
12000
    if self.op.fail:
12001
      raise errors.OpExecError("Opcode failure was requested")
12002

    
12003
    return True
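
# Illustrative sketch, not part of Ganeti: the counterpart of
# _NotifyUsingSocket above is a test client that connects to the announced
# Unix socket path and then closes the connection to acknowledge the
# notification.  The socket path argument is a placeholder for whatever the
# callback published ("socket" is already imported at module level).
def _ExampleConfirmNotification(sockname, timeout=10.0):
  """Connect to the notification socket and close it to acknowledge."""
  sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
  sock.settimeout(timeout)
  try:
    sock.connect(sockname)
  finally:
    sock.close()

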
class IAllocator(object):
12007
  """IAllocator framework.
12008

12009
  An IAllocator instance has four sets of attributes:
12010
    - cfg that is needed to query the cluster
12011
    - input data (all members of the _KEYS class attribute are required)
12012
    - four buffer attributes (in|out_data|text), that represent the
12013
      input (to the external script) in text and data structure format,
12014
      and the output from it, again in two formats
12015
    - the result variables from the script (success, info, nodes) for
12016
      easy usage
12017

12018
  """
12019
  # pylint: disable-msg=R0902
12020
  # lots of instance attributes
12021

    
12022
  def __init__(self, cfg, rpc, mode, **kwargs):
12023
    self.cfg = cfg
12024
    self.rpc = rpc
12025
    # init buffer variables
12026
    self.in_text = self.out_text = self.in_data = self.out_data = None
12027
    # init all input fields so that pylint is happy
12028
    self.mode = mode
12029
    self.memory = self.disks = self.disk_template = None
12030
    self.os = self.tags = self.nics = self.vcpus = None
12031
    self.hypervisor = None
12032
    self.relocate_from = None
12033
    self.name = None
12034
    self.evac_nodes = None
12035
    self.instances = None
12036
    self.evac_mode = None
12037
    self.target_groups = []
12038
    # computed fields
12039
    self.required_nodes = None
12040
    # init result fields
12041
    self.success = self.info = self.result = None
12042

    
12043
    try:
12044
      (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
12045
    except KeyError:
12046
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
12047
                                   " IAllocator" % self.mode)
12048

    
12049
    keyset = [n for (n, _) in keydata]
12050

    
12051
    for key in kwargs:
12052
      if key not in keyset:
12053
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
12054
                                     " IAllocator" % key)
12055
      setattr(self, key, kwargs[key])
12056

    
12057
    for key in keyset:
12058
      if key not in kwargs:
12059
        raise errors.ProgrammerError("Missing input parameter '%s' to"
12060
                                     " IAllocator" % key)
12061
    self._BuildInputData(compat.partial(fn, self), keydata)
12062

    
12063
  def _ComputeClusterData(self):
12064
    """Compute the generic allocator input data.
12065

12066
    This is the data that is independent of the actual operation.
12067

12068
    """
12069
    cfg = self.cfg
12070
    cluster_info = cfg.GetClusterInfo()
12071
    # cluster data
12072
    data = {
12073
      "version": constants.IALLOCATOR_VERSION,
12074
      "cluster_name": cfg.GetClusterName(),
12075
      "cluster_tags": list(cluster_info.GetTags()),
12076
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
12077
      # we don't have job IDs
12078
      }
12079
    ninfo = cfg.GetAllNodesInfo()
12080
    iinfo = cfg.GetAllInstancesInfo().values()
12081
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
12082

    
12083
    # node data
12084
    node_list = [n.name for n in ninfo.values() if n.vm_capable]
12085

    
12086
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
12087
      hypervisor_name = self.hypervisor
12088
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
12089
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
12090
    else:
12091
      hypervisor_name = cluster_info.enabled_hypervisors[0]
12092

    
12093
    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
12094
                                        hypervisor_name)
12095
    node_iinfo = \
12096
      self.rpc.call_all_instances_info(node_list,
12097
                                       cluster_info.enabled_hypervisors)
12098

    
12099
    data["nodegroups"] = self._ComputeNodeGroupData(cfg)
12100

    
12101
    config_ndata = self._ComputeBasicNodeData(ninfo)
12102
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
12103
                                                 i_list, config_ndata)
12104
    assert len(data["nodes"]) == len(ninfo), \
12105
        "Incomplete node data computed"
12106

    
12107
    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
12108

    
12109
    self.in_data = data
12110

    
12111
  @staticmethod
12112
  def _ComputeNodeGroupData(cfg):
12113
    """Compute node groups data.
12114

12115
    """
12116
    ng = dict((guuid, {
12117
      "name": gdata.name,
12118
      "alloc_policy": gdata.alloc_policy,
12119
      })
12120
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
12121

    
12122
    return ng
12123

    
12124
  @staticmethod
12125
  def _ComputeBasicNodeData(node_cfg):
12126
    """Compute global node data.
12127

12128
    @rtype: dict
    @return: a dict mapping each node name to a dict of its
      configuration-derived attributes
12130

12131
    """
12132
    # fill in static (config-based) values
12133
    node_results = dict((ninfo.name, {
12134
      "tags": list(ninfo.GetTags()),
12135
      "primary_ip": ninfo.primary_ip,
12136
      "secondary_ip": ninfo.secondary_ip,
12137
      "offline": ninfo.offline,
12138
      "drained": ninfo.drained,
12139
      "master_candidate": ninfo.master_candidate,
12140
      "group": ninfo.group,
12141
      "master_capable": ninfo.master_capable,
12142
      "vm_capable": ninfo.vm_capable,
12143
      })
12144
      for ninfo in node_cfg.values())
12145

    
12146
    return node_results
12147

    
12148
  @staticmethod
12149
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
12150
                              node_results):
12151
    """Compute global node data.
12152

12153
    @param node_results: the basic node structures as filled from the config
12154

12155
    """
12156
    # make a copy of the current dict
12157
    node_results = dict(node_results)
12158
    for nname, nresult in node_data.items():
12159
      assert nname in node_results, "Missing basic data for node %s" % nname
12160
      ninfo = node_cfg[nname]
12161

    
12162
      if not (ninfo.offline or ninfo.drained):
12163
        nresult.Raise("Can't get data for node %s" % nname)
12164
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
12165
                                nname)
12166
        remote_info = nresult.payload
12167

    
12168
        for attr in ['memory_total', 'memory_free', 'memory_dom0',
12169
                     'vg_size', 'vg_free', 'cpu_total']:
12170
          if attr not in remote_info:
12171
            raise errors.OpExecError("Node '%s' didn't return attribute"
12172
                                     " '%s'" % (nname, attr))
12173
          if not isinstance(remote_info[attr], int):
12174
            raise errors.OpExecError("Node '%s' returned invalid value"
12175
                                     " for '%s': %s" %
12176
                                     (nname, attr, remote_info[attr]))
12177
        # compute memory used by primary instances
12178
        i_p_mem = i_p_up_mem = 0
12179
        for iinfo, beinfo in i_list:
12180
          if iinfo.primary_node == nname:
12181
            i_p_mem += beinfo[constants.BE_MEMORY]
12182
            if iinfo.name not in node_iinfo[nname].payload:
12183
              i_used_mem = 0
12184
            else:
12185
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
12186
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
12187
            remote_info['memory_free'] -= max(0, i_mem_diff)
12188

    
12189
            if iinfo.admin_up:
12190
              i_p_up_mem += beinfo[constants.BE_MEMORY]
12191

    
12192
        # compute memory used by instances
12193
        pnr_dyn = {
12194
          "total_memory": remote_info['memory_total'],
12195
          "reserved_memory": remote_info['memory_dom0'],
12196
          "free_memory": remote_info['memory_free'],
12197
          "total_disk": remote_info['vg_size'],
12198
          "free_disk": remote_info['vg_free'],
12199
          "total_cpus": remote_info['cpu_total'],
12200
          "i_pri_memory": i_p_mem,
12201
          "i_pri_up_memory": i_p_up_mem,
12202
          }
12203
        pnr_dyn.update(node_results[nname])
12204
        node_results[nname] = pnr_dyn
12205

    
12206
    return node_results
12207

    
12208
  @staticmethod
12209
  def _ComputeInstanceData(cluster_info, i_list):
12210
    """Compute global instance data.
12211

12212
    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {
          "mac": nic.mac,
          "ip": nic.ip,
          "mode": filled_params[constants.NIC_MODE],
          "link": filled_params[constants.NIC_LINK],
          }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{constants.IDISK_SIZE: dsk.size,
                   constants.IDISK_MODE: dsk.mode}
                  for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1

    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.memory,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      "hypervisor": self.hypervisor,
      }

    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if instance.disk_template in constants.DTS_INT_MIRROR and \
        len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
                                 errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {
      "evac_nodes": self.evac_nodes
      }
    return request

  def _AddNodeEvacuate(self):
    """Get data for node-evacuate requests.

    """
    return {
      "instances": self.instances,
      "evac_mode": self.evac_mode,
      }

  def _AddChangeGroup(self):
    """Get data for change-group requests.

    """
    return {
      "instances": self.instances,
      "target_groups": self.target_groups,
      }

  def _BuildInputData(self, fn, keydata):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    for keyname, keytype in keydata:
      if keyname not in request:
        raise errors.ProgrammerError("Request parameter %s is missing" %
                                     keyname)
      val = request[keyname]
      if not keytype(val):
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
                                     " validation, value %s, expected"
                                     " type %s" % (keyname, val, keytype))
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)

  _STRING_LIST = ht.TListOf(ht.TString)
  _JOBSET_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
     # pylint: disable-msg=E1101
     # Class '...' has no 'OP_ID' member
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                          opcodes.OpInstanceMigrate.OP_ID,
                          opcodes.OpInstanceReplaceDisks.OP_ID])
     })))
  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance,
       [
        ("name", ht.TString),
        ("memory", ht.TInt),
        ("disks", ht.TListOf(ht.TDict)),
        ("disk_template", ht.TString),
        ("os", ht.TString),
        ("tags", _STRING_LIST),
        ("nics", ht.TListOf(ht.TDict)),
        ("vcpus", ht.TInt),
        ("hypervisor", ht.TString),
        ], ht.TList),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance,
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
       ht.TList),
    constants.IALLOCATOR_MODE_MEVAC:
      (_AddEvacuateNodes, [("evac_nodes", _STRING_LIST)],
       ht.TListOf(ht.TAnd(ht.TIsLength(2), _STRING_LIST))),
    constants.IALLOCATOR_MODE_NODE_EVAC:
      (_AddNodeEvacuate, [
        ("instances", _STRING_LIST),
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
        ], _JOBSET_LIST),
    constants.IALLOCATOR_MODE_CHG_GROUP:
      (_AddChangeGroup, [
        ("instances", _STRING_LIST),
        ("target_groups", _STRING_LIST),
        ], _JOBSET_LIST),
    }
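  # Each _MODE_DATA entry maps an allocator mode to a tuple of: the method
  # building the request, the (key, validator) pairs that _BuildInputData
  # enforces on that request, and (presumably bound to _result_check
  # elsewhere in this class) the check that _ValidateResult applies to the
  # script's "result" field.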

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and, if successful, save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not self._result_check(self.result):
      raise errors.OpExecError("Iallocator returned invalid result,"
                               " expected %s, got %s" %
                               (self._result_check, self.result),
                               errors.ECODE_INVAL)

    if self.mode in (constants.IALLOCATOR_MODE_RELOC,
                     constants.IALLOCATOR_MODE_MEVAC):
      node2group = dict((name, ndata["group"])
                        for (name, ndata) in self.in_data["nodes"].items())

      fn = compat.partial(self._NodesToGroups, node2group,
                          self.in_data["nodegroups"])

      if self.mode == constants.IALLOCATOR_MODE_RELOC:
        assert self.relocate_from is not None
        assert self.required_nodes == 1

        request_groups = fn(self.relocate_from)
        result_groups = fn(rdict["result"])

        if result_groups != request_groups:
          raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
                                   " differ from original groups (%s)" %
                                   (utils.CommaJoin(result_groups),
                                    utils.CommaJoin(request_groups)))
      elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
        request_groups = fn(self.evac_nodes)
        for (instance_name, secnode) in self.result:
          result_groups = fn([secnode])
          if result_groups != request_groups:
            raise errors.OpExecError("Iallocator returned new secondary node"
                                     " '%s' (group '%s') for instance '%s'"
                                     " which is not in original group '%s'" %
                                     (secnode, utils.CommaJoin(result_groups),
                                      instance_name,
                                      utils.CommaJoin(request_groups)))
      else:
        raise errors.ProgrammerError("Unhandled mode '%s'" % self.mode)

    self.out_data = rdict

  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @type groups: dict
    @param groups: Group information
    @type nodes: list
    @param nodes: Node names

    """
    result = set()

    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        pass
      else:
        try:
          group = groups[group_uuid]
        except KeyError:
          # Can't find group, let's use UUID
          group_name = group_uuid
        else:
          group_name = group["name"]

        result.add(group_name)

    return sorted(result)


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode of
    the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Uncaught mode %s in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
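
# Example (illustrative): _GetQueryImplementation(constants.QR_NODE) returns
# the _NodeQuery class, while an unknown resource name raises OpPrereqError.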